From 2a94de75b3299681cf2379c2718c4ebc939069a9 Mon Sep 17 00:00:00 2001 From: Yaodong Sheng Date: Fri, 15 Sep 2023 11:07:24 -0400 Subject: [PATCH] first commit --- Artifact.md | 151 ++ Docker/Dockerfile | 36 + LICENSE | 21 + Makefile | 5 + artifact/Makefile | 8 + artifact/ds/README.md | 14 + artifact/ds/STMCAS/dlist_carumap.h | 646 +++++++++ artifact/ds/STMCAS/dlist_omap.h | 351 +++++ artifact/ds/STMCAS/ibst_omap.h | 397 ++++++ artifact/ds/STMCAS/rbtree_omap.h | 948 +++++++++++++ artifact/ds/STMCAS/skiplist_cached_opt_omap.h | 567 ++++++++ artifact/ds/STMCAS/slist_omap.h | 328 +++++ artifact/ds/baseline/ext_ticket_bst/plaf.h | 41 + .../ds/baseline/ext_ticket_bst/ticket_impl.h | 379 +++++ .../ds/baseline/int_bst_pathcas/casword.h | 70 + .../int_bst_pathcas/internal_kcas_avl.h | 1251 +++++++++++++++++ .../int_bst_pathcas/internal_kcas_bst.h | 767 ++++++++++ artifact/ds/baseline/int_bst_pathcas/kcas.h | 104 ++ .../int_bst_pathcas/kcas_reuse_htm_impl.h | 575 ++++++++ artifact/ds/baseline/lazylist_omap.h | 158 +++ artifact/ds/baseline/lfskiplist_omap.h | 561 ++++++++ artifact/ds/handSTM/dlist_carumap.h | 499 +++++++ artifact/ds/handSTM/dlist_omap.h | 172 +++ artifact/ds/handSTM/ibst_omap.h | 267 ++++ artifact/ds/handSTM/iht_carumap.h | 286 ++++ artifact/ds/handSTM/rbtree_omap.h | 365 +++++ artifact/ds/handSTM/rbtree_omap_opt.h | 385 +++++ artifact/ds/handSTM/rbtree_tl2_omap.h | 437 ++++++ artifact/ds/handSTM/skiplist_omap_bigtx.h | 334 +++++ artifact/ds/handSTM/slist_omap.h | 154 ++ artifact/ds/hybrid/dlist_carumap.h | 696 +++++++++ artifact/ds/hybrid/rbtree_omap_drop.h | 599 ++++++++ artifact/ds/include/ca_umap_list_adapter.h | 86 ++ artifact/ds/xSTM/dlist_omap.h | 174 +++ artifact/ds/xSTM/ibst_omap.h | 260 ++++ artifact/ds/xSTM/rbtree_omap.h | 370 +++++ artifact/ds/xSTM/rbtree_tl2_omap.h | 428 ++++++ artifact/policies/README.md | 78 + artifact/policies/STMCAS/include/base.h | 98 ++ artifact/policies/STMCAS/include/field.h | 50 + artifact/policies/STMCAS/include/raii.h | 100 ++ artifact/policies/STMCAS/stmcas.h | 38 + artifact/policies/baseline/thread.h | 82 ++ artifact/policies/exoTM/exotm.h | 302 ++++ artifact/policies/handSTM/eager_c1.h | 51 + artifact/policies/handSTM/eager_c2.h | 51 + artifact/policies/handSTM/include/field.h | 651 +++++++++ artifact/policies/handSTM/include/raii.h | 84 ++ artifact/policies/handSTM/include/redo_base.h | 146 ++ artifact/policies/handSTM/include/undo_base.h | 146 ++ artifact/policies/handSTM/lazy.h | 52 + artifact/policies/handSTM/wb_c1.h | 53 + artifact/policies/handSTM/wb_c2.h | 50 + artifact/policies/hybrid/include/base.h | 179 +++ artifact/policies/hybrid/include/field.h | 458 ++++++ artifact/policies/hybrid/include/raii.h | 196 +++ artifact/policies/hybrid/lazy.h | 53 + artifact/policies/hybrid/wb_c1.h | 56 + artifact/policies/hybrid/wb_c2.h | 53 + artifact/policies/include/hash.h | 18 + artifact/policies/include/minivector.h | 102 ++ artifact/policies/include/orec_policies.h | 76 + artifact/policies/include/rdtsc_rand.h | 17 + artifact/policies/include/redolog_nocast.h | 332 +++++ artifact/policies/include/timestamp_smr.h | 141 ++ artifact/policies/include/undolog.h | 206 +++ artifact/policies/xSTM/Makefile | 7 + artifact/policies/xSTM/common/tm_api.h | 223 +++ artifact/policies/xSTM/common/tm_defines.h | 184 +++ artifact/policies/xSTM/common/xSTM.mk | 3 + artifact/policies/xSTM/libs/.gitignore | 1 + artifact/policies/xSTM/libs/Makefile | 45 + artifact/policies/xSTM/libs/README.md | 33 + artifact/policies/xSTM/libs/include/cm.h | 66 + 
.../policies/xSTM/libs/include/constants.h | 28 + artifact/policies/xSTM/libs/include/epochs.h | 286 ++++ artifact/policies/xSTM/libs/include/orec_t.h | 74 + .../policies/xSTM/libs/include/pad_word.h | 32 + .../policies/xSTM/libs/include/platform.h | 82 ++ .../policies/xSTM/libs/include/timesource.h | 55 + .../xSTM/libs/stm_algs/exo_eager_c1.h | 304 ++++ .../xSTM/libs/stm_algs/exo_eager_c2.h | 307 ++++ .../policies/xSTM/libs/stm_algs/exo_lazy_c1.h | 304 ++++ .../policies/xSTM/libs/stm_algs/exo_lazy_c2.h | 305 ++++ .../xSTM/libs/stm_algs/include/alloc.h | 94 ++ .../xSTM/libs/stm_algs/include/deferred.h | 35 + .../xSTM/libs/stm_algs/include/redolog.h | 370 +++++ .../xSTM/libs/stm_algs/include/stackframe.h | 70 + .../xSTM/libs/stm_algs/orec_eager_c1.h | 315 +++++ .../xSTM/libs/stm_algs/orec_eager_c2.h | 325 +++++ .../xSTM/libs/stm_algs/orec_lazy_c1.h | 321 +++++ .../xSTM/libs/stm_algs/orec_lazy_c2.h | 326 +++++ .../xSTM/libs/stm_instances/exo_eager_c1_q.cc | 34 + .../xSTM/libs/stm_instances/exo_eager_c2_q.cc | 34 + .../xSTM/libs/stm_instances/exo_lazy_c1_q.cc | 34 + .../xSTM/libs/stm_instances/exo_lazy_c2_q.cc | 34 + .../xSTM/libs/stm_instances/include/clone.h | 78 + .../xSTM/libs/stm_instances/include/execute.h | 125 ++ .../xSTM/libs/stm_instances/include/frame.h | 28 + .../libs/stm_instances/include/loadstore.h | 40 + .../xSTM/libs/stm_instances/include/mem.h | 37 + .../libs/stm_instances/include/memfuncs.h | 135 ++ .../xSTM/libs/stm_instances/include/stats.h | 12 + .../libs/stm_instances/orec_gv1_eager_c1_q.cc | 39 + .../libs/stm_instances/orec_gv1_eager_c2_q.cc | 39 + .../libs/stm_instances/orec_gv1_lazy_c1_q.cc | 39 + .../libs/stm_instances/orec_gv1_lazy_c2_q.cc | 39 + .../libs/stm_instances/orec_tsc_eager_c1_q.cc | 39 + .../libs/stm_instances/orec_tsc_eager_c2_q.cc | 39 + .../libs/stm_instances/orec_tsc_lazy_c1_q.cc | 32 + .../libs/stm_instances/orec_tsc_lazy_c2_q.cc | 39 + artifact/policies/xSTM/libs/tm_names.mk | 10 + artifact/policies/xSTM/plugin/Makefile | 5 + .../policies/xSTM/plugin/plugin/.gitignore | 3 + .../xSTM/plugin/plugin/CMakeLists.txt | 61 + artifact/policies/xSTM/plugin/plugin/Makefile | 10 + .../policies/xSTM/plugin/plugin/README.md | 34 + .../xSTM/plugin/plugin/boundary_transform.cc | 108 ++ .../policies/xSTM/plugin/plugin/discovery.cc | 312 ++++ .../xSTM/plugin/plugin/function_transform.cc | 518 +++++++ .../xSTM/plugin/plugin/local_config.h | 15 + .../policies/xSTM/plugin/plugin/mappings.cc | 56 + .../xSTM/plugin/plugin/optimizations.cc | 45 + .../policies/xSTM/plugin/plugin/raii_lite.cc | 252 ++++ .../policies/xSTM/plugin/plugin/signatures.cc | 148 ++ .../policies/xSTM/plugin/plugin/signatures.h | 94 ++ .../policies/xSTM/plugin/plugin/tm_plugin.cc | 71 + .../policies/xSTM/plugin/plugin/tm_plugin.h | 174 +++ artifact/policies/xSTM/plugin/plugin/types.h | 18 + artifact/scripts/.gitignore | 1 + artifact/scripts/ChartCfg.py | 17 + artifact/scripts/ExpCfg.py | 100 ++ artifact/scripts/GetData.py | 49 + artifact/scripts/MakeChart.py | 128 ++ artifact/scripts/Makefile | 7 + artifact/scripts/README.md | 44 + artifact/scripts/Runner.py | 14 + artifact/scripts/Targets.py | 167 +++ artifact/scripts/Types.py | 119 ++ artifact/scripts/Util.py | 31 + artifact/ubench/Makefile | 15 + artifact/ubench/README.md | 50 + artifact/ubench/STMCAS/Makefile | 44 + artifact/ubench/STMCAS/build.mk | 16 + artifact/ubench/STMCAS/common.mk | 22 + artifact/ubench/STMCAS/dlist_carumap.cc | 10 + artifact/ubench/STMCAS/dlist_caumap.cc | 12 + artifact/ubench/STMCAS/dlist_omap.cc | 10 + 
artifact/ubench/STMCAS/dlist_opt_caumap.cc | 12 + artifact/ubench/STMCAS/dlist_opt_omap.cc | 10 + artifact/ubench/STMCAS/ibst_omap.cc | 10 + artifact/ubench/STMCAS/rbtree_omap.cc | 10 + .../ubench/STMCAS/skiplist_cached_opt_omap.cc | 10 + artifact/ubench/STMCAS/slist_omap.cc | 10 + artifact/ubench/STMCAS/slist_opt_caumap.cc | 12 + artifact/ubench/baseline/Makefile | 37 + artifact/ubench/baseline/ebst_ticket_omap.cc | 11 + artifact/ubench/baseline/iavl_pathcas_omap.cc | 14 + artifact/ubench/baseline/ibst_pathcas_omap.cc | 14 + artifact/ubench/baseline/lazylist_caumap.cc | 13 + artifact/ubench/baseline/lazylist_omap.cc | 11 + artifact/ubench/baseline/lfskiplist_omap.cc | 11 + artifact/ubench/config.mk | 12 + artifact/ubench/handSTM/Makefile | 44 + artifact/ubench/handSTM/build.mk | 16 + artifact/ubench/handSTM/common.mk | 15 + artifact/ubench/handSTM/dlist_carumap.cc | 10 + artifact/ubench/handSTM/dlist_caumap.cc | 12 + artifact/ubench/handSTM/ibst_omap.cc | 10 + artifact/ubench/handSTM/rbtree_omap.cc | 10 + .../ubench/handSTM/skiplist_omap_bigtx.cc | 10 + artifact/ubench/handSTM/slist_omap.cc | 10 + artifact/ubench/hybrid/Makefile | 44 + artifact/ubench/hybrid/build.mk | 16 + artifact/ubench/hybrid/common.mk | 14 + artifact/ubench/hybrid/dlist_carumap.cc | 10 + artifact/ubench/hybrid/rbtree_omap_drop.cc | 10 + .../ubench/include/bench_thread_context.h | 32 + artifact/ubench/include/config.h | 138 ++ artifact/ubench/include/experiment.h | 173 +++ artifact/ubench/include/experiment_pathcas.h | 173 +++ artifact/ubench/include/launch.h | 17 + artifact/ubench/include/launch_multi.h | 18 + artifact/ubench/include/manager.h | 146 ++ artifact/ubench/xSTM/Makefile | 40 + artifact/ubench/xSTM/build.mk | 26 + artifact/ubench/xSTM/common.mk | 24 + artifact/ubench/xSTM/ibst_omap.cc | 11 + 188 files changed, 25916 insertions(+) create mode 100644 Artifact.md create mode 100644 Docker/Dockerfile create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 artifact/Makefile create mode 100644 artifact/ds/README.md create mode 100644 artifact/ds/STMCAS/dlist_carumap.h create mode 100644 artifact/ds/STMCAS/dlist_omap.h create mode 100644 artifact/ds/STMCAS/ibst_omap.h create mode 100644 artifact/ds/STMCAS/rbtree_omap.h create mode 100644 artifact/ds/STMCAS/skiplist_cached_opt_omap.h create mode 100644 artifact/ds/STMCAS/slist_omap.h create mode 100644 artifact/ds/baseline/ext_ticket_bst/plaf.h create mode 100644 artifact/ds/baseline/ext_ticket_bst/ticket_impl.h create mode 100644 artifact/ds/baseline/int_bst_pathcas/casword.h create mode 100644 artifact/ds/baseline/int_bst_pathcas/internal_kcas_avl.h create mode 100644 artifact/ds/baseline/int_bst_pathcas/internal_kcas_bst.h create mode 100644 artifact/ds/baseline/int_bst_pathcas/kcas.h create mode 100644 artifact/ds/baseline/int_bst_pathcas/kcas_reuse_htm_impl.h create mode 100644 artifact/ds/baseline/lazylist_omap.h create mode 100644 artifact/ds/baseline/lfskiplist_omap.h create mode 100644 artifact/ds/handSTM/dlist_carumap.h create mode 100644 artifact/ds/handSTM/dlist_omap.h create mode 100644 artifact/ds/handSTM/ibst_omap.h create mode 100644 artifact/ds/handSTM/iht_carumap.h create mode 100644 artifact/ds/handSTM/rbtree_omap.h create mode 100644 artifact/ds/handSTM/rbtree_omap_opt.h create mode 100644 artifact/ds/handSTM/rbtree_tl2_omap.h create mode 100644 artifact/ds/handSTM/skiplist_omap_bigtx.h create mode 100644 artifact/ds/handSTM/slist_omap.h create mode 100644 artifact/ds/hybrid/dlist_carumap.h create mode 100644 
artifact/ds/hybrid/rbtree_omap_drop.h create mode 100644 artifact/ds/include/ca_umap_list_adapter.h create mode 100644 artifact/ds/xSTM/dlist_omap.h create mode 100644 artifact/ds/xSTM/ibst_omap.h create mode 100644 artifact/ds/xSTM/rbtree_omap.h create mode 100644 artifact/ds/xSTM/rbtree_tl2_omap.h create mode 100644 artifact/policies/README.md create mode 100644 artifact/policies/STMCAS/include/base.h create mode 100644 artifact/policies/STMCAS/include/field.h create mode 100644 artifact/policies/STMCAS/include/raii.h create mode 100644 artifact/policies/STMCAS/stmcas.h create mode 100644 artifact/policies/baseline/thread.h create mode 100644 artifact/policies/exoTM/exotm.h create mode 100644 artifact/policies/handSTM/eager_c1.h create mode 100644 artifact/policies/handSTM/eager_c2.h create mode 100644 artifact/policies/handSTM/include/field.h create mode 100644 artifact/policies/handSTM/include/raii.h create mode 100644 artifact/policies/handSTM/include/redo_base.h create mode 100644 artifact/policies/handSTM/include/undo_base.h create mode 100644 artifact/policies/handSTM/lazy.h create mode 100644 artifact/policies/handSTM/wb_c1.h create mode 100644 artifact/policies/handSTM/wb_c2.h create mode 100644 artifact/policies/hybrid/include/base.h create mode 100644 artifact/policies/hybrid/include/field.h create mode 100644 artifact/policies/hybrid/include/raii.h create mode 100644 artifact/policies/hybrid/lazy.h create mode 100644 artifact/policies/hybrid/wb_c1.h create mode 100644 artifact/policies/hybrid/wb_c2.h create mode 100644 artifact/policies/include/hash.h create mode 100644 artifact/policies/include/minivector.h create mode 100644 artifact/policies/include/orec_policies.h create mode 100644 artifact/policies/include/rdtsc_rand.h create mode 100644 artifact/policies/include/redolog_nocast.h create mode 100644 artifact/policies/include/timestamp_smr.h create mode 100644 artifact/policies/include/undolog.h create mode 100644 artifact/policies/xSTM/Makefile create mode 100644 artifact/policies/xSTM/common/tm_api.h create mode 100644 artifact/policies/xSTM/common/tm_defines.h create mode 100644 artifact/policies/xSTM/common/xSTM.mk create mode 100644 artifact/policies/xSTM/libs/.gitignore create mode 100644 artifact/policies/xSTM/libs/Makefile create mode 100644 artifact/policies/xSTM/libs/README.md create mode 100644 artifact/policies/xSTM/libs/include/cm.h create mode 100644 artifact/policies/xSTM/libs/include/constants.h create mode 100644 artifact/policies/xSTM/libs/include/epochs.h create mode 100644 artifact/policies/xSTM/libs/include/orec_t.h create mode 100644 artifact/policies/xSTM/libs/include/pad_word.h create mode 100644 artifact/policies/xSTM/libs/include/platform.h create mode 100644 artifact/policies/xSTM/libs/include/timesource.h create mode 100644 artifact/policies/xSTM/libs/stm_algs/exo_eager_c1.h create mode 100644 artifact/policies/xSTM/libs/stm_algs/exo_eager_c2.h create mode 100644 artifact/policies/xSTM/libs/stm_algs/exo_lazy_c1.h create mode 100644 artifact/policies/xSTM/libs/stm_algs/exo_lazy_c2.h create mode 100644 artifact/policies/xSTM/libs/stm_algs/include/alloc.h create mode 100644 artifact/policies/xSTM/libs/stm_algs/include/deferred.h create mode 100644 artifact/policies/xSTM/libs/stm_algs/include/redolog.h create mode 100644 artifact/policies/xSTM/libs/stm_algs/include/stackframe.h create mode 100644 artifact/policies/xSTM/libs/stm_algs/orec_eager_c1.h create mode 100644 artifact/policies/xSTM/libs/stm_algs/orec_eager_c2.h create mode 100644 
artifact/policies/xSTM/libs/stm_algs/orec_lazy_c1.h create mode 100644 artifact/policies/xSTM/libs/stm_algs/orec_lazy_c2.h create mode 100644 artifact/policies/xSTM/libs/stm_instances/exo_eager_c1_q.cc create mode 100644 artifact/policies/xSTM/libs/stm_instances/exo_eager_c2_q.cc create mode 100644 artifact/policies/xSTM/libs/stm_instances/exo_lazy_c1_q.cc create mode 100644 artifact/policies/xSTM/libs/stm_instances/exo_lazy_c2_q.cc create mode 100644 artifact/policies/xSTM/libs/stm_instances/include/clone.h create mode 100644 artifact/policies/xSTM/libs/stm_instances/include/execute.h create mode 100644 artifact/policies/xSTM/libs/stm_instances/include/frame.h create mode 100644 artifact/policies/xSTM/libs/stm_instances/include/loadstore.h create mode 100644 artifact/policies/xSTM/libs/stm_instances/include/mem.h create mode 100644 artifact/policies/xSTM/libs/stm_instances/include/memfuncs.h create mode 100644 artifact/policies/xSTM/libs/stm_instances/include/stats.h create mode 100644 artifact/policies/xSTM/libs/stm_instances/orec_gv1_eager_c1_q.cc create mode 100644 artifact/policies/xSTM/libs/stm_instances/orec_gv1_eager_c2_q.cc create mode 100644 artifact/policies/xSTM/libs/stm_instances/orec_gv1_lazy_c1_q.cc create mode 100644 artifact/policies/xSTM/libs/stm_instances/orec_gv1_lazy_c2_q.cc create mode 100644 artifact/policies/xSTM/libs/stm_instances/orec_tsc_eager_c1_q.cc create mode 100644 artifact/policies/xSTM/libs/stm_instances/orec_tsc_eager_c2_q.cc create mode 100644 artifact/policies/xSTM/libs/stm_instances/orec_tsc_lazy_c1_q.cc create mode 100644 artifact/policies/xSTM/libs/stm_instances/orec_tsc_lazy_c2_q.cc create mode 100644 artifact/policies/xSTM/libs/tm_names.mk create mode 100644 artifact/policies/xSTM/plugin/Makefile create mode 100644 artifact/policies/xSTM/plugin/plugin/.gitignore create mode 100644 artifact/policies/xSTM/plugin/plugin/CMakeLists.txt create mode 100644 artifact/policies/xSTM/plugin/plugin/Makefile create mode 100644 artifact/policies/xSTM/plugin/plugin/README.md create mode 100644 artifact/policies/xSTM/plugin/plugin/boundary_transform.cc create mode 100644 artifact/policies/xSTM/plugin/plugin/discovery.cc create mode 100644 artifact/policies/xSTM/plugin/plugin/function_transform.cc create mode 100644 artifact/policies/xSTM/plugin/plugin/local_config.h create mode 100644 artifact/policies/xSTM/plugin/plugin/mappings.cc create mode 100644 artifact/policies/xSTM/plugin/plugin/optimizations.cc create mode 100644 artifact/policies/xSTM/plugin/plugin/raii_lite.cc create mode 100644 artifact/policies/xSTM/plugin/plugin/signatures.cc create mode 100644 artifact/policies/xSTM/plugin/plugin/signatures.h create mode 100644 artifact/policies/xSTM/plugin/plugin/tm_plugin.cc create mode 100644 artifact/policies/xSTM/plugin/plugin/tm_plugin.h create mode 100644 artifact/policies/xSTM/plugin/plugin/types.h create mode 100644 artifact/scripts/.gitignore create mode 100644 artifact/scripts/ChartCfg.py create mode 100644 artifact/scripts/ExpCfg.py create mode 100644 artifact/scripts/GetData.py create mode 100644 artifact/scripts/MakeChart.py create mode 100644 artifact/scripts/Makefile create mode 100644 artifact/scripts/README.md create mode 100644 artifact/scripts/Runner.py create mode 100644 artifact/scripts/Targets.py create mode 100644 artifact/scripts/Types.py create mode 100644 artifact/scripts/Util.py create mode 100644 artifact/ubench/Makefile create mode 100644 artifact/ubench/README.md create mode 100644 artifact/ubench/STMCAS/Makefile create mode 100644 
artifact/ubench/STMCAS/build.mk create mode 100644 artifact/ubench/STMCAS/common.mk create mode 100644 artifact/ubench/STMCAS/dlist_carumap.cc create mode 100644 artifact/ubench/STMCAS/dlist_caumap.cc create mode 100644 artifact/ubench/STMCAS/dlist_omap.cc create mode 100644 artifact/ubench/STMCAS/dlist_opt_caumap.cc create mode 100644 artifact/ubench/STMCAS/dlist_opt_omap.cc create mode 100644 artifact/ubench/STMCAS/ibst_omap.cc create mode 100644 artifact/ubench/STMCAS/rbtree_omap.cc create mode 100644 artifact/ubench/STMCAS/skiplist_cached_opt_omap.cc create mode 100644 artifact/ubench/STMCAS/slist_omap.cc create mode 100644 artifact/ubench/STMCAS/slist_opt_caumap.cc create mode 100644 artifact/ubench/baseline/Makefile create mode 100644 artifact/ubench/baseline/ebst_ticket_omap.cc create mode 100644 artifact/ubench/baseline/iavl_pathcas_omap.cc create mode 100644 artifact/ubench/baseline/ibst_pathcas_omap.cc create mode 100644 artifact/ubench/baseline/lazylist_caumap.cc create mode 100644 artifact/ubench/baseline/lazylist_omap.cc create mode 100644 artifact/ubench/baseline/lfskiplist_omap.cc create mode 100644 artifact/ubench/config.mk create mode 100644 artifact/ubench/handSTM/Makefile create mode 100644 artifact/ubench/handSTM/build.mk create mode 100644 artifact/ubench/handSTM/common.mk create mode 100644 artifact/ubench/handSTM/dlist_carumap.cc create mode 100644 artifact/ubench/handSTM/dlist_caumap.cc create mode 100644 artifact/ubench/handSTM/ibst_omap.cc create mode 100644 artifact/ubench/handSTM/rbtree_omap.cc create mode 100644 artifact/ubench/handSTM/skiplist_omap_bigtx.cc create mode 100644 artifact/ubench/handSTM/slist_omap.cc create mode 100644 artifact/ubench/hybrid/Makefile create mode 100644 artifact/ubench/hybrid/build.mk create mode 100644 artifact/ubench/hybrid/common.mk create mode 100644 artifact/ubench/hybrid/dlist_carumap.cc create mode 100644 artifact/ubench/hybrid/rbtree_omap_drop.cc create mode 100644 artifact/ubench/include/bench_thread_context.h create mode 100644 artifact/ubench/include/config.h create mode 100644 artifact/ubench/include/experiment.h create mode 100644 artifact/ubench/include/experiment_pathcas.h create mode 100644 artifact/ubench/include/launch.h create mode 100644 artifact/ubench/include/launch_multi.h create mode 100644 artifact/ubench/include/manager.h create mode 100644 artifact/ubench/xSTM/Makefile create mode 100644 artifact/ubench/xSTM/build.mk create mode 100644 artifact/ubench/xSTM/common.mk create mode 100644 artifact/ubench/xSTM/ibst_omap.cc
diff --git a/Artifact.md b/Artifact.md
new file mode 100644
index 0000000..38a2e98
--- /dev/null
+++ b/Artifact.md
@@ -0,0 +1,151 @@
+# Artifact: exoTM/STMCAS Mechanisms, Policies, and Data Structures
+
+## Abstract
+
+This artifact provides synchronization libraries, data structure
+implementations, and microbenchmarks for stress-testing those data structures,
+all in support of evaluating the performance of the exoTM synchronization
+mechanism and the STMCAS synchronization policy. The code requires an Intel
+CPU with support for the `rdtscp` instruction, which has been available on
+most Intel CPUs for more than 10 years. For the most meaningful evaluation, a
+system with a large number of cores is recommended. The provided Dockerfile
+handles all of the necessary software dependencies.
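+
+As a brief, hedged illustration of that hardware requirement (this snippet is
+not part of the artifact, and the helper name is ours): `rdtscp` is exposed to
+C++ via the `__rdtscp` intrinsic from `<x86intrin.h>`; the artifact's actual
+use of the counter lives in `artifact/policies`.
+
+```cpp
+#include <cstdint>
+#include <x86intrin.h> // provides __rdtscp on x86 compilers
+
+// Read the timestamp counter. rdtscp waits for prior instructions to finish
+// before reading; `aux` receives the contents of IA32_TSC_AUX (typically a
+// core identifier set up by the OS).
+inline uint64_t read_tsc() {
+  unsigned int aux;
+  return __rdtscp(&aux);
+}
+```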
+
+## Description
+
+This repository consists of the following components:
+
+* Synchronization Policies (`artifact/policies`)
+* Data Structures (`artifact/ds`)
+* Microbenchmarks (`artifact/ubench`)
+* Evaluation Scripts (`artifact/scripts`)
+* Build Environment (`Docker`)
+
+### Synchronization Policies
+
+This artifact considers five synchronization policies:
+
+* Compiler-based STM (xSTM)
+* Hand-instrumented STM (handSTM)
+* Software Transactional Multiword Compare and Swap (STMCAS)
+* handSTM+STMCAS (hybrid)
+* Traditional blocking/nonblocking approaches (baseline)
+
+Each synchronization policy can be found in a subfolder of `artifact/policies`.
+Most policies are "header-only" C++ files, which do not require special
+compilation. The exception is xSTM, for which we provide a version of the
+llvm-transmem TM plugin for C++.
+
+### Data Structures
+
+This artifact includes several data structures implemented with STMCAS
+(doubly-linked list, skip list, singly-linked list, closed addressing resizable
+unordered map, binary search tree, red/black tree). As appropriate, these data
+structures are also provided for other synchronization policies. The `ds`
+folder holds all data structures. The subfolders of `ds` correspond to the
+different synchronization policies.
+
+### Microbenchmarks
+
+The artifact's microbenchmark harness runs a stress test microbenchmark. The
+microbenchmark has a variety of configuration options, some related to the data
+structure's configuration (e.g., initial size of the unordered map), others
+related to the experiment's configuration (e.g., operation mix, number of
+threads).
+
+### Build Environment
+
+The easiest way to set up an appropriate build environment is to build a Docker
+container. The included `Dockerfile` has instructions for building an
+appropriate container. The dependencies are relatively minimal:
+
+* Ubuntu 22.04
+* Clang++ 15
+* CMake (only needed for xSTM)
+* Standard Linux build tools
+* Standard Python3 charting tools
+
+## Hardware Dependencies
+
+This artifact has been tested on a system with 192 GB of RAM and two Intel Xeon
+Platinum 8160 CPUs (48 cores / 96 threads in total), running Ubuntu 22.04. In
+general, any modern x86 CPU should work. The exoTM/STMCAS codes do not require
+many advanced x86 features; the most noteworthy requirement is the `rdtscp`
+instruction, which has been available in most Intel processors for over a
+decade.
+
+Please note that the baseline data structures based on the PathCAS
+synchronization methodology require support for Intel TSX. If you do not have a
+machine with TSX support, you will need to comment out lines 112/113 and 138/139
+in `artifact/scripts/Targets.py`. Otherwise the automated testing/charting
+scripts will fail.
+
+## Software Dependencies
+
+This artifact was developed and tested on Linux systems, running a variety of
+kernel versions. The xSTM policy that we compare against requires Clang 15, so
+we have opted to use Clang throughout the artifact. Our build configuration
+uses the `-std=c++20` flag, but we do not require any particularly advanced
+features (e.g., no concepts or coroutines). For exoTM/STMCAS, any modern C++
+compiler should be satisfactory.
+
+## Data Sets
+
+The artifact does not require any special data sets.
+
+## Instructions for Repeating the Experiments in the Paper
+
+If you wish to repeat the experiments from our paper, follow these instructions:
+
+1. Check out this repository (`git clone git@github.com:exotm/pact23.git`)
+2. Build the Docker image (`cd Docker && sudo docker build -t exotm_ae . && cd ..`)
+3. Launch a container (`sudo docker run --privileged --rm -v $(pwd):/root -it exotm_ae`)
+4. Build and run (`make`)
+
+Please note that the Docker image will require roughly 1.7 GB of disk space. To
+check out and build the source code will require another 60 MB.
+
+Also note that you will probably want to run a parallel make command in step 4
+(e.g., `make -j 16`).
+
+### Experiment Workflow
+
+The top-level Makefile first builds all necessary executable files. Please see
+the README.md files in subfolders for more details. In general, each data
+structure will produce its own executable.
+
+Once all executables are built, the Makefile will invoke `scripts/Runner.py` to
+collect data and plot charts. For the charts in the paper, this script took
+about 6 hours to run, and required about 1 GB of space to store the charts and
+data files.
+
+When the script completes, the `scripts/data` folder will hold all results. The
+charts can be found in the `scripts/charts` folder. A second set of charts,
+with error bars, can be found in `scripts/variance`.
+
+Note that typing `make clean` will remove all build artifacts and also all
+experiment results and charts.
+
+## Instructions for Reusing the Artifact (Adding New Data Structures)
+
+Below we discuss the process one can use to add new data structures.
+
+1. Create a new `.h` file with the implementation of the data structure. This
+   should go in the appropriate sub-folder of `artifact/ds`, based on the
+   synchronization policy used by the data structure.
+2. Create a new `.cc` file in the appropriate sub-folder of `artifact/ubench`,
+   depending on the synchronization policy used by the data structure. Note
+   that these files are typically quite small (~7 lines), as they only include
+   other files, define some types, and invoke a policy's initializer (see the
+   sketch after this list).
+3. In the same folder as the `.cc` file, add the `.cc` file's name (without an
+   extension) to the `DS` variable in the `common.mk` file. Typing `make`
+   should now build a version of the microbenchmark for testing the new data
+   structure. Under rare circumstances, the Makefile might issue a warning
+   about duplicate rules in the generated `rules.mk` file. Should this happen,
+   type `make clean` and then `make` (or `make -j 16`, for a parallel build).
+4. To integrate the new data structure into the test scripts for an existing
+   chart, first add it to the `exeNames` listing in
+   `artifact/scripts/ExpCfg.py`. Then locate the chart(s) to augment in
+   `artifact/scripts/Targets.py` and add a new `Curve` with a matching
+   `exeName`.
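+
+As a concrete (but hypothetical) sketch of steps 2 and 3: the file name
+`my_new_ds`, the include paths, and the comments below are illustrative only;
+copy the exact includes, type definitions, and initializer call from a sibling
+`.cc` file in the chosen `ubench` subfolder.
+
+```cpp
+// artifact/ubench/STMCAS/my_new_ds.cc (hypothetical name and contents)
+// Wrappers are ~7 lines: include the data structure and the shared harness,
+// fix the key/value/policy types, and invoke the policy's initializer the
+// same way the sibling .cc files in this folder do.
+#include "../../ds/STMCAS/my_new_ds.h" // the new data structure (assumed path)
+#include "../include/experiment.h"     // the shared harness (assumed path)
+// ... type definitions and the policy's initializer go here ...
+```
+
+Step 3 is then a one-line change to the same folder's `common.mk` (assuming it
+extends `DS` the way the existing entries do):
+
+```make
+# Add the new benchmark: the .cc file's name, without its extension
+DS += my_new_ds
+```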
+
diff --git a/Docker/Dockerfile b/Docker/Dockerfile
new file mode 100644
index 0000000..3e3db3c
--- /dev/null
+++ b/Docker/Dockerfile
@@ -0,0 +1,36 @@
+# Dockerfile to build llvm-15 developer image
+FROM ubuntu:jammy
+
+# Apply all updates
+RUN apt-get update -y
+RUN apt-get upgrade -y
+
+# Install basic C++ and Python development tools
+RUN DEBIAN_FRONTEND=noninteractive apt install -y build-essential cmake g++-multilib pip
+
+# Install LLVM 15
+RUN DEBIAN_FRONTEND=noninteractive apt install -y wget gnupg gnupg2 gnupg1 lsb-release software-properties-common
+RUN DEBIAN_FRONTEND=noninteractive wget https://apt.llvm.org/llvm.sh
+RUN DEBIAN_FRONTEND=noninteractive chmod +x llvm.sh
+RUN DEBIAN_FRONTEND=noninteractive ./llvm.sh 15
+RUN DEBIAN_FRONTEND=noninteractive rm ./llvm.sh
+
+# Install Python charting tools
+RUN pip3 install --no-cache-dir numpy matplotlib
+
+# Set the working directory
+WORKDIR /root
+
+# To use this Dockerfile
+# 1 - Make an image named exotm_ae
+#     - Go to the folder where this Dockerfile exists
+#     - sudo docker build -t exotm_ae .
+#     - Note: you don't need the 'sudo' part on Windows
+#     - The resulting image size will be about 1.64 GB
+# 2 - Go to top level folder
+# 3 - Launch an interactive container, and mount your working folder
+#     - sudo docker run --privileged --rm -v $(pwd):/root -it exotm_ae
+# 4 - When your terminal starts up:
+#     - You will be logged in as root, and in the `/root` folder
+#     - You should see your exotm folder's contents in there
+#     - Type 'make' to run all experiments and build all charts
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d6162e6
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Yaodong Sheng, Ahmed Hassan, Michael Spear
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..b6de3d5
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,5 @@
+all:
+	$(MAKE) -C artifact
+
+clean:
+	$(MAKE) -C artifact clean
diff --git a/artifact/Makefile b/artifact/Makefile
new file mode 100644
index 0000000..ae0a7dc
--- /dev/null
+++ b/artifact/Makefile
@@ -0,0 +1,8 @@
+all:
+	$(MAKE) -C policies/xSTM
+	$(MAKE) -C ubench
+	$(MAKE) -C scripts
+clean:
+	$(MAKE) -C policies/xSTM clean
+	$(MAKE) -C ubench clean
+	$(MAKE) -C scripts clean
diff --git a/artifact/ds/README.md b/artifact/ds/README.md
new file mode 100644
index 0000000..ae0e992
--- /dev/null
+++ b/artifact/ds/README.md
@@ -0,0 +1,14 @@
+# Data Structures
+
+This folder stores the data structures that we use in our evaluation. They are
+organized according to the synchronization policy they employ.
+
+The data structures in the `baseline` folder are taken from the open-source
+repositories that correspond to those works. We have modified them in the
+following ways:
+
+- We have converted code, as necessary, to move the data structure
+  implementation entirely to headers.
+- We have modified the data structures to use the facilities in the
+  `policies/baseline` policy, so that there is an apples-to-apples comparison
+  with regard to hashing, random numbers, and safe memory reclamation.
diff --git a/artifact/ds/STMCAS/dlist_carumap.h b/artifact/ds/STMCAS/dlist_carumap.h
new file mode 100644
index 0000000..1994a50
--- /dev/null
+++ b/artifact/ds/STMCAS/dlist_carumap.h
@@ -0,0 +1,646 @@
+#pragma once
+
+#include <atomic>
+#include <bit>
+#include <functional>
+
+/// An unordered map, implemented as a resizable array of lists (closed
+/// addressing). This map supports get(), insert(), and remove() operations.
+///
+/// This implementation is based loosely on Liu's nonblocking resizable hash
+/// table from PODC 2014. At the current time, we do not support the heuristic
+/// for contracting the list, but we do support expanding the list.
+///
+/// @param K The type of the keys stored in this map
+/// @param V The type of the values stored in this map
+/// @param STMCAS The STMCAS implementation (PO or PS)
+template <typename K, typename V, typename STMCAS> class dlist_carumap {
+  using WSTEP = typename STMCAS::WSTEP;
+  using RSTEP = typename STMCAS::RSTEP;
+  using snapshot_t = typename STMCAS::snapshot_t;
+  using ownable_t = typename STMCAS::ownable_t;
+  template <typename T> using FIELD = typename STMCAS::template sField<T>;
+
+  /// A list node. It has prev and next pointers, but no key or value. It's
+  /// useful for sentinels, so that K and V don't have to be default
+  /// constructable.
+  ///
+  /// NB: we do not need a `valid` bit, because any operation that would clear
+  ///     it would also acquire this node's orec, and thus any node that would
+  ///     encounter a cleared valid bit would also detect an orec inconsistency.
+  struct node_t : ownable_t {
+    FIELD<node_t *> prev; // Pointer to predecessor
+    FIELD<node_t *> next; // Pointer to successor
+
+    /// Construct a node
+    node_t() : ownable_t(), prev(nullptr), next(nullptr) {}
+
+    /// Destructor is a no-op, but it needs to be virtual because of inheritance
+    virtual ~node_t() {}
+  };
+
+  /// We need to know if buckets have been rehashed to a new table. We do this
+  /// by making the head of each bucket a `sentinel_t`, and adding a `closed`
+  /// bool. Note that the tail of each bucket's list is just a node_t.
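+  ///
+  /// For reference, each bucket (as built by create_list() below) is laid
+  /// out as:
+  ///
+  ///   null <- head (sentinel_t) <-> data_t <-> ... <-> data_t <-> tail (node_t) -> null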
+  struct sentinel_t : node_t {
+    /// Track if this sentinel is for a bucket that has been rehashed
+    ///
+    /// NB: Could we use `prev` to indicate `closed`?
+    FIELD<bool> closed; // Has it been rehashed?
+
+    /// Construct a sentinel_t
+    sentinel_t() : node_t(), closed(false) {}
+
+    /// Destructor is a no-op, but it needs to be virtual because of inheritance
+    virtual ~sentinel_t() {}
+  };
+
+  /// A list node that also has a key and value. Note that keys are const, and
+  /// values are only accessed while the node is locked, so neither is a
+  /// tm_field.
+  struct data_t : node_t {
+    const K key; // The key of this key/value pair
+    V val;       // The value of this key/value pair
+
+    /// Construct a data_t
+    ///
+    /// @param _key The key that is stored in this node
+    /// @param _val The value that is stored in this node
+    data_t(const K &_key, const V &_val) : node_t(), key(_key), val(_val) {}
+
+    /// Destructor is a no-op, but it needs to be virtual because of inheritance
+    virtual ~data_t() {}
+  };
+
+  /// An array of lists, along with its size
+  ///
+  /// NB: to avoid indirection, the array is in-lined into the tbl_t. To make
+  ///     this compatible with SMR, tbl_t must be ownable.
+  class tbl_t : public ownable_t {
+    using bucket_t = FIELD<sentinel_t *>;
+
+    /// Construct a table
+    ///
+    /// @param `_size` The desired size of the table
+    tbl_t(uint64_t _size) : size(_size) {}
+
+  public:
+    const uint64_t size; // The size of the table
+    bucket_t tbl[];      // The buckets of the table
+
+    /// Allocate a tbl_t of size `size`
+    ///
+    /// @param size The desired size
+    /// @param tx The calling operation's descriptor
+    ///
+    /// @return A table, all of whose buckets are set to null
+    static tbl_t *make(uint64_t size, WSTEP &tx) {
+      tbl_t *tbl = (tbl_t *)malloc(sizeof(tbl_t) + size * sizeof(bucket_t));
+      auto ret = new (tbl) tbl_t(size);
+      for (size_t i = 0; i < size; ++i)
+        ret->tbl[i].set(nullptr, tx);
+      return ret;
+    }
+  };
+
+  ownable_t *tbl_orec;             // An orec for protecting `active` and `frozen`
+  FIELD<tbl_t *> active;           // The active table
+  FIELD<tbl_t *> frozen;           // The frozen table
+  std::hash<K> _pre_hash;          // A weak hash function for converting keys to ints
+  const uint64_t RESIZE_THRESHOLD; // Max bucket size before resizing
+
+  /// A pair consisting of a pointer and an orec version.
+  struct node_ver_t {
+    node_t *_obj = nullptr; // The start of a bucket
+    uint64_t _ver = 0;      // NB: _ver may not be related to _obj
+  };
+
+  /// Result of trying to resize a bucket
+  enum resize_result_t {
+    CANNOT_ACQUIRE,  // Couldn't get orec... retry
+    ALREADY_RESIZED, // Already resized by another thread
+    RESIZE_OK        // Bucket successfully resized
+  };
+
+  /// Given a key, determine the bucket into which it should go. As in the Liu
+  /// hash, we do not change the hash function when we resize, we just change
+  /// the number of bits to use
+  ///
+  /// @param key The key to hash
+  /// @param size The size of the table into which this should be hashed
+  ///
+  /// @return An integer in the range [0, size)
+  uint64_t table_hash(STMCAS *me, const K &key, const uint64_t size) const {
+    return me->hash(_pre_hash(key)) % size;
+  }
+
+public:
+  /// Default construct a map as having a valid active table.
+  ///
+  /// NB: This constructor calls std::terminate if the provided size is not a
+  ///     power of 2.
+  ///
+  /// @param me The operation that is creating this umap
+  /// @param cfg A config object with `buckets` and `resize_threshold`
+  dlist_carumap(STMCAS *me, auto *cfg)
+      : tbl_orec(new ownable_t()), RESIZE_THRESHOLD(cfg->resize_threshold) {
+    // Enforce power-of-2 initial size
+    if (std::popcount(cfg->buckets) != 1)
+      throw("cfg->buckets should be power of 2");
+
+    // Create an initial active table in which all of the buckets are
+    // initialized but empty (null <- head <-> tail -> null).
+    WSTEP tx(me);
+    active.set(tbl_t::make(cfg->buckets, tx), tx);
+    for (size_t i = 0; i < cfg->buckets; ++i)
+      active.get(tx)->tbl[i].set(create_list(tx), tx);
+    // NB: since all buckets are initialized, nobody will ever go to the
+    //     frozen table, so we can leave it as null
+    frozen.set(nullptr, tx);
+  }
+
+private:
+  /// Create a dlist with head and tail sentinels
+  ///
+  /// @param tx A writing TM context. Even though this code can't fail, we need
+  ///           the context in order to use tm_field correctly.
+  ///
+  /// @return A pointer to the head sentinel of the list
+  sentinel_t *create_list(WSTEP &tx) {
+    // NB: By default, a node's prev and next will be nullptr, which is what we
+    //     want for head->prev and tail->next.
+    auto head = new sentinel_t();
+    auto tail = new node_t();
+    head->next.set(tail, tx);
+    tail->prev.set(head, tx);
+    return head;
+  }
+
+  /// `resize()` is an internal method for changing the size of the active
+  /// table. Strictly speaking, it should be called `expand`, because for now we
+  /// only support expansion, not contraction. When `insert()` discovers that
+  /// it has made a bucket "too big", it will continue to do its insertion and
+  /// then, after linearizing, it will call `resize()`. `remove()` does not
+  /// currently call `resize()`.
+  ///
+  /// At a high level, `resize()` is supposed to be open-nested and not to incur
+  /// any blocking, except due to orec conflicts. We accomplish this through
+  /// laziness and stealing. resize() finishes the /last/ resize, moves the
+  /// `active` table to `frozen`, and installs a new `active` table. Subsequent
+  /// operations will do most of the migrating.
+  ///
+  /// @param me The calling thread's descriptor
+  /// @param a_ver The version of `active` when the resize was triggered
+  void resize(STMCAS *me, uint64_t a_ver) {
+    // Get the current active and frozen tables, and the frozen table size
+    tbl_t *ft = nullptr, *at = nullptr;
+    {
+      RSTEP tx(me);
+      ft = frozen.get(tx);
+      at = active.get(tx);
+      if (!tx.check_continuation(tbl_orec, a_ver))
+        return; // someone else must be starting a resize, so we can quit
+    }
+
+    // If ft is null, then there's no frozen table, so things will be easy
+    if (ft == nullptr) {
+      WSTEP tx(me);
+
+      // Make and initialize the table *before* acquiring orecs, to minimize the
+      // critical section. The table is 2x as big.
+      auto new_tbl = tbl_t::make(at->size * 2, tx);
+
+      // Lock the table, move it from `active` to `frozen`, then install the new
+      // table.
+      if (!tx.acquire_continuation(tbl_orec, a_ver)) {
+        // NB: new_tbl is private. We don't need SMR
+        delete new_tbl;
+        return; // Someone else is resizing, and that's good enough for `me`
+      }
+      frozen.set(at, tx);
+      active.set(new_tbl, tx);
+      return;
+    }
+
+    // Migrate everything out of frozen, remove the frozen table, and retry
+    //
+    // NB: prepare_resize removes the frozen table. That will change a_ver, so
+    //     we need to capture the new a_ver value so that our next attempt won't
+    //     fail erroneously.
+    a_ver = prepare_resize(me, a_ver, ft, at);
+    if (a_ver == 0)
+      return; // Someone else finished resizing for `me`
+
+    resize(me, a_ver); // Try again now that it's clean
+  }
+
+  /// Finish one lazy resize, so that another may begin.
+  ///
+  /// This really just boils down to migrating everything from `frozen` to
+  /// `active` and then nulling `frozen` and reclaiming it.
+  ///
+  /// NB: This code takes the "frozen" and "active" tables as arguments.
+  ///     Consequently, we don't care about arbitrary delays. If a thread calls
+  ///     this, rehashes half the table, and then suspends, another thread can
+  ///     rehash everything else and install a new active table. When the first
+  ///     thread wakes, it'll find a bunch of empty buckets, and it'll be safe.
+  ///
+  /// @param me The calling thread's descriptor
+  /// @param a_ver The active table version when this was called
+  /// @param f_tbl The "frozen table", really the "source" table
+  /// @param a_tbl The "active table", really the "destination" table
+  ///
+  /// @return {0} if another thread stole the job of nulling `frozen`.
+  ///             When this happens, there must be a concurrent resize,
+  ///             and since both are trying to do the same thing (expand),
+  ///             the one who receives {0} can just get out of the other's
+  ///             way
+  ///         {integer} the new orec version of `active`
+  uint64_t prepare_resize(STMCAS *me, uint64_t a_ver, tbl_t *f_tbl,
+                          tbl_t *a_tbl) {
+    // NB: Right now, next_index == completed. If we randomized the start
+    //     point, concurrent calls to prepare_resize() would contend less
+    uint64_t next_index = 0; // Next bucket to migrate
+    uint64_t completed = 0;  // Number of buckets migrated
+
+    // Migrate all data from `frozen` to `active`
+    while (completed != f_tbl->size) {
+      WSTEP tx(me);
+
+      // Try to rehash the next bucket
+      auto bucket = f_tbl->tbl[next_index].get(tx);
+      auto res =
+          rehash_expand_bucket(me, bucket, next_index, f_tbl->size, a_tbl, tx);
+      // If we can't acquire all nodes in this bucket, try again, because it
+      // might just mean someone else was doing an operation in the bucket.
+      if (res == CANNOT_ACQUIRE) {
+        tx.unwind();
+        continue;
+      }
+      // If this bucket is already rehashed by others, there is a chance that
+      // the current resize phase is finished, so check
+      if (res == ALREADY_RESIZED) {
+        // check if the active table version changed since resize() was
+        // called, if so, we know resize is finished, return
+        if (!tx.check_continuation(tbl_orec, a_ver)) {
+          tx.unwind();
+          return 0;
+        }
+      }
+
+      // Move to the next bucket
+      ++next_index;
+      ++completed;
+    }
+
+    // Uninstall the `frozen` table, since it has been emptied. Save the commit
+    // time, so we can validate tbl_orec later.
+    tbl_t *old;
+    {
+      WSTEP tx(me);
+      if (tx.acquire_continuation(tbl_orec, a_ver)) {
+        old = f_tbl;
+        frozen.set(nullptr, tx);
+      } else
+        return 0;
+    }
+    auto last_commit_time = me->get_last_wo_end_time();
+
+    // Reclaim `old`'s buckets, then `old` itself
+    {
+      WSTEP tx(me);
+      for (size_t i = 0; i < f_tbl->size; i++) {
+        // use singleton_reclaim to reclaim head and tail of each bucket
+        auto head = old->tbl[i].get(tx);
+        auto tail = head->next.get(tx);
+        tx.reclaim(head);
+        tx.reclaim(tail);
+      }
+      tx.reclaim(old);
+    }
+    return last_commit_time;
+  }
+
+  /// Get a pointer to the bucket in the active table that holds `key`. This
+  /// may cause some rehashing to happen.
+  ///
+  /// NB: The pattern here is unconventional. get_bucket() is the first step in
If it doesn't rehash, then the caller WSTEP + /// continues its operation. If it does rehash, then the caller WSTEP + /// commits and restarts, which is a poor-man's open-nested transaction. + /// If it encounters an inconsistency, the caller WSTEP should "abort" by + /// unwinding and restarting. In the third case, this returns *while + /// holding an orec* + /// + /// @param me The calling thread's descriptor + /// @param key The key whose bucket is sought + /// @param tx An active WSTEP transaction + /// + /// @return On success, a pointer to the head of a bucket, along with + /// `tbl_orec`'s value. {nullptr, 0} on any rehash or inconsistency + node_ver_t get_bucket(STMCAS *me, const K &key, WSTEP &tx) { + // Get the head of the appropriate bucket in the active table + // + // NB: Validate or else a_tbl[a_idx] could be out of bounds + auto a_tbl = active.get(tx); + uint64_t a_ver = tx.check_orec(tbl_orec); + if (a_ver == STMCAS::END_OF_TIME) + return {nullptr, a_ver}; + auto a_idx = table_hash(me, key, a_tbl->size); + auto a_bucket = a_tbl->tbl[a_idx].get(tx); // NB: caller will validate + if (a_bucket) + return {a_bucket, a_ver}; // not null --> no resize needed + + // Find the bucket in the frozen table that needs rehashing + auto f_tbl = frozen.get(tx); + if (tx.check_orec(tbl_orec) == STMCAS::END_OF_TIME) + return {nullptr, 0}; // this op delayed, rehash finished by someone else! + auto f_idx = table_hash(me, key, f_tbl->size); + auto f_bucket = f_tbl->tbl[f_idx].get(tx); + if (!tx.acquire_consistent(f_bucket)) + return {nullptr, 0}; // someone else is using the old bucket + + // Rehash it, tell caller to commit so the rehash appears to be open nested + // + // NB: if the rehash fails, it's due to someone else rehashing, which is OK + rehash_expand_bucket(me, f_bucket, f_idx, f_tbl->size, a_tbl, tx); + return {nullptr, 0}; + } + + /// Re-hash one list in the frozen table into two lists in the active table + /// + /// @param me The calling thread's descriptor + /// @param f_list A pointer to an (acquired!) list head in the frozen table + /// @param f_idx The index of flist in the frozen table + /// @param f_size The size of the frozen table + /// @param a_tbl A reference to the active table + /// @param tx An active WSTEP transaction + /// + /// @return RESIZE_OK - The frozen bucket was rehashed into `a_tbl` + /// ALREADY_RESIZED - The frozen bucket was empty + /// CANNOT_ACQUIRE - The operation could not acquire all orecs + resize_result_t rehash_expand_bucket(STMCAS *me, sentinel_t *f_list, + uint64_t f_idx, uint64_t f_size, + tbl_t *a_tbl, WSTEP &tx) { + // Stop if this bucket is already rehashed + if (f_list->closed.get(tx)) // true is effectively const, skip validation + return ALREADY_RESIZED; + // Fail if we cannot acquire all nodes in f_list + if (!list_acquire_all(f_list, tx)) + return CANNOT_ACQUIRE; + + // Shuffle nodes from f_list into two new lists that will go into `a_tbl` + auto l1 = create_list(tx), l2 = create_list(tx); + auto curr = f_list->next.get(tx); + while (curr->next.get(tx) != nullptr) { + auto next = curr->next.get(tx); + auto data = static_cast(curr); + auto dest = table_hash(me, data->key, a_tbl->size) == f_idx ? 
l1 : l2; + auto succ = dest->next.get(tx); + dest->next.set(data, tx); + data->next.set(succ, tx); + data->prev.set(dest, tx); + succ->prev.set(data, tx); + curr = next; + } + // curr is tail, set head->tail + f_list->next.set(curr, tx); + // put the lists into the active table, close the frozen bucket + a_tbl->tbl[f_idx].set(l1, tx); + a_tbl->tbl[f_idx + f_size].set(l2, tx); + f_list->closed.set(true, tx); + return RESIZE_OK; + } + + /// Acquire all of the nodes in the list starting at `head`, including the + /// head and tail sentinels + /// + /// @param head The head of the list whose nodes should be acquired + /// @param tail The calling WSTEP transaction + /// + /// @return true if all nodes are acquired, false otherwise + bool list_acquire_all(node_t *head, WSTEP &tx) { + node_t *curr = head; + while (curr) { + if (!tx.acquire_consistent(curr)) + return false; + curr = curr->next.get(tx); + } + return true; + } + + /// Given the head sentinel of a list, search through the list to find the + /// node with key `key`, if such a node exists in the list. If it doesn't, + /// then return the head pointer, along with a count of non-sentinel nodes in + /// the list + /// + /// @param key The key for which we are searching + /// @param head The start of the list to search + /// @param tx An active WSTEP transaction + /// + /// @return {nullptr, 0} if the transaction discovered an inconsistency + /// {head, count} if the key was not found + /// {node, 0} if the key was found at `node` + std::pair list_get_or_head(const K &key, sentinel_t *head, + WSTEP &tx) { + // Get the head's successor; on any inconsistency, return. + auto curr = head->next.get(tx); + uint64_t head_orec = tx.check_orec(head); + if (head_orec == STMCAS::END_OF_TIME) + return {nullptr, 0}; + + uint64_t count = 0; // Number of nodes encountered during the loop + + while (true) { + // if we reached the tail, return the head + if (curr->next.get(tx) == nullptr) + return {head, count}; // No validation: tail's next is effectively const + + // return curr if it has a matching key + if (static_cast(curr)->key == key) + return {curr, 0}; + + // read `next` consistently + // + // NB: We could skip this, and just validate before `return {curr, 0}` + auto next = curr->next.get(tx); + if (tx.check_orec(curr) == STMCAS::END_OF_TIME) + return {nullptr, 0}; + curr = next; + ++count; + } + } + +public: + /// Search the data structure for a node with key `key`. If not found, return + /// false. If found, return true, and set `val` to the value associated with + /// `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key to search + /// @param val A ref parameter for returning key's value, if found + /// + /// @return True if the key is found, false otherwise. The reference + /// parameter `val` is only valid when the return value is true. + bool get(STMCAS *me, const K &key, V &val) { + while (true) { + WSTEP tx(me); + // Get the bucket in `active` where `key` should be. "Abort" and retry on + // any inconsistency; commit and retry if `get_bucket` resized + auto [bucket, _] = get_bucket(me, key, tx); + if (!bucket) + continue; + + // Find the node in `bucket` that matches `key`. If it can't be found, + // we'll get the head node. 
+ auto [node, __] = + list_get_or_head(key, static_cast(bucket), tx); + + // If we got back null, there was an inconsistency, so retry + if (!node) { + tx.unwind(); + continue; + } + + // If we got back the head, return false + if (node == bucket) { + tx.unwind(); // because we didn't update shared memory + return false; + } + + if (std::is_scalar::value) { + data_t *dn = static_cast(node); + V val_copy = reinterpret_cast *>(&dn->val)->load( + std::memory_order_acquire); + if (tx.check_orec(node) == STMCAS::END_OF_TIME) { + tx.unwind(); + continue; + } + val = val_copy; + return true; + } else { + // Acquire, read, unwind (because no writes!) + if (!tx.acquire_consistent(node)) { + tx.unwind(); + continue; + } + val = static_cast(node)->val; + tx.unwind(); + return true; + } + } + } + + /// Create a mapping from the provided `key` to the provided `val`, but only + /// if no such mapping already exists. This method does *not* have upsert + /// behavior for keys already present. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to create + /// @param val The value for the mapping to create + /// + /// @return True if the value was inserted, false otherwise. + bool insert(STMCAS *me, const K &key, V &val) { + // If we discover that a bucket becomes too full, we'll insert, linearize, + // and then resize in a new transaction before returning. Tracking + // `active`'s version prevents double-resizing under concurrency. + uint64_t a_ver = 0; + while (true) { + WSTEP tx(me); + auto [bucket, a_version] = get_bucket(me, key, tx); + if (!bucket) + continue; + a_ver = a_version; + + // Find the node in `bucket` that matches `key`. If it can't be found, + // we'll get the head node. + auto [node, count] = + list_get_or_head(key, static_cast(bucket), tx); + + // If we got back null, there was an inconsistency, so retry + if (!node) { + tx.unwind(); + continue; + } + + // If we didn't get the head, the key already exists, so return false + if (node != bucket) { + tx.unwind(); + return false; + } + + // Lock the node and its successor + if (!tx.acquire_consistent(node)) { + tx.unwind(); + continue; + } + auto next = node->next.get(tx); + if (!tx.acquire_aggressive(next)) { + tx.unwind(); + continue; + } + + // Stitch in a new node + data_t *new_dn = new data_t(key, val); + new_dn->next.set(next, tx); + new_dn->prev.set(node, tx); + node->next.set(new_dn, tx); + next->prev.set(new_dn, tx); + if (count >= RESIZE_THRESHOLD) + break; // need to resize! + return true; + } + + resize(me, a_ver); + return true; + } + + /// Clear the mapping involving the provided `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to eliminate + /// + /// @return True if the key was found and removed, false otherwise + bool remove(STMCAS *me, const K &key) { + while (true) { + WSTEP tx(me); + // Get the bucket in `active` where `key` should be. Abort and retry on + // any inconsistency; commit and retry if `get_bucket` resized + auto [bucket, _] = get_bucket(me, key, tx); + if (!bucket) + continue; + + // Find the node in `bucket` that matches `key`. If it can't be found, + // we'll get the head node. + // + // NB: While `bucket` has not been reclaimed, `active.tbl` may have + // changed. Fortunately, list_get_or_head will validate it. 
+ auto [node, __] = + list_get_or_head(key, static_cast(bucket), tx); + + // If we got back the head, return false + if (node == bucket) { + tx.unwind(); // because we didn't update shared memory + return false; + } + + // If the `node` is null, list_get_or_head failed and we need to retry + // Otherwise, it's unowned and the keys match, so lock `node` and its + // neighbors, else retry + if (!node || !tx.acquire_consistent(node) || + !tx.acquire_aggressive(node->prev.get(tx)) || + !tx.acquire_aggressive(node->next.get(tx))) { + tx.unwind(); + continue; + } + + // unstitch it + auto pred = node->prev.get(tx), succ = node->next.get(tx); + pred->next.set(succ, tx); + succ->prev.set(pred, tx); + tx.reclaim(node); + return true; + } + } +}; diff --git a/artifact/ds/STMCAS/dlist_omap.h b/artifact/ds/STMCAS/dlist_omap.h new file mode 100644 index 0000000..3cdd5b7 --- /dev/null +++ b/artifact/ds/STMCAS/dlist_omap.h @@ -0,0 +1,351 @@ +#pragma once + +#include +#include + +/// An ordered map, implemented as a doubly-linked list. This map supports +/// get(), insert(), and remove() operations. +/// +/// Note that the AVOID_OREC_CHECKS flag can be used to create an "optimized" +/// version of this data structure, where list traversal (get_leq) avoids +/// checking orecs in most cases. +/// +/// @param K The type of the keys stored in this map +/// @param V The type of the values stored in this map +/// @param STMCAS The STMCAS implementation (PO or PS) +/// @param AVOID_OREC_CHECKS A flag to enable an optimization that avoids +/// checking orecs when get_leq is doing its read-only +/// traversal +template +class dlist_omap { + using WSTEP = typename STMCAS::WSTEP; + using RSTEP = typename STMCAS::RSTEP; + using snapshot_t = typename STMCAS::snapshot_t; + using ownable_t = typename STMCAS::ownable_t; + template using FIELD = typename STMCAS::template sField; + + /// A list node. It has prev and next pointers, but no key or value. It's + /// useful for sentinels, so that K and V don't have to be default + /// constructable. + /// + /// NB: we do not need a `valid` bit, because any operation that would clear + /// it would also acquire this node's orec, and thus any node that would + /// encounter a cleared valid bit would also detect an orec inconsistency. + struct node_t : ownable_t { + FIELD prev; // Pointer to predecessor + FIELD next; // Pointer to successor + + /// Construct a node + node_t() : ownable_t(), prev(nullptr), next(nullptr) {} + + /// Destructor is a no-op, but it needs to be virtual because of inheritance + virtual ~node_t() {} + }; + + /// A list node that also has a key and value. Note that keys are const, and + /// values are only accessed while the node is locked, so neither is a + /// tm_field. 
+  struct data_t : public node_t {
+    const K key; // The key of this key/value pair
+    V val;       // The value of this key/value pair
+
+    /// Construct a data_t
+    ///
+    /// @param _key The key that is stored in this node
+    /// @param _val The value that is stored in this node
+    data_t(const K &_key, const V &_val) : node_t(), key(_key), val(_val) {}
+
+    /// Destructor is a no-op, but it needs to be virtual because of inheritance
+    virtual ~data_t() {}
+  };
+
+  /// The pair returned by predecessor queries: a node and its observed version
+  struct leq_t {
+    node_t *_obj = nullptr; // The object
+    uint64_t _ver = 0;      // The observed version of the object
+  };
+
+  node_t *const head; // The list head pointer
+  node_t *const tail; // The list tail pointer
+
+  /// During get_leq, we have a way to periodically capture snapshots, so that a
+  /// failed search can resume from an intermediate point. This specifies how
+  /// frequently to take a snapshot (higher is less frequent, i.e., once per
+  /// SNAPSHOT_FREQUENCY nodes).
+  const int SNAPSHOT_FREQUENCY;
+
+public:
+  /// Default construct a list by constructing and connecting two sentinel nodes
+  ///
+  /// @param me The operation that is constructing the list
+  /// @param cfg A configuration object that has a `snapshot_freq` field
+  dlist_omap(STMCAS *me, auto *cfg)
+      : head(new node_t()), tail(new node_t()),
+        SNAPSHOT_FREQUENCY(cfg->snapshot_freq) {
+    // NB: Even though this code can't abort and doesn't acquire orecs, we still
+    //     need to use a transaction (WSTEP), because we can't set fields of a
+    //     node_t without a legal WSTEP context. We can cheat, though, and not
+    //     bother to acquire orecs, because we know nothing is shared.
+    WSTEP tx(me);
+    head->next.set(tail, tx);
+    tail->prev.set(head, tx);
+  }
+
+private:
+  /// Convert a snapshot_t into a leq_t
+  leq_t leq(const snapshot_t &s) { return leq_t{(node_t *)s._obj, s._ver}; }
+
+  /// Convert a leq_t into a snapshot_t
+  snapshot_t snapshot(const leq_t &l) { return snapshot_t{l._obj, l._ver}; }
+
+  /// get_leq is an inclusive predecessor query that returns the largest node
+  /// whose key is <= the provided key. It can return the head sentinel, but
+  /// not the tail sentinel.
+  ///
+  /// There is no atomicity between get_leq and its caller. It returns the node
+  /// it found, along with the value of the orec for that node at the time it
+  /// was accessed. The caller needs to validate the orec before using the
+  /// returned node.
+  ///
+  /// @param me The calling thread's descriptor
+  /// @param key The key for which we are doing a predecessor query.
+  ///
+  /// @return The node that was found, and its orec value
+  leq_t get_leq(STMCAS *me, const K key) {
+    // Start a transactional traversal from the head node, or from the latest
+    // valid snapshot, if we have one. If a transaction encounters an
+    // inconsistency, it will come back to here to start a new traversal.
+    while (true) {
+      RSTEP tx(me);
+
+      // Figure out where to start this traversal: initially we start at head,
+      // but on a retry, we might have a snapshot.
+      //
+      // NB: snapshots are always < key
+      leq_t curr =
+          (me->snapshots.empty()) ? leq_t({head, 0}) : leq(me->snapshots.top());
+
+      // We need to validate the start point. A clever trick is that we know
+      // that it hasn't been reclaimed. Thus we can read its next pointer
+      // before validating.
This helps with a pair of performance goals for the + // upcoming `while` loop: (1) read all fields before validating an object, + // and (2) avoid re-validating in subsequent loop iterations. + auto *next = curr._obj->next.get(tx); + + // Validate the start point + if (curr._obj == head) { + // For head, be sure to save curr._ver in case we end up returning head + if ((curr._ver = tx.check_orec(curr._obj)) == STMCAS::END_OF_TIME) + continue; + } else { + // Validate snapshot as a continuation. Drop the snapshot on failure. + if (!tx.check_continuation(curr._obj, curr._ver)) { + me->snapshots.drop(); + continue; + } + } + + // Prepare a countdown timer for snapshots + int nodes_until_snapshot = SNAPSHOT_FREQUENCY; + + // Starting at `next`, search for key. Breaking out of this will take us + // back to the top of the function. + while (true) { + // Case 1: `next` is tail --> stop the search at curr + if (next == tail) { + if (AVOID_OREC_CHECKS) { + if ((curr._ver = tx.check_orec(curr._obj)) == STMCAS::END_OF_TIME) + break; + } else { + // it's already validated + } + return curr; + } + + // read next's `next` and `key`, then validate + // + // NB: key is const, doesn't require validation, but it's free here :) + auto next_next = next->next.get(tx); + auto nkey = static_cast(next)->key; + uint64_t next_ver = 0; + if (!AVOID_OREC_CHECKS) { + if ((next_ver = tx.check_orec(next)) == STMCAS::END_OF_TIME) + break; // validation failure... goto top, get a new snapshot + } + + // Case 2: `next` is a data node: stop if next->key >= key + if (nkey > key) { + if (AVOID_OREC_CHECKS && + (curr._ver = tx.check_orec(curr._obj)) == STMCAS::END_OF_TIME) + break; + return curr; + } + if (nkey == key) { + if (AVOID_OREC_CHECKS) { + next_ver = tx.check_orec(next); + if (next_ver == STMCAS::END_OF_TIME) + break; // retry + } + return {next, next_ver}; + } + + // Case 3: keep traversing to `next`. Maybe take a snapshot first + if (--nodes_until_snapshot == 0) { + if (AVOID_OREC_CHECKS) { + if ((curr._ver = tx.check_orec(curr._obj)) != STMCAS::END_OF_TIME) + me->snapshots.push_back(snapshot(curr)); + } else + me->snapshots.push_back(snapshot(curr)); + nodes_until_snapshot = SNAPSHOT_FREQUENCY; + } + // NB: the way we pre-read things means only one check_orec per + // iteration + curr._obj = next; + if (!AVOID_OREC_CHECKS) + curr._ver = next_ver; + next = next_next; + } + } + } + +public: + /// Search the data structure for a node with key `key`. If not found, return + /// false. If found, return true, and set `val` to the value associated with + /// `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key to search + /// @param val A ref parameter for returning key's value, if found + /// + /// @return True if the key is found, false otherwise. The reference + /// parameter `val` is only valid when the return value is true. + bool get(STMCAS *me, const K &key, V &val) { + // If we can't use the result of get_leq, we'll loop back, and the next + // get_leq will start from a snapshot + me->snapshots.clear(); + while (true) { + // get_leq will use a read-only transaction to find the largest node with + // a key <= `key`. 
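+      // (Concretely, in a list {1, 3, 5}: get_leq(me, 4) and get_leq(me, 3)
+      // both return the node holding 3, while get_leq(me, 0) returns `head`.)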
+ // + // Postconditions of get_leq: n != null, n != tail, we have a valid + // node/version pair, and n.key <= `key` + auto n = get_leq(me, key); + + // Since we have EBR, we can read n.key without validating and fast-fail + // on key-not-found + if (n._obj == head || static_cast(n._obj)->key != key) + return false; + + // Use a hand-over-hand TM pattern to finish the get(). If the value is + // scalar, we can cast it to atomic, read it, and validate. Otherwise we + // need to lock the node. + if (std::is_scalar::value) { + RSTEP tx(me); + + // NB: given EBR, we don't need to worry about n._obj being deleted, so + // we don't need to validate before looking at the value + data_t *dn = static_cast(n._obj); + V val_copy = reinterpret_cast *>(&dn->val)->load( + std::memory_order_acquire); + if (!tx.check_continuation(n._obj, n._ver)) + continue; + val = val_copy; + return true; + } else { + WSTEP tx(me); + + // If this acquire_continuation succeeds, it's not deleted, it's a data + // node, and it's valid. If it fails, we need to restart + if (!tx.acquire_continuation(n._obj, n._ver)) { + tx.unwind(); // not strictly needed, but a good habit :) + continue; + } + + // NB: we aren't changing val, so we can unwind when we're done with it + val = static_cast(n._obj)->val; + tx.unwind(); + return true; + } + } + } + + /// Create a mapping from the provided `key` to the provided `val`, but only + /// if no such mapping already exists. This method does *not* have upsert + /// behavior for keys already present. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to create + /// @param val The value for the mapping to create + /// + /// @return True if the value was inserted, false otherwise. + bool insert(STMCAS *me, const K &key, V &val) { + // NB: The pattern here is similar to `get` + me->snapshots.clear(); + while (true) { + auto n = get_leq(me, key); + + // Since we have EBR, we can look at n._obj->key without validation. If + // it matches `key`, return false. + if (n._obj != head && static_cast(n._obj)->key == key) + return false; + + // Either n._obj is `head`, or it's a key that's too small. Let's insert! + WSTEP tx(me); + // lock n and n's successor, fail if we can't lock both + if (!tx.acquire_continuation(n._obj, n._ver)) { + tx.unwind(); + continue; + } + auto next = n._obj->next.get(tx); + if (!tx.acquire_aggressive(next)) { // NB: don't need consistency here + tx.unwind(); + continue; + } + + // stitch in a new node + data_t *new_dn = new data_t(key, val); + new_dn->next.set(next, tx); + new_dn->prev.set(n._obj, tx); + n._obj->next.set(new_dn, tx); + next->prev.set(new_dn, tx); + return true; + } + } + + /// Clear the mapping involving the provided `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to eliminate + /// + /// @return True if the key was found and removed, false otherwise + bool remove(STMCAS *me, const K &key) { + // NB: The pattern here is similar to `get` + me->snapshots.clear(); + while (true) { + auto n = get_leq(me, key); + + if (n._obj == head || static_cast(n._obj)->key != key) + return false; + + WSTEP tx(me); + // lock n, then its neighbors + // + // NB: Locking `n` is the secret sauce for getting this all to work + // without mark bits. 
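+      // (Compare lazy-list-style algorithms, which mark a node before
+      // unlinking it; here, acquiring `n`'s orec plays that same role.)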
The orec change means that others will discover + // that they can't use `n`, which is exactly what we want + if (!tx.acquire_continuation(n._obj, n._ver) || + !tx.acquire_aggressive(n._obj->prev.get(tx)) || + !tx.acquire_aggressive(n._obj->next.get(tx))) { + tx.unwind(); + continue; + } + + // unstitch it + auto pred = n._obj->prev.get(tx), succ = n._obj->next.get(tx); + pred->next.set(succ, tx); + succ->prev.set(pred, tx); + tx.reclaim(n._obj); + return true; + } + } +}; diff --git a/artifact/ds/STMCAS/ibst_omap.h b/artifact/ds/STMCAS/ibst_omap.h new file mode 100644 index 0000000..000ac55 --- /dev/null +++ b/artifact/ds/STMCAS/ibst_omap.h @@ -0,0 +1,397 @@ +#pragma once + +#include +#include +#include + +/// An ordered map, implemented as an unbalanced, internal binary search tree. +/// This map supports get(), insert(), and remove() operations. +/// +/// @param K The type of the keys stored in this map +/// @param V The type of the values stored in this map +/// @param STMCAS The STMCAS implementation (PO or PS) +template class ibst_omap { + using WSTEP = typename STMCAS::WSTEP; + using RSTEP = typename STMCAS::RSTEP; + using snapshot_t = typename STMCAS::snapshot_t; + using ownable_t = typename STMCAS::ownable_t; + template using FIELD = typename STMCAS::template sField; + + /// An easy-to-remember way of indicating the left and right children + enum DIRS { LEFT = 0, RIGHT = 1 }; + + /// node_t is the base type for all tree nodes. It doesn't have key/value + /// fields. + struct node_t : public ownable_t { + /// The node's children. Be sure to use LEFT and RIGHT to index it + FIELD children[2]; + + /// Construct a node_t. This should only be called from a writer + /// transaction + /// + /// @param tx A writing transactional context + /// @param _left The left child of this node + /// @param _right The right child of this node + node_t(WSTEP &tx, node_t *_left = nullptr, node_t *_right = nullptr) + : ownable_t() { + children[LEFT].set(_left, tx); + children[RIGHT].set(_right, tx); + } + }; + + /// A pair with ownable and orec value; equivalent to the type in snapshots + struct leq_t { + node_t *_obj = nullptr; // The object + uint64_t _ver = 0; // The observed version of the object + }; + + /// A pair holding a child node and its parent, with orec validation info + struct ret_pair_t { + leq_t child; // The child + leq_t parent; // The parent of that child + }; + + /// Our tree uses a sentinel root node, so that we always have a valid node + /// for which to compute an orec. The sentinel's *LEFT* child is the true + /// root of the tree. That is, logically sentinel has the value "TOP". + node_t *sentinel; + + /// data_t is the type for all internal and leaf nodes in the data structure. + /// It extends the base type with a key and value. 
+ /// + /// NB: keys are *not* const, because we want to overwrite nodes instead of + /// swapping them + struct data_t : public node_t { + FIELD key; // The key stored in this node + V val; // The value stored in this node + + /// Construct a node + /// + /// @param tx a WSTEP_TM reference + /// @param _left left child of the node + /// @param _right right child of the node + /// @param _key the key of the node + /// @param _val the value of the node + data_t(WSTEP &tx, node_t *_left, node_t *_right, const K &_key, V &_val) + : node_t(tx, _left, _right), key(_key), val(_val) {} + }; + +public: + /// Default construct an empty tree + /// + /// @param _op The operation that is constructing the list + /// @param cfg A configuration object + ibst_omap(STMCAS *me, auto *cfg) { + // NB: Even though the constructor is operating on private data, it needs a + // TM context in order to use tm_fields + WSTEP tx(me); + sentinel = new node_t(tx); + } + +private: + /// Search for a `key` in the tree, and return the node holding it, as well + /// as the node's parent. If the key is not found, return null, and the + /// node that ought to be parent of the (not found) `key`. + /// + /// NB: The caller is responsible for clearing the checkpoint stack before + /// calling get_node(). + /// + /// @param me The calling thread's descriptor + /// @param key The key to search for + /// + /// @return {{found, orec}, {parent, orec}} if `key` is in the tree + /// {{nullptr, 0}, {parent, orec}} if `key` is not in the tree + ret_pair_t get_node(STMCAS *me, const K &key) { + // This loop delineates the search transaction. It commences from the end + // of the longest consistent prefix in the checkpoint stack + while (true) { + // Open a RSTEP transaction to traverse downward to the target node: + leq_t parent = {nullptr, 0}, child = {nullptr, 0}; + RSTEP tx(me); + + // Validate the checkpoints to find a starting point. When this is done, + // there must be at least one entry in the checkpoints (the sentinel), and + // it must be valid. + // + // NB: When this step is done, the curr->child relationship is validated, + // but we haven't read any of child's fields, or checked child's orec. + // Every checkpointed node must be valid at the time of checkpointing. + // + + // If the stack is empty or only holds the sentinel, start from {sentinel, + // root} + if (me->snapshots.size() <= 1) { + parent._obj = sentinel; + child._obj = parent._obj->children[LEFT].get(tx); + parent._ver = tx.check_orec(parent._obj); + if (parent._ver == STMCAS::END_OF_TIME) + continue; // retry + me->snapshots.clear(); + me->snapshots.push_back({parent._obj, parent._ver}); + } + // If the stack is larger, we can find the longest valid prefix + else { + // Trim the stack to a set of consistent checkpoints + for (auto cp = me->snapshots.begin(); cp != me->snapshots.end(); ++cp) { + if (!tx.check_continuation(cp->_obj, cp->_ver)) { + me->snapshots.reset(cp - me->snapshots.begin()); + break; // the rest of the checkpoints aren't valid + } + } + // If we don't have more than a sentinel, restart + if (me->snapshots.size() <= 1) + continue; + // Use the key to choose a child of the last good checkpoint + auto top = me->snapshots.top(); + parent._obj = static_cast(top._obj); + parent._ver = top._ver; + auto parent_key = static_cast(parent._obj)->key.get(tx); + child._obj = parent._obj->children[(key < parent_key) ? 
0 : 1].get(tx); + // Validate that the read was valid + if (!tx.check_continuation(parent._obj, parent._ver)) + continue; + } + + // Traverse downward from the parent until we find null child or `key` + while (true) { + // nullptr == not found, so stop. We know parent was valid, so we can + // just return it + if (!child._obj) + return {{nullptr, 0}, parent}; + + // It's time to move downward. Read fields of child, then validate it. + // + // NB: we may not use grandchild, but it's better to read it here + auto child_key = static_cast(child._obj)->key.get(tx); + auto grandchild = + child._obj->children[(key < child_key) ? LEFT : RIGHT].get(tx); + child._ver = tx.check_orec(child._obj); + if (child._ver == STMCAS::END_OF_TIME) + break; // retry + + // If the child key matches, return {child, parent}. We know both are + // valid (parent came from stack; we just checked child) + // + // NB: the snapshotting code requires that no node with matching key + // goes into `snapshots` + if (child_key == key) + return {child, parent}; + + // Otherwise add the child to the checkpoint stack and traverse downward + me->snapshots.push_back({child._obj, child._ver}); + parent = child; + child = {grandchild, 0}; + } + } + } + + /// Given a node and its orec value, find the tree node that holds the key + /// that logically succeeds it (i.e., the leftmost descendent of the right + /// child) + /// + /// NB: The caller must ensure that `node` has a valid right child before + /// calling this method + /// + /// @param me The calling thread's descriptor + /// @param node An object and orec value to use as the starting point + /// + /// @return {{found, orec}, {parent, orec}} if no inconsistency occurs + /// {{nullptr, 0}, {nullptr, 0}} on any consistency violation + ret_pair_t get_succ_pair(STMCAS *me, leq_t &node) { + // NB: We expect the successor to be relatively close to the node, so we + // don't bother with checkpoints. However, we are willing to retry, + // since it's unlikely that `node` itself will change. + while (true) { + RSTEP tx(me); + // Ensure `node` is not deleted before reading its fields + if (!tx.check_continuation(node._obj, node._ver)) + return {{nullptr, 0}, {nullptr, 0}}; + + // Read the right child, ensure consistency + leq_t parent = node, child = {node._obj->children[RIGHT].get(tx), 0}; + if (!tx.check_continuation(node._obj, node._ver)) + return {{nullptr, 0}, {nullptr, 0}}; + + // Find the leftmost non-null node in the tree rooted at child + while (true) { + auto next = child._obj->children[LEFT].get(tx); + child._ver = tx.check_orec(child._obj); + if (child._ver == STMCAS::END_OF_TIME) + break; // retry + // If next is null, `child` is the successor. Otherwise keep traversing + if (!next) + return {child, parent}; + parent = child; + child = {next, 0}; + } + } + } + +public: + /// Search the data structure for a node with key `key`. If not found, return + /// false. If found, return true, and set `val` to the value associated with + /// `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key to search + /// @param val A ref parameter for returning key's value, if found + /// + /// @return True if the key is found, false otherwise. The reference + /// parameter `val` is only valid when the return value is true. + bool get(STMCAS *me, const K &key, V &val) { + me->snapshots.clear(); + while (true) { + // Get the node that holds `key`, if it is present, and also its parent. + // If it isn't present, we'll get a null pointer. 
That corresponds to a + // consistent read of the parent, which means we already linearized and + // we're done + auto [curr, _] = get_node(me, key); + if (curr._obj == nullptr) + return false; + + // Use an optimistic read if V can be read atomically + if (std::is_scalar::value) { + RSTEP tx(me); + auto *dn = static_cast(curr._obj); + V val_copy = reinterpret_cast *>(&dn->val)->load( + std::memory_order_acquire); + if (!tx.check_continuation(curr._obj, curr._ver)) + continue; + val = val_copy; + return true; + } else { + WSTEP tx(me); + if (!tx.acquire_continuation(curr._obj, curr._ver)) { + tx.unwind(); + continue; + } + auto dn = static_cast(curr._obj); + val = dn->val; + tx.unwind(); // because this WSTEP_TM didn't write anything + return true; + } + } + } + + /// Create a mapping from the provided `key` to the provided `val`, but only + /// if no such mapping already exists. This method does *not* have upsert + /// behavior for keys already present. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to create + /// @param val The value for the mapping to create + /// + /// @return True if the value was inserted, false otherwise. + bool insert(STMCAS *me, const K &key, V &val) { + me->snapshots.clear(); + while (true) { + auto [child, parent] = get_node(me, key); + if (child._obj) + return false; + WSTEP tx(me); + if (tx.acquire_continuation(parent._obj, parent._ver)) { + // We must have a null child and a valid parent. If it's sentinel, we + // must insert as LEFT. Otherwise, compute which child to set. + auto cID = (parent._obj == sentinel ? LEFT : RIGHT) & + (key > static_cast(parent._obj)->key.get(tx)); + auto child = new data_t(tx, nullptr, nullptr, key, val); + parent._obj->children[cID].set(child, tx); + return true; + } + } + } + + /// Clear the mapping involving the provided `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to eliminate + /// + /// @return True if the key was found and removed, false otherwise + bool remove(STMCAS *me, const K &key) { + me->snapshots.clear(); + while (true) { + auto [target, parent] = get_node(me, key); + if (target._obj == nullptr) + return false; + + // Consistently read the target node's children + // + // NB: a concurrent thread could delete `target`, or could move `target` + // as part of some other `remove`. The call to `check_continuation()` + // will detect these cases and restart. + data_t *t_child[2]; + { + RSTEP tx(me); + t_child[RIGHT] = + static_cast(target._obj->children[RIGHT].get(tx)); + t_child[LEFT] = + static_cast(target._obj->children[LEFT].get(tx)); + if (!tx.check_continuation(target._obj, target._ver)) + continue; + } + + // If either child is null, and if the parent is still valid, then we can + // unstitch the target, link the parent to a grandchild and we're done. + if (!t_child[LEFT] || !t_child[RIGHT]) { + // Acquire the (possibly null) grandchild to link to the parent + auto gID = t_child[LEFT] ? LEFT : RIGHT; + WSTEP tx(me); + if (!tx.acquire_continuation(target._obj, target._ver) || + !tx.acquire_continuation(parent._obj, parent._ver)) { + tx.unwind(); + continue; + } + + // Which child of the parent is target? + auto cID = + parent._obj->children[LEFT].get(tx) == target._obj ? LEFT : RIGHT; + + // Unstitch and reclaim + parent._obj->children[cID].set(t_child[gID], tx); + tx.reclaim(target._obj); + return true; + } + + // `target` has two children. 
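+      //   e.g.:
+      //        target(5)
+      //        /      \.
+      //      (3)      (8)
+      //               /
+      //             (6)   <- successor of 5: leftmost node under (8)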
WLOG, the leftmost descendent of the right + // child is `target`'s successor, and must have at most one child. We + // want to put that node's key and value into `target`, and then remove + // that node by setting its parent's LEFT to its RIGHT (which might be + // null). + auto [succ, s_parent] = get_succ_pair(me, target); + if (!succ._obj) + continue; + + // If target's successor is target's right child, then target._ver must + // equal s_parent._ver. As long as we lock target._obj before we try + // to lock s_parent._obj, we'll get the check for free. + { + WSTEP tx(me); + + if (!tx.acquire_continuation(target._obj, target._ver) || + !tx.acquire_continuation(succ._obj, succ._ver) || + !tx.acquire_continuation(s_parent._obj, s_parent._ver)) { + tx.unwind(); + continue; + } // Postcondition of acquisition: target, succ, and s_parent are valid + + // Copy `succ`'s key/value into `target` + static_cast(target._obj) + ->key.set(static_cast(succ._obj)->key.get(tx), tx); + static_cast(target._obj)->val = + static_cast(succ._obj)->val; + + // Unstitch `succ` by setting its parent's left to its right + // Case 1: there are intermediate nodes between target and successor + if (s_parent._obj != target._obj) + s_parent._obj->children[LEFT].set(succ._obj->children[RIGHT].get(tx), + tx); + // Case 2: target is successor's parent + else + s_parent._obj->children[RIGHT].set(succ._obj->children[RIGHT].get(tx), + tx); + tx.reclaim(succ._obj); + return true; + } + } + } +}; diff --git a/artifact/ds/STMCAS/rbtree_omap.h b/artifact/ds/STMCAS/rbtree_omap.h new file mode 100644 index 0000000..3fceba3 --- /dev/null +++ b/artifact/ds/STMCAS/rbtree_omap.h @@ -0,0 +1,948 @@ +#pragma once + +#include +#include +#include +#include +#include + +/// An ordered map, implemented as an unbalanced, internal binary search tree. +/// This map supports get(), insert(), and remove() operations. +/// +/// @param K The type of the keys stored in this map +/// @param V The type of the values stored in this map +/// @param STMCAS The STMCAS implementation (PO or PS) +template class rbtree_omap { + using WSTEP = typename STMCAS::WSTEP; + using RSTEP = typename STMCAS::RSTEP; + using snapshot_t = typename STMCAS::snapshot_t; + using ownable_t = typename STMCAS::ownable_t; + template using FIELD = typename STMCAS::template sField; + + /// An easy-to-remember way of indicating the left and right children + enum DIRS { LEFT = 0, RIGHT = 1 }; + + /// the color of a node + enum COLOR { RED = 0, BLACK = 1 }; + + /// node_t is the base type for all tree nodes. It doesn't have key/value + /// fields. + struct node_t : public ownable_t { + FIELD children[2]; // The node's children; index with LEFT/RIGHT + FIELD color; // The node's color + + /// Construct a node_t. This should only be called from a writer + /// transaction + /// + /// @param tx A writing transactional context + /// @param _color The color for this node + /// @param _left The left child of this node + /// @param _right The right child of this node + node_t(WSTEP &tx, COLOR _color, node_t *_left = nullptr, + node_t *_right = nullptr) + : ownable_t() { + color.set(_color, tx); + children[LEFT].set(_left, tx); + children[RIGHT].set(_right, tx); + } + }; + + /// The pair returned by get_leq; equivalent to the type in snapshots + struct leq_t { + node_t *_obj = nullptr; // The object + uint64_t _ver = 0; // The observed version of the object + }; + + /// Our tree uses a sentinel root node, so that we always have a valid node + /// for which to compute an orec. 
The sentinel's *LEFT* child is the true + /// root of the tree. That is, logically sentinel has the value "TOP". + node_t *sentinel; + + /// data_t is the type for all internal and leaf nodes in the data structure. + /// It extends the base type with a key and value. + /// + /// NB: keys are *not* const, because we want to overwrite nodes instead of + /// swapping them + struct data_t : public node_t { + FIELD key; // The key stored in this node + V val; // The value stored in this node + FIELD parent; // The node's parent + + /// Construct a node + /// + /// @param tx A writing transaction context + /// @param _parent The node's parent + /// @param _left The node's left child + /// @param _right The node's right child + /// @param _key The node's key + /// @param _val The node's value + /// @param _color The color of this node + data_t(WSTEP &tx, node_t *_parent, node_t *_left, node_t *_right, + const K &_key, V &_val, COLOR _color) + : node_t(tx, _color, _left, _right), key(_key), val(_val), + parent(_parent) {} + }; + +public: + /// Default construct an empty tree + /// + /// @param me The operation that is constructing the tree + /// @param cfg An unused configuration object + rbtree_omap(STMCAS *me, auto *cfg) { + // NB: Even though the constructor is operating on private data, it needs a + // TM context for the constructor + WSTEP tx(me); + sentinel = new node_t(tx, BLACK); + } + +private: + /// Search for a `key` in the tree, and return the node holding it. If the + /// key is not found, return the node that ought to be parent of the (not + /// found) `key`. + /// + /// NB: The caller is responsible for clearing the checkpoint stack before + /// calling get_node(). + /// + /// @param me The calling thread's descriptor + /// @param key The key to search for + /// + /// @return {found, orec} if `key` is in the tree; + /// {parent, orec} if `key` is not in the tree + leq_t get_node(STMCAS *me, const K &key) const { + // This loop delineates the search transaction. It commences from the end + // of the longest consistent prefix in the checkpoint stack + while (true) { + // Open a RSTEP transaction to traverse downward to the target node: + leq_t parent = {nullptr, 0}, child = {nullptr, 0}; + RSTEP tx(me); + + // Validate the checkpoints to find a starting point. When this is done, + // there must be at least one entry in the checkpoints (the sentinel), and + // it must be valid. + // + // NB: When this step is done, the curr->child relationship is validated, + // but we haven't read any of child's fields, or checked child's orec. + // Every checkpointed node must be valid at the time of checkpointing. 
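+      //
+      // (Example: if `snapshots` holds {sentinel, a, b, c} and b fails its
+      // check_continuation, the stack is trimmed back to {sentinel, a} and
+      // the traversal resumes from a's child instead of from the root.)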
+ + // If stack is empty or only holds sentinel, start from {sentinel, root} + if (me->snapshots.size() <= 1) { + parent._obj = sentinel; + child._obj = parent._obj->children[LEFT].get(tx); + parent._ver = tx.check_orec(parent._obj); + if (parent._ver == STMCAS::END_OF_TIME) + continue; // retry + me->snapshots.clear(); + me->snapshots.push_back({parent._obj, parent._ver}); + } + // If the stack is larger, we can find the longest valid prefix + else { + // Trim the stack to a set of consistent checkpoints + for (auto cp = me->snapshots.begin(); cp != me->snapshots.end(); ++cp) { + if (!tx.check_continuation(cp->_obj, cp->_ver)) { + me->snapshots.reset(cp - me->snapshots.begin()); + break; // the rest of the checkpoints aren't valid + } + } + // If we don't have more than a sentinel, restart + if (me->snapshots.size() <= 1) + continue; + // Use the key to choose a child of the last good checkpoint + // + // NB: top.key != key, because we never put a matching key into + // snapshots, and if a remove caused a key to change, we'll fail to + // validate that node. + auto top = me->snapshots.top(); + parent = {static_cast(top._obj), top._ver}; + auto parent_key = static_cast(parent._obj)->key.get(tx); + child._obj = parent._obj->children[(key < parent_key) ? 0 : 1].get(tx); + // Validate that the reads of parent were valid + if (!tx.check_continuation(parent._obj, parent._ver)) + continue; + } + + // Traverse downward from the parent until we find null child or `key` + while (true) { + // nullptr == not found, so stop. Parent was valid, so return it + if (!child._obj) + return parent; + + // It's time to move downward. Read fields of child, then validate it. + // + // NB: we may not use grandchild, but it's better to read it here + auto child_key = static_cast(child._obj)->key.get(tx); + auto grandchild = + child._obj->children[(key < child_key) ? LEFT : RIGHT].get(tx); + child._ver = tx.check_orec(child._obj); + if (child._ver == STMCAS::END_OF_TIME) + break; // retry + + // If the child key matches, return {child, parent}. We know both are + // valid (parent came from stack; we just checked child) + // + // NB: the snapshot code requires that no node with matching key goes + // into `snapshots` + if (child_key == key) + return child; + + // Otherwise add the child to the checkpoint stack and traverse downward + me->snapshots.push_back({child._obj, child._ver}); + parent = child; + child = {grandchild, 0}; + } + } + } + + /// Given a node and its orec value, find the tree node that holds the key + /// that logically succeeds it (i.e., the leftmost descendent of the right + /// child) + /// + /// NB: The caller must ensure that `node` has a valid right child before + /// calling this method + /// + /// @param me The calling thread's descriptor + /// @param node An object and orec value to use as the starting point + /// + /// @return {{found, orec}, {parent, orec}} if no inconsistency occurs + /// {{nullptr, 0}, {nullptr, 0}} on any consistency violation + leq_t get_succ(STMCAS *me, leq_t &node) { + // NB: We expect the successor to be relatively close to the node, so we + // don't bother with checkpoints. However, we are willing to retry, + // since it's unlikely that `node` itself will change. + while (true) { + RSTEP tx(me); + // Read the right child, ensure consistency + // + // NB: Since we have smr, we can read `node` even if it is deleted. The + // subsequent validation will suffice. 
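+      // (If that validation fails, `node` was locked or removed after
+      // get_node() observed it; we return {nullptr, 0} so the caller's retry
+      // loop starts over from get_node().)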
+ leq_t child = {node._obj->children[RIGHT].get(tx), 0}; + if (!tx.check_continuation(node._obj, node._ver)) + return {nullptr, 0}; + + // Find the leftmost non-null node in the tree rooted at child + while (true) { + auto next = child._obj->children[LEFT].get(tx); + child._ver = tx.check_orec(child._obj); + if (child._ver == STMCAS::END_OF_TIME) + break; // retry + // If next is null, `child` is the successor. Otherwise keep traversing + if (!next) + return child; + child = {next, 0}; + } + } + } + +public: + /// Search the data structure for a node with key `key`. If not found, return + /// false. If found, return true, and set `val` to the value associated with + /// `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key to search + /// @param val A ref parameter for returning key's value, if found + /// + /// @return True if the key is found, false otherwise. The reference + /// parameter `val` is only valid when the return value is true. + bool get(STMCAS *me, const K &key, V &val) const { + me->snapshots.clear(); + while (true) { + // Get the node that holds `key`, if it is present. If it isn't present, + // we'll get the parent of where it would be. Whatever we get is + // validated, so if it's the sentinel, we're done. + auto curr = get_node(me, key); + if (curr._obj == sentinel) + return false; + + // Use an optimistic read if V can be read atomically + if (std::is_scalar::value) { + RSTEP tx(me); + auto *dn = static_cast(curr._obj); + auto dn_key = dn->key.get(tx); + V val_copy = reinterpret_cast *>(&dn->val)->load( + std::memory_order_acquire); + if (!tx.check_continuation(curr._obj, curr._ver)) + continue; + if (dn_key != key) + return false; + val = val_copy; + return true; + } else { + WSTEP tx(me); + if (!tx.acquire_continuation(curr._obj, curr._ver)) { + tx.unwind(); + continue; + } + auto dn = static_cast(curr._obj); + if (dn->key.get(tx) != key) { + tx.unwind(); + return false; + } + val = dn->val; + tx.unwind(); // because this WSTEP_TM didn't write anything + return true; + } + } + } + + /// Create a mapping from the provided `key` to the provided `val`, but only + /// if no such mapping already exists. This method does *not* have upsert + /// behavior for keys already present. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to create + /// @param val The value for the mapping to create + /// + /// @return True if the value was inserted, false otherwise. + bool insert(STMCAS *me, const K &key, V &val) { + me->snapshots.clear(); + while (true) { + // Get the node that holds `key`, if it is present. If it isn't present, + // we'll get the parent of where it would be. Whatever we get is + // validated, so if it matches, we're done. + auto leq = get_node(me, key); + + // We're going to assume that we'll insert, so open a WSTEP transaction. + // If we can't lock the node, restart + WSTEP tx(me); + if (!tx.acquire_continuation(leq._obj, leq._ver)) + continue; + + // If the key matches, the insertion attempt fails + if (leq._obj != sentinel && + static_cast(leq._obj)->key.get(tx) == key) { + tx.unwind(); + return false; + } + + // We must have a null child and a valid parent. If it's sentinel, we + // must insert as LEFT. Otherwise, compute which child to set. + node_t *parent = leq._obj; + auto cID = (leq._obj == sentinel ? LEFT : RIGHT) & + (key > static_cast(parent)->key.get(tx)); + + // We are strict 2PL here: first we must acquire everything that will be + // written. 
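+      // (Acquire-then-write: insert_fixup below runs only after every node it
+      // could touch has been acquired, which is why it can mirror the
+      // sequential rebalancing code exactly.)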
`fix_root` tracks if the root will need special cleanup. + bool fix_root = false; + if (!insert_acquire_aggressive_all(cID, static_cast(parent), tx, + fix_root)) { + tx.unwind(); + continue; + } + + // Now we can link the child to the parent + auto child = new data_t(tx, parent, nullptr, nullptr, key, val, RED); + tx.acquire_aggressive(child); + parent->children[cID].set(child, tx); + + // Rebalance in response to this insertion, then we're done + insert_fixup(child, tx, fix_root); + return true; + } + } + + /// Clear the mapping involving the provided `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to eliminate + /// + /// @return True if the key was found and removed, false otherwise + bool remove(STMCAS *me, const K &key) { + me->snapshots.clear(); + while (true) { + // Get the node that holds `key`, if it is present. If it isn't present, + // we'll get the parent of where it would be. Whatever we get is + // validated, so if it's sentinel, we're done. + auto target = get_node(me, key); + if (target._obj == sentinel) + return false; + + // Read the node's key and its children; detect if the key doesn't match + // + // NB: we can't open a WSTEP yet, because an upcoming call to get_succ + // does + // is going to use an RSTEP + data_t *t_child[2]; + { + RSTEP tx(me); + auto dn_key = static_cast(target._obj)->key.get(tx); + t_child[RIGHT] = + static_cast(target._obj->children[RIGHT].get(tx)); + t_child[LEFT] = + static_cast(target._obj->children[LEFT].get(tx)); + if (!tx.check_continuation(target._obj, target._ver)) + continue; + if (dn_key != key) + return false; + } + + // If target has <=1 child, then we will un-stitch by pointing its parent + // to that child. Otherwise, we'll un-stitch by swapping target with its + // successor and then removing the successor by pointing successor's + // parent to successor's child. Here's where we get the child and + // successor + leq_t succ = {target._obj, target._ver}; // succ is target if only 1 child + data_t *child = nullptr; // The child who gets swapped up + if (!t_child[LEFT]) { + child = t_child[RIGHT]; + } else if (!t_child[RIGHT]) { + child = t_child[LEFT]; + } else { + succ = get_succ(me, target); + if (!succ._obj) + continue; + RSTEP tx(me); + child = static_cast(succ._obj->children[RIGHT].get(tx)); + if (!tx.check_continuation(succ._obj, succ._ver)) + continue; + } + + // We're going to assume that we'll remove, so open a WSTEP transaction + // and acquire succ and child. + // + // NB: acquire continuation on succ is necessary regardless of whether + // it's target or not, but child can be aggressive. + WSTEP tx(me); + if (!tx.acquire_continuation(succ._obj, succ._ver) || + (child && !tx.acquire_aggressive(child))) { + tx.unwind(); + continue; + } + + // Now acquire child's children, if child is not null + { + auto x_l = child ? child->children[LEFT].get(tx) : nullptr; + auto x_r = child ? child->children[RIGHT].get(tx) : nullptr; + if ((x_l && !tx.acquire_aggressive(x_l)) || + (x_r && !tx.acquire_aggressive(x_r))) { + tx.unwind(); + continue; + } + } + + // We are strict 2PL here: first we must acquire everything that will be + // written. 
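+      // (As with insert(), nothing in the tree is modified until every
+      // acquisition below has succeeded; any failure unwinds and retries from
+      // get_node().)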
remove_acquire_aggressive_all does most of the job: + if (!remove_acquire_aggressive_all( + child, static_cast(succ._obj), tx)) { + tx.unwind(); + continue; + } + + // Lastly, we need to acquire the target and the successor's parent + if ((!tx.acquire_continuation(target._obj, target._ver)) || + (!tx.acquire_aggressive( + static_cast(succ._obj)->parent.get(tx)))) { + tx.unwind(); + continue; + } + + // Now we can start moving keys and values, unstitching, and cleaning up + + // We need the successor's original color to know if we need to call fixup + auto original_succ_color = succ._obj->color.get(tx); + // If we call fixup, we need the child's CID and parent + DIRS cID_c; + node_t *c_parent; + + // If either child is null, and if the parent is still valid, then we can + // un-stitch target, then link target's parent to target's grandchild + if (!t_child[LEFT] || !t_child[RIGHT]) { + // get target's parent, figure out which of its children target is: + c_parent = static_cast(target._obj)->parent.get(tx); + cID_c = c_parent->children[LEFT].get(tx) == target._obj ? LEFT : RIGHT; + // Unstitch, reclaim + c_parent->children[cID_c].set(child, tx); + if (child) + child->parent.set(c_parent, tx); + tx.reclaim(target._obj); + } + // When both children of target are not null, we have to swap, then + // unstitch + else { + // Get the successor's parent, then copy succ's k/v into target + auto s_p = static_cast(succ._obj)->parent.get(tx); + auto dn = static_cast(target._obj); + dn->key.set(static_cast(succ._obj)->key.get(tx), tx); + dn->val = static_cast(succ._obj)->val; + + // Unstitch `succ` by setting its parent's left to its right (i.e., + // child) + + // Case 1: there are intermediate nodes between target and successor + if (s_p != target._obj) { + s_p->children[LEFT].set(child, tx); + cID_c = LEFT; + } + // Case 2: target is successor's parent + else { + s_p->children[RIGHT].set(child, tx); + cID_c = RIGHT; + } + // don't forget the back-link from child to parent + if (child) + child->parent.set(s_p, tx); + + tx.reclaim(succ._obj); + c_parent = s_p; + } + + // Rebalance and recolor in response to this removal, then we're done + if (original_succ_color == BLACK) + remove_fixup(child, static_cast(c_parent), cID_c, tx); + return true; + } + } + +private: + /// Acquire all of the nodes that will need to change if `z_p` is to receive a + /// new child in position `CID_z`. + /// + /// @param cID_z The index of the child being added to z_p + /// @param z_p The (acquired) node who will be receiving a new child + /// @param tx A writing transaction context + /// @param fix_root A reference parameter indicating if the root was reached + /// + /// @return True if all nodes were acquired, false otherwise + bool insert_acquire_aggressive_all(int cID_z, data_t *z_p, WSTEP &tx, + bool &fix_root) { + // If we're giving the sentinel a child, then we immediately stop + // traversing upward... the sentinel is already locked. + if (z_p == sentinel) { + fix_root = true; + return true; + } + + // z is the child of z_p. In the first round, it's the to-insert node, + // which we haven't created yet, so we let it be null and pretend it's RED + data_t *z = nullptr; + + // Invariant: z_p is already on each iteration + while (z_p->color.get(tx) == RED) { + // Acquire the grandparent + data_t *z_p_p = static_cast(z_p->parent.get(tx)); + if (!tx.acquire_aggressive(z_p_p)) + return false; + + // Now acquire z's aunt (z_a) (z_p's sibling) if it exists + auto cID_z_p = z_p == z_p_p->children[LEFT].get(tx) ? 
LEFT : RIGHT; + auto cID_z_a = cID_z_p == LEFT ? RIGHT : LEFT; + data_t *z_a = static_cast(z_p_p->children[cID_z_a].get(tx)); + if (z_a && !tx.acquire_aggressive(z_a)) + return false; + + // case 1: z_a is RED --> colors will propagate + // z_p_p + // / \. + // z_p z_a + // | + // z + if (z_a && static_cast(z_a)->color.get(tx) == RED) { + // NB: {z, z_p, z_a, z_p_p} are acquired, but we're going to jump to the + // great grandparent. The loop invariant requires us to acquire it + // now. + z = static_cast(z_p_p); + z_p = static_cast(z->parent.get(tx)); + if (!tx.acquire_aggressive(z_p)) + return false; + if (z_p == sentinel) // we painted z which is root, we need to fix that + fix_root = true; + cID_z = z == z_p->children[LEFT].get(tx) ? LEFT : RIGHT; + continue; + } + + // Invariant: z_a is black or nullptr + + // case 2: cID_z != cID_z_p --> do a /cID_z_p/ rotation on z_p + // z_p_p + // / \. + // z_a z_p + // / + // z + // / \. + // z_e z_c + if (cID_z != cID_z_p) { + // NB: z == nullptr should only be true in the first iteration + // + // NB: {z, z_p, z_p_p} are acquired. one of z's children will get a new + // parent, so acquire it + data_t *z_c = + z ? static_cast(z->children[cID_z_p].get(tx)) : nullptr; + if (z_c && !tx.acquire_aggressive(z_c)) + return false; + + // Now we roll right into case 3 and finish the rebalance: + // case 2->3: perform a /cID_z_a/_rotation on z_p_p + // z_p_p_p + // | + // z_p_p + // / \. + // z_a z (==>new z_p) + // / \. + // z_e z_p (==> new z) + // / + // z_c + + // {z, z_p_p} are already acquired. We need to acquire z_e and z_p_p_p + data_t *z_e = + z ? static_cast(z->children[cID_z_a].get(tx)) : nullptr; + if (z_e && !tx.acquire_aggressive(z_e)) + return false; + + auto z_p_p_p = z_p_p->parent.get(tx); + if (!tx.acquire_aggressive(z_p_p_p)) + return false; + if (z_p_p_p == sentinel) + fix_root = true; + return true; + } + + // case 3: perform a /cID_y/_rotation on z_p_p + // z_p_p_p + // | + // z_p_p + // / \. + // z_a z_p + // / \. + // w z + + // {z_p, z_p_p} are already acquired. We need to acquire w and z_p_p_p + auto z_p_p_p = z_p_p->parent.get(tx); + if (!tx.acquire_aggressive(z_p_p_p)) + return false; + if (z_p_p_p == sentinel) + fix_root = true; + + auto w = z_p->children[cID_z_a].get(tx); + if (w && !tx.acquire_aggressive(w)) + return false; + return true; + } + + // At last, we've acquired everything we need, and can stop + return true; + } + + /// Do all of the rotations and color changes that correspond to z being + /// inserted into the tree. This should only be called after + /// insert_acquire_aggressive_all has acquired everything that this method + /// will modify. Consequently, this code is identical to the sequential code. + /// + /// @param z The new child being added + /// @param tx A writing transaction context + /// @param fix_root Is the root acquired? + void insert_fixup(data_t *z, WSTEP &tx, bool fix_root) { + auto z_p = z->parent.get(tx); + // Normal case: z is not the root + while (z_p->color.get(tx) == RED) { + auto cID_z = z == z_p->children[LEFT].get(tx) ? LEFT : RIGHT; + node_t *z_p_p = static_cast(z_p)->parent.get(tx); + auto cID_z_p = z_p == z_p_p->children[LEFT].get(tx) ? LEFT : RIGHT; + auto cID_z_a = cID_z_p == LEFT ? 
RIGHT : LEFT; + data_t *z_a = static_cast(z_p_p->children[cID_z_a].get(tx)); + // case 1: + if (z_a && z_a->color.get(tx) == RED) { + z_p->color.set(BLACK, tx); + z_a->color.set(BLACK, tx); + z_p_p->color.set(RED, tx); + z = static_cast(z_p_p); + z_p = z->parent.get(tx); + continue; + } + + // case 2 + if (cID_z == cID_z_a) { + z = static_cast(z_p); + if (cID_z == RIGHT) + left_rotate(z, tx); + else + right_rotate(z, tx); + z_p = z->parent.get(tx); + z_p_p = static_cast(z_p)->parent.get(tx); + } + + // case 3 (includes fallthrough from 2->3) + z_p->color.set(BLACK, tx); + z_p_p->color.set(RED, tx); + if (cID_z_a == RIGHT) + right_rotate(static_cast(z_p_p), tx); + else + left_rotate(static_cast(z_p_p), tx); + } + + // Clean up the root if necessary + if (fix_root) { + auto r = sentinel->children[LEFT].get(tx); + static_cast(r)->color.set(BLACK, tx); + } + } + + /// Acquire all of the nodes that will need to change if `y` is to be removed + /// and `x` is to move into its place + /// + /// @param x The node that moves upward + /// @param y The node that will be removed + /// @param tx A writing transaction context + /// + /// @return True if all nodes were acquired, false otherwise + bool remove_acquire_aggressive_all(data_t *x, data_t *y, WSTEP &tx) { + // If `y` isn't black, we won't have to do any rebalancing + if (y->color.get(tx) != BLACK) + return true; + + // When x swaps into y's place, it gets y's parent but keeps its color + auto x_color = x ? x->color.get(tx) : BLACK; + x = y; + + // loop invariants: x != nullptr, x.color == black, x is acquired + while (x->parent.get(tx) != sentinel && x_color == BLACK) { + data_t *x_p = static_cast(x->parent.get(tx)); + if (!tx.acquire_aggressive(x_p)) + return false; + + // We need to know if x is left or right, and we need its sibling (w) + DIRS cID_x = LEFT, cID_w = RIGHT; + if (x != x_p->children[LEFT].get(tx)) { + cID_x = RIGHT; + cID_w = LEFT; + } + auto w = x_p->children[cID_w].get(tx); + if (!tx.acquire_aggressive(w)) + return false; + + // case 1: Do a cID_x rotation to push x down + // x_p_p + // | + // x_p + // \. + // w + // / + // w_c + if (static_cast(w)->color.get(tx) == RED) { + // {x_p, w} are acquired. Need to acquire cID_x'th child of w and x_p_p + data_t *w_c = static_cast(w->children[cID_x].get(tx)); + auto x_p_p = x_p->parent.get(tx); + if ((w_c && !tx.acquire_aggressive(w_c)) || + (!tx.acquire_aggressive(x_p_p))) + return false; + // w_c's children's colors determine if we need to propagate. + // Since we're going to read both children's colors, we need to acquire + // them (as w_c_c and w_c_e) + data_t *w_c_c = + w_c ? static_cast(w_c->children[cID_x].get(tx)) : nullptr; + data_t *w_c_e = + w_c ? static_cast(w_c->children[cID_w].get(tx)) : nullptr; + + if ((w_c_c && !tx.acquire_aggressive(w_c_c)) || + (w_c_e && !tx.acquire_aggressive(w_c_e))) + return false; + + // If case 1 becomes case 3, we do a cID_w-rotation on w_c + // x_p + // \. + // w_c + // / + // w_c_c + // \. + // w_c_c_e + // If case 1 becomes case 3 becomes case 4, we need to do a + // /cID_x/_rotation on x_p + // w + // / + // x_p + // \. + // w_c_c + // / + // w_c_c_c + if (((w_c_e && w_c_e->color.get(tx) == BLACK) || !w_c_e) && w_c_c && + w_c_c->color.get(tx) == RED) { + // {w, w_c_c, x_p} are already acquired. Need to acquire w_c_c's + // children + data_t *w_c_c_c = + w_c_c ? static_cast(w_c_c->children[cID_x].get(tx)) + : nullptr; + data_t *w_c_c_e = + w_c_c ? 
static_cast(w_c_c->children[cID_w].get(tx)) + : nullptr; + if ((w_c_c_e && !tx.acquire_aggressive(w_c_c_e)) || + (w_c_c_c && !tx.acquire_aggressive(w_c_c_c))) + return false; + return true; + } + + // If case 1 becomes case 4, we need to do a /cID_x/_rotation on x_p + // w + // / + // x_p + // \. + // w_c + // / + // w_c_c + if (w_c_e && w_c_e->color.get(tx) == RED) { + // {w, w_c, w_c_c, x_p} are already acquired, so we're done + return true; + } + // otherwise : case1 becomes case2, propagate, see below + } + // W's children's colors determine if we need to propagate, so acquire + // both + else { + data_t *w_c = static_cast(w->children[cID_x].get(tx)); + data_t *w_e = static_cast(w->children[cID_w].get(tx)); + if ((w_c && !tx.acquire_aggressive(w_c)) || + (w_e && !tx.acquire_aggressive(w_e))) + return false; + // case 3: we need to do a /cID_w/_rotation on w + // x_p + // \. + // w + // / + // w_c + // \. + // w_c_e + // {w, x_p, w_c} are already acquired. Acquire w_c's cID_w child + // case 3 -> case 4, we need to perform a /cID_x/ rotation on x_p + // x_p_p + // | + // x_p + // \. + // w_c + // / + // w_c_c + // {w, x_p} are already acquired: acquire w_c's cID_x child, and x_p_p + data_t *x_p_p = static_cast(x_p->parent.get(tx)); + if (x_p_p && !tx.acquire_aggressive(x_p_p)) + return false; + + if (((w_e && w_e->color.get(tx) == BLACK) || !w_e) && w_c && + w_c->color.get(tx) == RED) { + data_t *w_c_c = + w_c ? static_cast(w_c->children[cID_x].get(tx)) + : nullptr; + data_t *w_c_e = + w_c ? static_cast(w_c->children[cID_w].get(tx)) + : nullptr; + if ((w_c_e && !tx.acquire_aggressive(w_c_e)) || + (w_c_c && !tx.acquire_aggressive(w_c_c))) + return false; + return true; + } + + // case 4: w_c and/or w_e is red, so cID_x rotate x_p + // {x_p, x_p_p, w, w_c} already acquired, nothing needed + if ((w_c && w_c->color.get(tx) == RED) || + (w_e && w_e->color.get(tx) == RED)) + return true; + } + // case 2 : propagate the fixup to x's parent. + x = static_cast(x_p); + x_color = x->color.get(tx); + } + return true; + } + + /// Do all of the color changes and rotations that correspond to x's parent + /// being deleted, resulting in x becoming the child of x_p. This should only + /// be called after remove_acquire_aggressive_all has acquired everything that + /// this method will modify. Consequently, this code is identical to the + /// sequential code. + /// + /// @param x The node that moved up + /// @param x_p The new parent of `x` + /// @param cID_x Which child of `x_p` is `x`? + /// @param tx A writing transaction context + void remove_fixup(data_t *x, data_t *x_p, DIRS cID_x, WSTEP &tx) { + auto x_color = x ? x->color.get(tx) : BLACK; + while (x_p != sentinel && x_color == BLACK) { + // `w` is the sibling of `x` + auto cID_w = cID_x == LEFT ? RIGHT : LEFT; + data_t *w = static_cast(x_p->children[cID_w].get(tx)); + if (w && static_cast(w)->color.get(tx) == RED) { + static_cast(w)->color.set(BLACK, tx); + static_cast(x_p)->color.set(RED, tx); + if (cID_x == LEFT) + left_rotate(static_cast(x_p), tx); + else + right_rotate(static_cast(x_p), tx); + w = static_cast(x_p->children[cID_w].get(tx)); + } + // check both children's colors to decide about propagating: + data_t *w_c = + w ? static_cast(w->children[cID_x].get(tx)) : nullptr; + data_t *w_e = + w ? 
static_cast(w->children[cID_w].get(tx)) : nullptr; + if ((w_c && w_c->color.get(tx) == RED) || + (w_e && w_e->color.get(tx) == RED)) { + if (!w_e || w_e->color.get(tx) == BLACK) { + w_c->color.set(BLACK, tx); + w->color.set(RED, tx); + if (cID_x == LEFT) + right_rotate(w, tx); + else + left_rotate(w, tx); + w = static_cast(x_p->children[cID_w].get(tx)); + } + w->color.set(x_p->color.get(tx), tx); + x_p->color.set(BLACK, tx); + auto w_e = w->children[cID_w].get(tx); + static_cast(w_e)->color.set(BLACK, tx); + if (cID_x == LEFT) + left_rotate(x_p, tx); + else + right_rotate(x_p, tx); + break; + } else { + w->color.set(RED, tx); + x = x_p; + x_p = static_cast(x->parent.get(tx)); + x_color = x->color.get(tx); + cID_x = x == x_p->children[LEFT].get(tx) ? LEFT : RIGHT; + } + } + if (x) + x->color.set(BLACK, tx); + } + + /// Perform a left rotation on `x`, pushing it downward + /// + /// @param x The node to rotate downward + /// @param tx A writing transaction context + void left_rotate(data_t *x, WSTEP &tx) { + auto y = static_cast(x->children[RIGHT].get(tx)); + auto y_l = y->children[LEFT].get(tx); + x->children[RIGHT].set(y_l, tx); + if (y_l) + static_cast(y_l)->parent.set(x, tx); + + auto x_p = x->parent.get(tx); + y->parent.set(x_p, tx); + if (x_p == sentinel) + sentinel->children[LEFT].set(y, tx); + else + x_p->children[x == x_p->children[LEFT].get(tx) ? LEFT : RIGHT].set(y, tx); + + y->children[LEFT].set(x, tx); + x->parent.set(y, tx); + } + + /// Perform a right rotation on `y`, pushing it downward + /// + /// @param y The node to rotate downward + /// @param tx A writing transaction context + void right_rotate(data_t *y, WSTEP &tx) { + auto x = static_cast(y->children[LEFT].get(tx)); + auto x_r = x->children[RIGHT].get(tx); + y->children[LEFT].set(x_r, tx); + if (x_r) + static_cast(x_r)->parent.set(y, tx); + + auto y_p = y->parent.get(tx); + x->parent.set(y_p, tx); + if (y_p == sentinel) + sentinel->children[LEFT].set(x, tx); + else + y_p->children[y == y_p->children[RIGHT].get(tx) ? RIGHT : LEFT].set(x, + tx); + + x->children[RIGHT].set(y, tx); + y->parent.set(x, tx); + } +}; diff --git a/artifact/ds/STMCAS/skiplist_cached_opt_omap.h b/artifact/ds/STMCAS/skiplist_cached_opt_omap.h new file mode 100644 index 0000000..49b7a2f --- /dev/null +++ b/artifact/ds/STMCAS/skiplist_cached_opt_omap.h @@ -0,0 +1,567 @@ +#pragma once + +#include +#include +#include +#include +#include + +/// An ordered map, implemented as a doubly-linked skip list. This map supports +/// get(), insert(), and remove() operations. +/// +/// This version of the skiplist is heavily optimized to use the biggest STMCAS +/// operations it can, while still avoiding aborts. This means, for example, +/// trying to stitch as many layers as possible (and to do so via recording old +/// values). +/// +/// @param K The type of the keys stored in this map +/// @param V The type of the values stored in this map +/// @param STMCAS The STMCAS implementation (PO or PS) +/// @param dummy_key A fake key, to use in sentinel nodes +/// @param dummy_val A fake value, to use in sentinel nodes +template +class skiplist_cached_opt_omap { + using WSTEP = typename STMCAS::WSTEP; + using RSTEP = typename STMCAS::RSTEP; + using STEP = typename STMCAS::STEP; + using ownable_t = typename STMCAS::ownable_t; + template using FIELD = typename STMCAS::template sField; + + /// data_t is a node in the skip list. 
It has a key, a value, an owner, and a + /// "tower" of predecessor and successor pointers + /// + /// NB: Height isn't always the size of tower... it tracks how many levels are + /// fully and correctly stitched, so it changes during insertion and + /// removal. + struct data_t : public ownable_t { + /// A pair of data pointers, for the successor and predecessor at a level of + /// the tower + struct level_t { + FIELD key; // Key of the successor + FIELD next; // Succ at this level + }; + + const K key; // The key stored in this node + std::atomic val; // The value stored in this node + const uint8_t height; // # valid tower nodes + level_t tower[]; // Tower of pointers to pred/succ + + private: + /// Construct a data node. This is private to force the use of our make_* + /// methods, which handle allocating enough space for the tower. + /// + /// @param _key The key that is stored in this node + /// @param _val The value that is stored in this node + data_t(K _key, V _val, uint8_t _height) + : ownable_t(), key(_key), val(_val), height(_height) {} + + public: + /// Construct a sentinel (head or tail) node. Note that the sentinels can't + /// easily be of a node type that lacks key and value fields, or else the + /// variable-length array would preclude inheriting from it. + /// + /// @param iHeight The max number of index layers this node will have + static data_t *make_sentinel(uint8_t iHeight) { + int node_size = sizeof(data_t) + (iHeight + 1) * sizeof(level_t); + void *region = calloc(1, node_size); + return new (region) data_t(dummy_key, dummy_val, iHeight); + } + + /// Construct a data node + /// + /// @param iHeight The max number of index layers this node will have + /// @param key The key to store in this node + /// @param val The value to store in this node + static data_t *make_data(uint64_t iHeight, K key, V val) { + int node_size = sizeof(data_t) + (iHeight + 1) * sizeof(level_t); + void *region = calloc(1, node_size); + return new (region) data_t(key, val, iHeight); + } + }; + + const int NUM_INDEX_LAYERS; // # of index layers. Doesn't count data layer + data_t *const head; // The head sentinel + data_t *const tail; // The tail sentinel + +public: + /// Default construct a skip list by stitching a head sentinel to a tail + /// sentinel at each level + /// + /// @param _op The operation that is constructing the list + /// @param cfg A configuration object that has a `snapshot_freq` field + skiplist_cached_opt_omap(STMCAS *_op, auto *cfg) + : NUM_INDEX_LAYERS(cfg->max_levels), + head(data_t::make_sentinel(NUM_INDEX_LAYERS)), + tail(data_t::make_sentinel(NUM_INDEX_LAYERS)) { + // NB: Even though the constructor is operating on private data, it needs a + // TM context in order to set the head and tail's towers to each other + WSTEP tx(_op); + for (auto i = 0; i <= NUM_INDEX_LAYERS; i++) { + head->tower[i].key.set(dummy_key, tx); + head->tower[i].next.set(tail, tx); + } + } + + /// Search the data structure for a node with key `key`. If not found, return + /// false. If found, return true, and set `val` to the value associated with + /// `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key to search + /// @param val A ref parameter for returning key's value, if found + /// + /// @return True if the key is found, false otherwise. The reference + /// parameter `val` is only valid when the return value is true. + bool get(STMCAS *me, const K &key, V &val) { + while (true) { + RSTEP tx(me); + // Do a leq... if head, we fail. 
n will never be null or tail + auto n = get_leq(tx, key); + if (n == nullptr) + continue; + + if (n == head || n->key != key) + return false; + + // since we have EBR, `val` can be atomic, making this code quite simple + + // NB: get() doesn't care if the node is owned, just that it's still in + // the skiplist + V val_copy = n->val.load(std::memory_order_acquire); + // Check after reading value + if (tx.check_orec(n) == STMCAS::END_OF_TIME) + continue; + val = val_copy; + return true; + } + } + + /// Create a mapping from the provided `key` to the provided `val`, but only + /// if no such mapping already exists. This method does *not* have upsert + /// behavior for keys already present. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to create + /// @param val The value for the mapping to create + /// + /// @return True if the value was inserted, false otherwise. + bool insert(STMCAS *me, const K &key, V &val) { + data_t *preds[NUM_INDEX_LAYERS]; + int target_height = randomLevel(me); // The target index height of new_dn + + while (true) { + WSTEP tx(me); + // Get the insertion point, lock it or retry + auto n = get_leq(tx, key, preds, target_height); + + if (n == nullptr) + continue; + + // Since we have EBR, we can look at n->key without validation. If + // it matches `key`, return false. + if (n != head && n->key == key) + return false; + + // Acquire the pred of the to-be-inserted node + if (!tx.acquire_consistent(n)) { + tx.unwind(); + continue; + } + auto next = n->tower[0].next.get(tx); + + // If this is a "short" insert, we can finish quickly + if (target_height == 0) { + auto new_dn = data_t::make_data(target_height, key, val); + new_dn->tower[0].key.set(next->key, tx); + new_dn->tower[0].next.set(next, tx); + // NB: we don't need to acquire new_dn in this case, because anyone who + // finds their way to it will find it fully stitched in. + n->tower[0].key.set(key, tx); + n->tower[0].next.set(new_dn, tx); + return true; + } + + // Slow path for when the node is tall, and we have a lot of acquiring to + // do + if (index_stitch(tx, me, n, next, preds, key, val, target_height)) + return true; + tx.unwind(); + } + } + + /// Clear the mapping involving the provided `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to eliminate + /// + /// @return True if the key was found and removed, false otherwise + bool remove(STMCAS *me, const K &key) { + data_t *preds[NUM_INDEX_LAYERS]; + + while (true) { + WSTEP tx(me); + // Get predecessor, find its next, if != key return false + data_t *n = get_le(tx, key, preds); + if (n == nullptr) + continue; + auto found = n->tower[0].next.get(tx); + if (found == nullptr) { + tx.unwind(); + continue; + } + if (found == tail || found->key != key) + return false; + + // Acquire the target, make sure it's not owned + if (!tx.acquire_consistent(found)) { + tx.unwind(); + continue; + } + // Acquire the predecessor so we can edit its next pointer + if (!tx.acquire_consistent(n)) { + tx.unwind(); + continue; + } + + // Fast-path unstitch when it has height 0 + if (found->height == 0) { + auto nxt = found->tower[0].next.get(tx); + n->tower[0].next.set(nxt, tx); + n->tower[0].key.set(nxt->key, tx); + // NB: don't forget to set `node`'s pointers to null! 
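+        // (The traversal helpers return nullptr when they read a null `next`,
+        // so a reader that raced into `found` after this unstitch restarts its
+        // traversal instead of continuing through stale links.)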
+ found->tower[0].next.set(nullptr, tx); + tx.reclaim(found); + return true; + } + + // Slow-path unstitch when it's tall + if (index_unstitch(tx, me, found, n, preds)) + return true; + tx.unwind(); + } + } + +private: + /// get_leq uses the towers to skip from the head sentinel to the node + /// with the largest key <= the search key. It can return the head data + /// sentinel, but not the tail sentinel. + /// + /// There is no atomicity between get_leq and its caller. It returns the + /// node it found, along with the value of the orec for that node at the time + /// it was accessed. The caller needs to validate the orec before using the + /// returned node. + /// + /// get_leq *can* return an OWNED node. + /// + /// @param me The calling thread's descriptor + /// @param key The key for which we are doing a predecessor query. + /// + /// @return The data node that was found, and its orec's value + __attribute__((noinline)) data_t *get_leq(STEP &tx, const K &key) { + // We always start at the head sentinel. Scan its tower to find the + // highest non-tail level + data_t *curr = head; + int current_level = 0; + for (int i = NUM_INDEX_LAYERS; i > 0; --i) { + if (head->tower[i].next.get(tx) != tail) { + current_level = i; + break; + } + } + + // Traverse over and down through the index layers + while (current_level > 0) { + // Advance curr by moving forward in this index layer + curr = index_leq(tx, key, curr, current_level); + if (curr == nullptr) + return nullptr; + // On a key match, we can exit immediately + if (curr->key == key) + return curr; + --current_level; // Move down a level + } + + // Search in the data layer. Only return if result valid + return data_leq(tx, key, curr); + } + + /// A version of get_leq that is specialized for insert, where we need to get + /// the predecessors at all levels + __attribute__((noinline)) data_t *get_leq(WSTEP &tx, const K &key, + data_t **preds, int target_height) { + // We always start at the head sentinel. Scan its tower to find the + // highest non-tail level + data_t *curr = head; + int current_level = 0; + for (int i = NUM_INDEX_LAYERS; i > 0; --i) { + if (head->tower[i].next.get(tx) != tail) { + current_level = i; + break; + } + if (current_level <= target_height) + preds[i - 1] = head; + } + + // Traverse over and down through the index layers + while (current_level > 0) { + // Advance curr by moving forward in this index layer + curr = index_leq(tx, key, curr, current_level); + if (curr == nullptr) + return nullptr; + // On a key match, we can exit immediately + if (curr->key == key) + return curr; + // we need to save current node to preds + if (current_level <= target_height) + preds[current_level - 1] = curr; + --current_level; // Move down a level + } + + // Search in the data layer. Only return if result valid + return data_leq(tx, key, curr); + } + + /// A version of get_le that is specialized for remove, where we need to get + /// the predecessors at all levels + __attribute__((noinline)) data_t *get_le(WSTEP &tx, const K &key, + data_t **preds) { + // We always start at the head sentinel. 
Scan its tower to find the + // highest non-tail level + data_t *curr = head; + int current_level = 0; + for (int i = NUM_INDEX_LAYERS; i > 0; --i) { + if (head->tower[i].next.get(tx) != tail) { + current_level = i; + break; + } + preds[i - 1] = head; + } + + // Traverse over and down through the index layers + while (current_level > 0) { + // Advance curr by moving forward in this index layer + curr = index_le(tx, key, curr, current_level); + // Deal with index_le failing by returning null + if (curr == nullptr) + return nullptr; + // we need to save current node to preds + preds[current_level - 1] = curr; + --current_level; // Move down a level + } + + // Search in the data layer. Only return if result valid + return data_le(tx, key, curr); + } + + /// Traverse forward from `start`, considering only tower level `level`, + /// stopping at the largest key <= `key` + /// + /// This can return nodes that are OWNED. The caller must check. + /// + /// @param tx The enclosing RSTEP_TM operation's descriptor + /// @param key The key for which we are doing a predecessor query. + /// @param start The start position of this traversal. + /// @param level The tower level to consider + /// + /// @return The node that was found (possibly `start`). The caller must + /// validate the node + data_t *index_leq(STEP &tx, K key, data_t *start, uint64_t level) { + // NB: The consistency argument here is nuanced: keys are immutable. Next + // pointers are never modified during an unstitch. Thus we can race + // forward, and let the caller validate whatever we find. + auto curr = start; + while (true) { + data_t *next = curr->tower[level].next.get(tx); + auto next_key = curr->tower[level].key.get(tx); + if (tx.check_orec(curr) == STMCAS::END_OF_TIME) + return nullptr; + if (next == nullptr) + return nullptr; + if (next == tail) + return curr; + if (next_key == key) + return next; + if (next_key > key) + return curr; + curr = next; + } + } + + /// Traverse forward from `start`, considering only tower level `level`, + /// stopping at the largest key <= `key` + /// + /// This can return nodes that are OWNED. The caller must check. + /// + /// @param tx The enclosing RSTEP_TM operation's descriptor + /// @param key The key for which we are doing a predecessor query. + /// @param start The start position of this traversal. + /// @param level The tower level to consider + /// + /// @return The node that was found (possibly `start`). The caller must + /// validate the node + data_t *index_le(STEP &tx, K key, data_t *start, uint64_t level) { + // NB: The consistency argument here is nuanced: keys are immutable. Next + // pointers are never modified during an unstitch. Thus we can race + // forward, and let the caller validate whatever we find. + auto curr = start; + while (true) { + data_t *next = curr->tower[level].next.get(tx); + auto next_key = curr->tower[level].key.get(tx); + if (tx.check_orec(curr) == STMCAS::END_OF_TIME) + return nullptr; + if (next == nullptr) + return nullptr; + if (next == tail || next_key >= key) + return curr; + curr = next; + } + } + + /// Traverse in the data layer to find the largest node with key <= `key`. + /// + /// This can return an OWNED node + /// + /// @param tx The enclosing RSTEP_TM operation's descriptor + /// @param key The key for which we are doing a predecessor query. + /// @param start The start position of this traversal. This may be the head, + /// or an intermediate point in the list + /// + /// @return The node that was found (possibly `start`), and its orec value. 
+ /// {nullptr, 0} can be returned on inconsistency + data_t *data_leq(STEP &tx, K key, data_t *start) { + // Set up the start point for our traversal, then start iterating + data_t *curr = start; + data_t *next = curr->tower[0].next.get(tx); + while (true) { + // Case 0: `next` is nullptr: restart + if (next == nullptr) + return nullptr; + // Case 1: `next` is tail --> stop the search at curr + if (next == tail) + return curr; + // Case 2: `next` is a data node: stop if next->key >= key + auto nkey = next->key; + if (nkey > key) + return curr; + if (nkey == key) + return next; + // Case 3: Keep traversing + curr = next; + next = next->tower[0].next.get(tx); + } + } + + /// Traverse in the data layer to find the largest node with key <= `key`. + /// + /// @param tx The enclosing RSTEP_TM operation's descriptor + /// @param key The key for which we are doing a predecessor query. + /// @param start The start position of this traversal. This may be the head, + /// or an intermediate point in the list + /// + /// @return The node that was found (possibly `start`), and its orec value. + /// {nullptr, 0} can be returned on inconsistency + data_t *data_le(STEP &tx, K key, data_t *start) { + // Set up the start point for our traversal, then start iterating + data_t *curr = start; + data_t *next = curr->tower[0].next.get(tx); + while (true) { + if (next == nullptr) + return nullptr; + if (next == tail) + return curr; + auto nkey = next->key; + if (nkey >= key) + return curr; + curr = next; + next = next->tower[0].next.get(tx); + } + } + + /// Generate a random level for a new node + /// + /// NB: This code has been verified to produce a nice geometric distribution + /// in constant time per call + /// + /// @param me The caller's STMCAS operation + /// + /// @return a random number between 0 and NUM_INDEX_LAYERS, inclusive + int randomLevel(STMCAS *me) { + // Get a random int between 0 and 0xFFFFFFFF + int rr = me->rand(); + // Add 1 to it, then find the lowest nonzero bit. This way, we never return + // a zero for small integers, and the distribution is correct. + int res = __builtin_ffs(rr + 1); + // Now take one off of that, so that we return a zero-based integer + res -= 1; + // But if rr was 0xFFFFFFFF, we've got a problem, so coerce it back + // Also, drop it down to within NUM_INDEX_LAYERS + return (res < 0 || res > NUM_INDEX_LAYERS) ? NUM_INDEX_LAYERS : res; + } + + /// index_stitch is a small atomic operation that stitches a node in at a + /// given index level. + /// + /// @param me The currently active STMCAS operation + /// @param node The node that was just inserted and stitched into `level` + /// @param level The level below where we're stitching + /// @param release Should `node` be marked UNOWNED before returning? + bool index_stitch(WSTEP &tx, STMCAS *me, data_t *n, data_t *s, data_t **preds, + const K &key, V &val, int target_height) { + // acquire all the levels or fail. n and s are already acquired + for (int level = 0; level < target_height; ++level) { + // preds[level] is actually a /level + 1/ height node + auto pred = preds[level]; + if (!tx.acquire_consistent(pred)) + return false; + } + + // `n` is the predecessor to the node we're making, `s` is the successor + // Fully initialize new_dn before we make it visible at any level. This + // suffices to avoid acquiring the new node. 
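+    // Publication-order note: every tower entry of new_dn is written before
+    // any predecessor's next pointer is redirected to it, so a concurrent
+    // reader can never observe a partially built tower.  (Recall from above
+    // that preds[level] is the level+1 predecessor.)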
+    data_t *new_dn = data_t::make_data(target_height, key, val);
+    for (int level = 0; level < new_dn->height; ++level) {
+      auto succ = preds[level]->tower[level + 1].next.get(tx);
+      new_dn->tower[level + 1].key.set(succ->key, tx);
+      new_dn->tower[level + 1].next.set(succ, tx);
+    }
+    new_dn->tower[0].key.set(s->key, tx);
+    new_dn->tower[0].next.set(s, tx);
+
+    // Make it visible in the data level, then in index levels from bottom up
+    n->tower[0].next.set(new_dn, tx);
+    n->tower[0].key.set(new_dn->key, tx);
+    for (int level = 0; level < new_dn->height; ++level) {
+      preds[level]->tower[level + 1].next.set(new_dn, tx);
+      preds[level]->tower[level + 1].key.set(new_dn->key, tx);
+    }
+    return true;
+  }
+
+  /// Unstitch `node`, starting at its topmost index layer.  Reclaim once it's
+  /// fully unstitched.
+  ///
+  /// @param me   The currently active STMCAS operation
+  /// @param node The node that we are unstitching
+  bool index_unstitch(WSTEP &tx, STMCAS *me, data_t *node, data_t *prev,
+                      data_t **preds) {
+    // Acquire everything, from bottom to top
+    for (int level = 0; level < node->height; ++level)
+      if (!tx.acquire_consistent(preds[level]))
+        return false;
+
+    // Now update all the pointers, from top to bottom
+    for (int level = node->height; level >= 0; --level) {
+      auto pre = (level > 0) ? preds[level - 1] : prev;
+      auto nxt = node->tower[level].next.get(tx);
+      pre->tower[level].key.set(nxt->key, tx);
+      pre->tower[level].next.set(nxt, tx);
+    }
+
+    // NB: don't forget to set `node`'s pointers to null!
+    for (int level = node->height; level >= 0; --level)
+      node->tower[level].next.set(nullptr, tx);
+    // Reclaim it and we're done
+    tx.reclaim(node);
+    return true;
+  }
+};
diff --git a/artifact/ds/STMCAS/slist_omap.h b/artifact/ds/STMCAS/slist_omap.h
new file mode 100644
index 0000000..e5fc922
--- /dev/null
+++ b/artifact/ds/STMCAS/slist_omap.h
@@ -0,0 +1,328 @@
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+#include <type_traits>
+
+/// An ordered map, implemented as a singly-linked list.  This map supports
+/// get(), insert(), and remove() operations.
+///
+/// Note that the AVOID_OREC_CHECKS flag can be used to create an "optimized"
+/// version of this data structure, where list traversal (get_leq) avoids
+/// checking orecs in most cases.
+///
+/// @param K      The type of the keys stored in this map
+/// @param V      The type of the values stored in this map
+/// @param STMCAS The STMCAS implementation (PO or PS)
+/// @param AVOID_OREC_CHECKS A flag to enable an optimization that avoids
+///                          checking orecs when get_leq is doing its
+///                          read-only traversal
+template <typename K, typename V, typename STMCAS, bool AVOID_OREC_CHECKS>
+class slist_omap {
+  using WSTEP = typename STMCAS::WSTEP;
+  using RSTEP = typename STMCAS::RSTEP;
+  using snapshot_t = typename STMCAS::snapshot_t;
+  using ownable_t = typename STMCAS::ownable_t;
+  template <typename T> using FIELD = typename STMCAS::template sField<T>;
+
+  /// A list node.  It has a next pointer, but no key or value.  It's useful
+  /// for sentinels, so that K and V don't have to be default constructible.
+  struct node_t : ownable_t {
+    FIELD<node_t *> next; // Pointer to successor
+
+    /// Construct a node
+    node_t() : ownable_t(), next(nullptr) {}
+
+    /// Destructor is a no-op, but it needs to be virtual because of
+    /// inheritance
+    virtual ~node_t() {}
+  };
+
+  /// A list node that also has a key and value.  Note that keys are const,
+  /// and values are only accessed while the node is locked, so neither is a
+  /// tm_field.
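+  ///
+  /// For example, insert() below creates one of these via
+  /// `new data_t(key, val)`, fully initializes it, and only then publishes it
+  /// by swinging the (acquired) predecessor's next pointer.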
+ struct data_t : public node_t { + const K key; // The key of this key/value pair + V val; // The value of this key/value pair + + /// Construct a data_t + /// + /// @param _key The key that is stored in this node + /// @param _val The value that is stored in this node + data_t(const K &_key, const V &_val) : node_t(), key(_key), val(_val) {} + }; + + /// The pair returned by predecessor queries: a node and it's observed version + struct leq_t { + node_t *_obj = nullptr; // The object + uint64_t _ver = 0; // The observed version of the object + }; + + node_t *const head; // The list head pointer + node_t *const tail; // The list tail pointer + + /// During get_leq, we have a way to periodically capture snapshots, so that a + /// failed search can resume from an intermediate point. This specifies how + /// frequently to take a snapshot (higher is less frequent, i.e., once per + /// SNAPSHOT_FREQUENCY nodes). + const int SNAPSHOT_FREQUENCY; + +public: + /// Default construct a list by constructing and connecting two sentinel nodes + /// + /// @param me The operation that is constructing the list + /// @param cfg A configuration object that has a `snapshot_freq` field + slist_omap(STMCAS *me, auto *cfg) + : head(new node_t()), tail(new node_t()), + SNAPSHOT_FREQUENCY(cfg->snapshot_freq) { + // NB: Even though this code can't abort and doesn't acquire orecs, we still + // need to use a transaction (WSTEP), because we can't set fields of a + // node_t without a legal WSTEP context. We can cheat, though, and not + // bother to acquire orecs, because we know nothing is shared. + WSTEP tx(me); + head->next.set(tail, tx); + } + +private: + /// Convert a snapshot_t into a leq_t + leq_t leq(const snapshot_t &s) { return leq_t{(node_t *)s._obj, s._ver}; } + + /// Convert a leq_t into a snapshot_t + snapshot_t snapshot(const leq_t &l) { return snapshot_t{l._obj, l._ver}; } + + /// get_leq is an inclusive predecessor query that returns the largest node + /// whose key is <= the provided key. It can return the head sentinel, but + /// not the tail sentinel. + /// + /// There is no atomicity between get_leq and its caller. It returns the node + /// it found, along with the value of the orec for that node at the time it + /// was accessed. The caller needs to validate the orec before using the + /// returned node. + /// + /// @param me The calling thread's descriptor + /// @param key The key for which we are doing a predecessor query. + /// @param lt_mode When `true`, this behaves as `get_lt`. When `false`, it + /// behaves as `get_leq`. + /// + /// @return The node that was found, and its orec value + leq_t get_leq(STMCAS *me, const K key, bool lt_mode = false) { + // Start a transactional traversal from the head node, or from the latest + // valid snapshot, if we have one. If a transaction encounters an + // inconsistency, it will come back to here to start a new traversal. + while (true) { + RSTEP tx(me); + + // Figure out where to start this traversal: initially we start at head, + // but on a retry, we might have a snapshot. + // + // NB: snapshots are always < key + leq_t curr = + (me->snapshots.empty()) ? leq_t{head, 0} : leq(me->snapshots.top()); + + // Validate the start point + if (curr._obj == head) { + // For head, be sure to save curr._ver in case we end up returning head + if ((curr._ver = tx.check_orec(curr._obj)) == STMCAS::END_OF_TIME) + continue; + } else { + // Validate snapshot as a continuation. Drop the snapshot on failure. 
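+        // NB: check_continuation succeeds only if the node's orec still holds
+        //     the version we recorded when the snapshot was taken, i.e.,
+        //     nobody modified the node since then.  Only in that case is it
+        //     safe to resume the search mid-list instead of from head.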
+ if (!tx.check_continuation(curr._obj, curr._ver)) { + me->snapshots.drop(); + continue; + } + } + + // Prepare a countdown timer for snapshots + int nodes_until_snapshot = SNAPSHOT_FREQUENCY; + + // Starting at `next`, search for key. Breaking out of this will take us + // back to the top of the function. + while (true) { + // Read the next node, fail if we can't do it consistently + auto *next = curr._obj->next.get(tx); + uint64_t next_ver = 0; + if (!AVOID_OREC_CHECKS) { + next_ver = tx.check_orec(next); + if (next_ver == STMCAS::END_OF_TIME) + break; + } + + // Stop if next's key is too big or next is tail + if (next == tail) { + if (AVOID_OREC_CHECKS && + (curr._ver = tx.check_orec(curr._obj)) == STMCAS::END_OF_TIME) + break; + return curr; + } + data_t *dn = static_cast(next); + if (lt_mode ? dn->key >= key : dn->key > key) { + if (AVOID_OREC_CHECKS && + (curr._ver = tx.check_orec(curr._obj)) == STMCAS::END_OF_TIME) + break; + return curr; + } + + // Stop if `next` is the match we were hoping for + if (dn->key == key) { + if (AVOID_OREC_CHECKS) { + next_ver = tx.check_orec(next); + if (next_ver == STMCAS::END_OF_TIME) + break; + } + return {next, next_ver}; + } + + // Keep traversing to `next`. Maybe take a snapshot first + if (--nodes_until_snapshot == 0) { + // if (AVOID_OREC_CHECKS && + // (curr._ver = tx.check_orec(curr._obj)) == STMCAS::END_OF_TIME) + // break; + // me->snapshots.push_back(snapshot(curr)); + if (AVOID_OREC_CHECKS) { + if ((curr._ver = tx.check_orec(curr._obj)) != STMCAS::END_OF_TIME) + me->snapshots.push_back(snapshot(curr)); + } else + me->snapshots.push_back(snapshot(curr)); + nodes_until_snapshot = SNAPSHOT_FREQUENCY; + } + curr._obj = next; + if (!AVOID_OREC_CHECKS) + curr._ver = next_ver; + } + } + } + +public: + /// Search the data structure for a node with key `key`. If not found, return + /// false. If found, return true, and set `val` to the value associated with + /// `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key to search + /// @param val A ref parameter for returning key's value, if found + /// + /// @return True if the key is found, false otherwise. The reference + /// parameter `val` is only valid when the return value is true. + bool get(STMCAS *me, const K &key, V &val) { + // If we can't use the result of get_leq, we'll loop back, and the next + // get_leq will start from a snapshot + me->snapshots.clear(); + while (true) { + // get_leq will use a read-only transaction to find the largest node with + // a key <= `key`. + // + // Postconditions of get_leq: n != null, n != tail, we have a valid + // node/version pair, and n.key <= `key` + auto n = get_leq(me, key); + + // Since we have EBR, we can read n.key without validating and fast-fail + // on key-not-found + if (n._obj == head || static_cast(n._obj)->key != key) + return false; + + // Use a hand-over-hand TM pattern to finish the get(). If the value is + // scalar, we can cast it to atomic, read it, and validate. Otherwise we + // need to lock the node. 
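+      // In sketch form, the scalar path below is:
+      //   V copy = atomic_load(&node->val);          // racy, EBR-safe read
+      //   if (!check_continuation(node, ver)) retry; // validate afterward
+      //   return copy;
+      // It takes no locks and performs no writes; a non-scalar V can't be
+      // read atomically, so that case briefly acquires the node instead.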
+      if (std::is_scalar<V>::value) {
+        RSTEP tx(me);
+
+        // NB: given EBR, we don't need to worry about n._obj being deleted,
+        //     so we don't need to validate before looking at the value
+        data_t *dn = static_cast<data_t *>(n._obj);
+        V val_copy = reinterpret_cast<std::atomic<V> *>(&dn->val)->load(
+            std::memory_order_acquire);
+        if (!tx.check_continuation(n._obj, n._ver))
+          continue;
+        val = val_copy;
+        return true;
+      } else {
+        WSTEP tx(me);
+
+        // If this acquire continuation succeeds, it's not deleted, it's a
+        // data node, and it's valid.  If it fails, we need to restart
+        if (!tx.acquire_continuation(n._obj, n._ver)) {
+          tx.unwind(); // not strictly needed, but a good habit :)
+          continue;
+        }
+
+        // NB: we aren't changing val, so we can unwind when we're done with it
+        val = static_cast<data_t *>(n._obj)->val;
+        tx.unwind();
+        return true;
+      }
+    }
+  }
+
+  /// Create a mapping from the provided `key` to the provided `val`, but only
+  /// if no such mapping already exists.  This method does *not* have upsert
+  /// behavior for keys already present.
+  ///
+  /// @param me  The calling thread's descriptor
+  /// @param key The key for the mapping to create
+  /// @param val The value for the mapping to create
+  ///
+  /// @return True if the value was inserted, false otherwise.
+  bool insert(STMCAS *me, const K &key, V &val) {
+    // NB: The pattern here is similar to `get`
+    me->snapshots.clear();
+    while (true) {
+      auto n = get_leq(me, key);
+
+      // Since we have EBR, we can look at n._obj->key without validation.  If
+      // it matches `key`, return false.
+      if (n._obj != head && static_cast<data_t *>(n._obj)->key == key)
+        return false;
+
+      // either n._obj is `head`, or it's a key that's too small.  Let's
+      // insert!
+      WSTEP tx(me);
+      // lock n, fail if we can't get it
+      if (!tx.acquire_continuation(n._obj, n._ver)) {
+        tx.unwind();
+        continue;
+      }
+
+      // stitch in a new node
+      data_t *new_dn = new data_t(key, val);
+      new_dn->next.set(n._obj->next.get(tx), tx);
+      n._obj->next.set(new_dn, tx);
+      return true;
+    }
+  }
+
+  /// Clear the mapping involving the provided `key`.
+  ///
+  /// @param me  The calling thread's descriptor
+  /// @param key The key for the mapping to eliminate
+  ///
+  /// @return True if the key was found and removed, false otherwise
+  bool remove(STMCAS *me, const K &key) {
+    // NB: The pattern here is similar to `get`
+    me->snapshots.clear();
+    while (true) {
+      // NB: this will be a lt query, not a leq query
+      auto prev = get_leq(me, key, true);
+
+      WSTEP tx(me);
+      // lock the predecessor, read its next
+      if (!tx.acquire_continuation(prev._obj, prev._ver)) {
+        tx.unwind();
+        continue;
+      }
+      auto curr = prev._obj->next.get(tx);
+
+      // if curr doesn't have a matching key, fail
+      if (curr == tail || static_cast<data_t *>(curr)->key != key) {
+        tx.unwind();
+        return false;
+      }
+
+      // lock the node to remove, then unstitch it
+      if (!tx.acquire_aggressive(curr)) {
+        tx.unwind();
+        continue;
+      }
+      auto next = curr->next.get(tx);
+      prev._obj->next.set(next, tx);
+      tx.reclaim(curr);
+      return true;
+    }
+  }
+};
diff --git a/artifact/ds/baseline/ext_ticket_bst/plaf.h b/artifact/ds/baseline/ext_ticket_bst/plaf.h
new file mode 100644
index 0000000..c0a6744
--- /dev/null
+++ b/artifact/ds/baseline/ext_ticket_bst/plaf.h
@@ -0,0 +1,41 @@
+/**
+ * C++ record manager implementation (PODC 2015) by Trevor Brown.
+ * + * Copyright (C) 2015 Trevor Brown + * + */ + +#ifndef MACHINECONSTANTS_H +#define MACHINECONSTANTS_H + +#ifndef MAX_THREADS_POW2 +#define MAX_THREADS_POW2 \ + 128 // MUST BE A POWER OF TWO, since this is used for some bitwise operations +#endif +#ifndef LOGICAL_PROCESSORS +#define LOGICAL_PROCESSORS MAX_THREADS_POW2 +#endif + +#ifndef SOFTWARE_BARRIER +#define SOFTWARE_BARRIER asm volatile("" : : : "memory") +#endif + +// the following definition is only used to pad data to avoid false sharing. +// although the number of words per cache line is actually 8, we inflate this +// figure to counteract the effects of prefetching multiple adjacent cache +// lines. +#define PREFETCH_SIZE_WORDS 16 +#define PREFETCH_SIZE_BYTES 128 +#define BYTES_IN_CACHE_LINE 64 + +#define CAT2(x, y) x##y +#define CAT(x, y) CAT2(x, y) + +#define PAD64 volatile char CAT(___padding, __COUNTER__)[64] +#define PAD volatile char CAT(___padding, __COUNTER__)[128] + +#define CASB __sync_bool_compare_and_swap +#define CASV __sync_val_compare_and_swap +#define FAA __sync_fetch_and_add + +#endif /* MACHINECONSTANTS_H */ diff --git a/artifact/ds/baseline/ext_ticket_bst/ticket_impl.h b/artifact/ds/baseline/ext_ticket_bst/ticket_impl.h new file mode 100644 index 0000000..44fb5e9 --- /dev/null +++ b/artifact/ds/baseline/ext_ticket_bst/ticket_impl.h @@ -0,0 +1,379 @@ +/* + * File: bst_tk.c + * Author: Vasileios Trigonakis + * Description: Asynchronized Concurrency: The Secret to Scaling Concurrent + * Search Data Structures, Tudor David, Rachid Guerraoui, Vasileios + *Trigonakis, ASPLOS '15 bst_tk.c is part of ASCYLIB + * + * Copyright (c) 2014 Vasileios Trigonakis , + * Tudor David + * Distributed Programming Lab (LPD), EPFL + * + * ASCYLIB is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2 + * of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/* + * File: ticket.h + * Author: Trevor Brown + * + * Substantial improvements to interface, memory reclamation and bug fixing. 
+ *
+ * Created on June 7, 2017, 1:38 PM
+ */
+
+#pragma once
+
+#include "plaf.h"
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#define likely(x) __builtin_expect((x), 1)
+#define unlikely(x) __builtin_expect((x), 0)
+
+#if !defined(COMPILER_BARRIER)
+#define COMPILER_BARRIER asm volatile("" ::: "memory")
+#endif
+
+typedef union tl32 {
+  struct {
+    volatile uint16_t version;
+    volatile uint16_t ticket;
+  };
+  volatile uint32_t to_uint32;
+} tl32_t;
+
+typedef union tl {
+  tl32_t lr[2];
+  uint64_t to_uint64;
+} tl_t;
+
+static inline int tl_trylock_version(volatile tl_t *tl, volatile tl_t *tl_old,
+                                     int right) {
+  uint16_t version = tl_old->lr[right].version;
+  uint16_t one = (uint16_t)1;
+  if (unlikely(version != tl_old->lr[right].ticket)) {
+    return 0;
+  }
+
+#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 6
+  tl32_t tlo = {{.version = version, .ticket = version}};
+  tl32_t tln = {{.version = version, .ticket = (uint16_t)(version + one)}};
+  return CASV(&tl->lr[right].to_uint32, tlo.to_uint32, tln.to_uint32) ==
+         tlo.to_uint32;
+#else
+  tl32_t tlo = {{version, version}};
+  tl32_t tln = {{version, (uint16_t)(version + 1)}};
+  return CASV(&tl->lr[right].to_uint32, tlo.to_uint32, tln.to_uint32) ==
+         tlo.to_uint32;
+#endif
+}
+
+#define TLN_REMOVED 0x0000FFFF0000FFFFLL
+
+static inline int tl_trylock_version_both(volatile tl_t *tl,
+                                          volatile tl_t *tl_old) {
+  uint16_t v0 = tl_old->lr[0].version;
+  uint16_t v1 = tl_old->lr[1].version;
+  if (unlikely(v0 != tl_old->lr[0].ticket || v1 != tl_old->lr[1].ticket)) {
+    return 0;
+  }
+
+#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 6
+  tl_t tlo = {.to_uint64 = tl_old->to_uint64};
+  return CASV(&tl->to_uint64, tlo.to_uint64, TLN_REMOVED) == tlo.to_uint64;
+#else
+  uint64_t tlo = *(uint64_t *)tl_old;
+  return CASV((uint64_t *)tl, tlo, TLN_REMOVED) == tlo;
+#endif
+}
+
+static inline void tl_unlock(volatile tl_t *tl, int right) {
+  COMPILER_BARRIER;
+  tl->lr[right].version = tl->lr[right].version + 1;
+  COMPILER_BARRIER;
+}
+
+static inline void tl_revert(volatile tl_t *tl, int right) {
+  COMPILER_BARRIER;
+  tl->lr[right].ticket = tl->lr[right].ticket - 1;
+  COMPILER_BARRIER;
+}
+
+template <class skey_t, class sval_t, class DESCRIPTOR>
+struct node_t : DESCRIPTOR::reclaimable_t {
+  skey_t key;
+  sval_t val;
+  struct node_t *volatile left;
+  struct node_t *volatile right;
+  volatile tl_t lock;
+#ifdef USE_PADDING
+  char pad[PAD_SIZE];
+#endif
+};
+
+template <class skey_t, class sval_t, skey_t _KEY_MIN, skey_t _KEY_MAX,
+          sval_t _VAL_RESERVED, class DESCRIPTOR>
+class ticket {
+private:
+  PAD;
+  node_t<skey_t, sval_t, DESCRIPTOR> *root;
+  PAD;
+  const skey_t KEY_MIN;
+  const skey_t KEY_MAX;
+  const sval_t NO_VALUE;
+  PAD;
+  int init[MAX_THREADS_POW2] = {
+      0,
+  };
+  PAD;
+
+  node_t<skey_t, sval_t, DESCRIPTOR> *
+  new_node(skey_t key, sval_t val, node_t<skey_t, sval_t, DESCRIPTOR> *l,
+           node_t<skey_t, sval_t, DESCRIPTOR> *r);
+  node_t<skey_t, sval_t, DESCRIPTOR> *new_node_no_init();
+
+public:
+  ticket(DESCRIPTOR *me, auto *cfg)
+      : KEY_MIN(_KEY_MIN), KEY_MAX(_KEY_MAX), NO_VALUE(_VAL_RESERVED) {
+    node_t<skey_t, sval_t, DESCRIPTOR> *_min =
+        new_node(KEY_MIN, NO_VALUE, NULL, NULL);
+    node_t<skey_t, sval_t, DESCRIPTOR> *_max =
+        new_node(KEY_MAX, NO_VALUE, NULL, NULL);
+    root = new_node(KEY_MAX, NO_VALUE, _min, _max);
+  }
+
+  ~ticket() {}
+
+  node_t<skey_t, sval_t, DESCRIPTOR> *getRoot() { return root; }
+
+  sval_t get_internal(DESCRIPTOR *me, skey_t &key, sval_t &val);
+  sval_t insert_internal(DESCRIPTOR *me, skey_t &key, sval_t &val);
+  sval_t remove_internal(DESCRIPTOR *me, skey_t &key);
+
+  node_t<skey_t, sval_t, DESCRIPTOR> *get_root() { return root; }
+
+  bool get(DESCRIPTOR *me, const skey_t &key, sval_t &val) {
+    skey_t k = key;
+    sval_t v = get_internal(me, k, val);
+    if (v == NO_VALUE)
+      return false;
+    val = v;
+    return true;
+  }
+
+  bool insert(DESCRIPTOR *me, const skey_t &key, sval_t &val) {
+    sval_t v = val;
+    skey_t k = key;
+    return NO_VALUE == insert_internal(me, k, v);
+  }
+
+  bool remove(DESCRIPTOR *me, const skey_t &key) {
+    skey_t k = key;
+    return NO_VALUE != remove_internal(me, k);
+  }
+};
+
+template <class skey_t, class sval_t, skey_t _KEY_MIN, skey_t _KEY_MAX,
+          sval_t _VAL_RESERVED, class D>
+node_t<skey_t, sval_t, D> *
+ticket<skey_t, sval_t, _KEY_MIN, _KEY_MAX, _VAL_RESERVED, D>::new_node(
+    skey_t key, sval_t val, node_t<skey_t, sval_t, D> *l,
+    node_t<skey_t, sval_t, D> *r) {
+  auto node = new_node_no_init();
+  node->val = val;
+  node->key = key;
+  node->left = l;
+  node->right = r;
+  return node;
+}
+
+template <class skey_t, class sval_t, skey_t _KEY_MIN, skey_t _KEY_MAX,
+          sval_t _VAL_RESERVED, class D>
+node_t<skey_t, sval_t, D> *
+ticket<skey_t, sval_t, _KEY_MIN, _KEY_MAX, _VAL_RESERVED,
+       D>::new_node_no_init() {
+  auto node = new node_t<skey_t, sval_t, D>();
+  if (unlikely(node == NULL)) {
+    // perror("malloc @ new_node");
+    // exit(1);
+  }
+  node->lock.to_uint64 = 0;
+  node->val = NO_VALUE;
+  return node;
+}
+
+template <class skey_t, class sval_t, skey_t _KEY_MIN, skey_t _KEY_MAX,
+          sval_t _VAL_RESERVED, class D>
+sval_t ticket<skey_t, sval_t, _KEY_MIN, _KEY_MAX, _VAL_RESERVED,
+              D>::get_internal(D *me, skey_t &key, sval_t &val) {
+  node_t<skey_t, sval_t, D> *curr = root;
+
+  while (likely(curr->left != NULL)) {
+    if (key < curr->key) {
+      curr = curr->left;
+    } else {
+      curr = curr->right;
+    }
+  }
+
+  if (curr->key == key) {
+    return curr->val;
+  }
+
+  return NO_VALUE;
+}
+
+template <class skey_t, class sval_t, skey_t _KEY_MIN, skey_t _KEY_MAX,
+          sval_t _VAL_RESERVED, class D>
+sval_t ticket<skey_t, sval_t, _KEY_MIN, _KEY_MAX, _VAL_RESERVED,
+              D>::insert_internal(D *me, skey_t &key, sval_t &val) {
+  node_t<skey_t, sval_t, D> *curr;
+  node_t<skey_t, sval_t, D> *pred = NULL;
+  volatile uint64_t curr_ver = 0;
+  uint64_t pred_ver = 0, right = 0;
+
+retry: { // reclamation guarded section
+  curr = root;
+  do {
+    curr_ver = curr->lock.to_uint64;
+
+    pred = curr;
+    pred_ver = curr_ver;
+
+    if (key < curr->key) {
+      right = 0;
+      curr = curr->left;
+    } else {
+      right = 1;
+      curr = curr->right;
+    }
+  } while (likely(curr->left != NULL));
+
+  if (curr->key == key) {
+    // insert if absent
+    return curr->val;
+  }
+
+  // node_t *nn_leaked = new_node(tid, key, val, NULL, NULL);
+  node_t<skey_t, sval_t, D> *nn = new_node(key, val, NULL, NULL);
+  node_t<skey_t, sval_t, D> *nr = new_node_no_init();
+
+  if ((!tl_trylock_version(&pred->lock, (volatile tl_t *)&pred_ver, right))) {
+    // recmgr->deallocate(tid, nn);
+    // recmgr->deallocate(tid, nr);
+    goto retry;
+  }
+
+  if (key < curr->key) {
+    nr->key = curr->key;
+    nr->left = nn;
+    nr->right = curr;
+  } else {
+    nr->key = key;
+    nr->left = curr;
+    nr->right = nn;
+  }
+
+  if (right) {
+    pred->right = nr;
+  } else {
+    pred->left = nr;
+  }
+
+  tl_unlock(&pred->lock, right);
+
+  return NO_VALUE;
+}
+}
+
+template <class skey_t, class sval_t, skey_t _KEY_MIN, skey_t _KEY_MAX,
+          sval_t _VAL_RESERVED, class D>
+sval_t ticket<skey_t, sval_t, _KEY_MIN, _KEY_MAX, _VAL_RESERVED,
+              D>::remove_internal(D *me, skey_t &key) {
+  node_t<skey_t, sval_t, D> *curr;
+  node_t<skey_t, sval_t, D> *pred = NULL;
+  node_t<skey_t, sval_t, D> *ppred = NULL;
+  volatile uint64_t curr_ver = 0;
+  uint64_t pred_ver = 0, ppred_ver = 0, right = 0, pright = 0;
+
+retry: { // reclamation guarded section
+  curr = root;
+
+  do {
+    curr_ver = curr->lock.to_uint64;
+
+    ppred = pred;
+    ppred_ver = pred_ver;
+    pright = right;
+
+    pred = curr;
+    pred_ver = curr_ver;
+
+    if (key < curr->key) {
+      right = 0;
+      curr = curr->left;
+    } else {
+      right = 1;
+      curr = curr->right;
+    }
+  } while (likely(curr->left != NULL));
+
+  if (curr->key != key) {
+    return NO_VALUE;
+  }
+
+  if ((!tl_trylock_version(&ppred->lock, (volatile tl_t *)&ppred_ver,
+                           pright))) {
+    goto retry;
+  }
+
+  if ((!tl_trylock_version_both(&pred->lock, (volatile tl_t *)&pred_ver))) {
+    tl_revert(&ppred->lock, pright);
+    goto retry;
+  }
+
+  if (pright) {
+    if (right) {
+      ppred->right = pred->left;
+    } else {
+      ppred->right = pred->right;
+    }
+  } else {
+    if (right) {
+      ppred->left = pred->left;
+    } else {
+      ppred->left = pred->right;
+    }
+  }
+
+  tl_unlock(&ppred->lock, pright);
+
+  me->reclaim(curr); // recmgr->retire(tid, curr);
+  me->reclaim(pred); // recmgr->retire(tid, pred);
+
+  return curr->val;
+}
+}
diff --git a/artifact/ds/baseline/int_bst_pathcas/casword.h b/artifact/ds/baseline/int_bst_pathcas/casword.h
new file mode 100644
index 0000000..bdac6dd
--- /dev/null
+++ b/artifact/ds/baseline/int_bst_pathcas/casword.h
@@ -0,0 +1,70 @@
+#pragma once
+
+#include "kcas.h"
+#include <cassert>
+#include <cstdint>
+#include <iostream>
+#include <type_traits>
+using namespace std;
+
+// #define casword_t uintptr_t
+#define SHIFT_BITS 2
+#define CASWORD_CAST(x) ((CASWORD_BITS_TYPE)(x))
+
+template <typename T> casword<T>::casword() {
+  T a = 0;
+  bits = CASWORD_CAST(a);
+}
+
+template <typename T> inline T casword<T>::setInitVal(T other) {
+  if (is_pointer<T>::value) {
+    bits = CASWORD_CAST(other);
+  } else {
+    bits = CASWORD_CAST(other);
+    assert((bits & 0xE000000000000000) == 0);
+    bits = bits << SHIFT_BITS;
+  }
+
+  return other;
+}
+
+template <typename T> inline casword<T>::operator T() {
+  if (is_pointer<T>::value) {
+    return (T)kcas::instance.readPtr(&bits);
+  } else {
+    return (T)kcas::instance.readVal(&bits);
+  }
+}
+
+template <typename T> inline T casword<T>::operator->() {
+  assert(is_pointer<T>::value);
+  return *this;
+}
+
+template <typename T> inline T casword<T>::getValue() {
+  if (is_pointer<T>::value) {
+    return (T)kcas::instance.readPtr(&bits);
+  } else {
+    return (T)kcas::instance.readVal(&bits);
+  }
+}
+
+template <typename T>
+inline casword_t casword<T>::getValueUnsafe(bool &isPtr) {
+  isPtr = is_pointer<T>::value;
+  return bits;
+}
+
+template <typename T>
+inline void casword<T>::addToDescriptor(T oldVal, T newVal) {
+  auto descriptor = kcas::instance.getDescriptor();
+  auto c_oldVal = (casword_t)oldVal;
+  auto c_newVal = (casword_t)newVal;
+  assert(((c_oldVal & 0xE000000000000000) == 0) &&
+         ((c_newVal & 0xE000000000000000) == 0));
+
+  if (is_pointer<T>::value) {
+    descriptor->addPtrAddr(&bits, c_oldVal, c_newVal);
+  } else {
+    descriptor->addValAddr(&bits, c_oldVal, c_newVal);
+  }
+}
diff --git a/artifact/ds/baseline/int_bst_pathcas/internal_kcas_avl.h b/artifact/ds/baseline/int_bst_pathcas/internal_kcas_avl.h
new file mode 100644
index 0000000..0cece1c
--- /dev/null
+++ b/artifact/ds/baseline/int_bst_pathcas/internal_kcas_avl.h
@@ -0,0 +1,1251 @@
+#pragma once
+
+#include <cstdint>
+
+#ifndef KCAS_TYPE
+#define KCAS_HTM
+#define KCAS_TYPE "KCAS_HTM"
+#endif
+
+#define MAX_KCAS 21
+#include "kcas.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <fstream>
+#include <iostream>
+#include <unordered_set>
+
+using namespace std;
+
+#define MAX_THREADS 200
+#define MAX_PATH_SIZE 16384
+#define PADDING_BYTES 128
+
+#define IS_MARKED(word) (word & 0x1)
+
+template <class RecordManager, typename K, typename V>
+struct Node : RecordManager::reclaimable_t {
+  casword<K> key;
+  casword<casword_t> vNumMark;
+  casword<Node<RecordManager, K, V> *> left;
+  casword<Node<RecordManager, K, V> *> right;
+  casword<Node<RecordManager, K, V> *> parent;
+  casword<int> height;
+  casword<V> value;
+};
+
+enum RetCode : int {
+  RETRY = 0,
+  UNNECCESSARY = 0,
+  FAILURE = -1,
+  SUCCESS = 1,
+  SUCCESS_WITH_HEIGHT_UPDATE = 2
+};
+
+template <class RecordManager, typename K, typename V, int _numThreads,
+          int _minKey, long long _maxKey>
+class InternalKCAS {
+private:
+  /*
+   * ObservedNode acts as a Node-VersionNumber pair to track an "observed
We can then be sure that a version number + * does not change after we have read it by comparing the current version + * number to this saved value NOTE: This is a thread-private structure, no + * fields need to be volatile + */ + struct ObservedNode { + ObservedNode() {} + Node *node = NULL; + casword_t oVNumMark = -1; + }; + + struct PathContainer { + ObservedNode path[MAX_PATH_SIZE]; + volatile char padding[PADDING_BYTES]; + }; + + volatile char padding0[PADDING_BYTES]; + // Debugging, used to validate that no thread's parent can't be NULL, save for + // the root + bool init = false; + const int numThreads; + const int minKey; + const long long maxKey; + volatile char padding4[PADDING_BYTES]; + Node *root; + volatile char padding5[PADDING_BYTES]; + RecordManager *const recmgr; + volatile char padding7[PADDING_BYTES]; + PathContainer paths[MAX_THREADS]; + volatile char padding8[PADDING_BYTES]; + +public: + InternalKCAS(RecordManager *me, auto *cfg); + + ~InternalKCAS(); + + bool contains(const int tid, const K &key); + + V insertIfAbsent(const int tid, const K &key, const V &value); + + V erase(const int tid, const K &key); + + bool validate(); + + void printDebuggingDetails(); + + Node *getRoot(); + + void initThread(const int tid); + + void deinitThread(const int tid); + + int getHeight(Node *node); + + RecordManager *const debugGetRecMgr() { return recmgr; } + + // These are the functions that the benchmark will call + bool get(RecordManager *me, const K &key, V &val) { + return contains(me->tid, key); + } + + bool insert(RecordManager *me, const K &key, V &val) { + return 0 == insertIfAbsent(me->tid, key, val); + } + + bool remove(RecordManager *me, const K &key) { + return 0 != erase(me->tid, key); + } + +private: + Node * + createNode(const int tid, Node *parent, K key, V value); + + void freeSubtree(const int tid, Node *node); + + long validateSubtree(Node *node, long smaller, + long larger, std::unordered_set &keys, + ofstream &graph, ofstream &log, bool &errorFound); + + int internalErase(const int tid, ObservedNode &parentObserved, + ObservedNode &nodeObserved, const K &key); + + int internalInsert(const int tid, ObservedNode &parentObserved, const K &key, + const V &value); + + int countChildren(const int tid, Node *node); + + int getSuccessor(const int tid, Node *node, + ObservedNode &succObserved, const K &key); + + bool validatePath(const int tid, const int &size, const K &key, + ObservedNode path[]); + + int search(const int tid, ObservedNode &parentObserved, + ObservedNode &nodeObserved, const K &key); + + int rotateRight(const int tid, ObservedNode &parentObserved, + ObservedNode &nodeObserved, ObservedNode &leftChildObserved); + + int rotateLeft(const int tid, ObservedNode &parentObserved, + ObservedNode &nodeObserved, ObservedNode &rightChildObserved); + + int rotateLeftRight(const int tid, ObservedNode &parentObserved, + ObservedNode &nodeObserved, + ObservedNode &leftChildObserved, + ObservedNode &leftRigthChildObserved); + + int rotateRightLeft(const int tid, ObservedNode &parentObserved, + ObservedNode &nodeObserved, + ObservedNode &rightChildObserved, + ObservedNode &rightLeftChildObserved); + + void fixHeightAndRebalance(const int tid, Node *node); + + int fixHeight(const int tid, ObservedNode &observedNode); +}; + +template +Node * +InternalKCAS::createNode( + const int tid, Node *parent, K key, V value) { + Node *node = new Node(); + // No node, save for root, should have a NULL parent + // assert(!init || parent->key < maxKey); + node->key.setInitVal(key); + 
node->value.setInitVal(value); + node->parent.setInitVal(parent); + node->vNumMark.setInitVal(0); + node->left.setInitVal(NULL); + node->right.setInitVal(NULL); + node->height.setInitVal(1); + return node; +} + +template +InternalKCAS::InternalKCAS( + RecordManager *_recmgr, auto *cfg) + : numThreads(_numThreads), minKey(_minKey), maxKey(_maxKey), + recmgr(_recmgr) { + assert(_numThreads < MAX_THREADS); + int tid = 0; + initThread(tid); + root = createNode(0, NULL, maxKey, 0); + init = true; +} + +template +InternalKCAS::~InternalKCAS() { + int tid = 0; + initThread(tid); + freeSubtree(tid, root); + deinitThread(tid); + // delete recmgr; +} + +template +inline Node * +InternalKCAS::getRoot() { + return root->left; +} + +template +void InternalKCAS::initThread(const int tid) { + // recmgr->initThread(tid); +} + +template +void InternalKCAS::deinitThread(const int tid) { + // recmgr->deinitThread(tid); +} + +template +int InternalKCAS::getHeight( + Node *node) { + return node == NULL ? 0 : node->height; +} + +/* getSuccessor(const int tid, Node * node, ObservedNode &succObserved, int key) + * ### Gets the successor of a given node in it's subtree ### + * returns the successor of a given node stored within an ObservedNode with the + * observed version number. + * Returns an integer, 1 indicating the process was successful, 0 indicating a + * retry + */ +template +inline int +InternalKCAS::getSuccessor( + const int tid, Node *node, ObservedNode &oSucc, + const K &key) { + auto &path = paths[tid].path; + + while (true) { + Node *succ = node->right; + path[0].node = node; + path[0].oVNumMark = node->vNumMark; + int currSize = 1; + + while (succ != NULL) { + assert(currSize < MAX_PATH_SIZE - 1); + path[currSize].node = succ; + path[currSize].oVNumMark = succ->vNumMark; + currSize++; + succ = succ->left; + } + + if (validatePath(tid, currSize, key, path) && currSize > 1) { + oSucc = path[currSize - 1]; + return RetCode::SUCCESS; + } else { + return RetCode::RETRY; + } + } +} + +template +inline bool +InternalKCAS::contains( + const int tid, const K &key) { + assert(key <= maxKey); + int result; + ObservedNode oNode; + ObservedNode oParent; + // auto guard = recmgr->getGuard(tid); + + while ((result = search(tid, oParent, oNode, key)) == RetCode::RETRY) { + /* keep trying until we get a result */ + } + return result == RetCode::SUCCESS; +} + +/* search(const int tid, ObservedNode &predObserved, ObservedNode + * &parentObserved, ObservedNode &nodeObserved, const int &key) A proposed + * successor-predecessor pair is generated by searching for a given key, if the + * key is not found, the path is then validated to ensure it was not missed. + * Where appropriate, the predecessor (predObserved), parent (parentObserved) + * and node (nodeObserved) are provided to the caller. 
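+ * Version-number encoding: the low bit of vNumMark is the mark bit (see
+ * IS_MARKED), so a successful update advances a version by 2 (e.g., 4 -> 6),
+ * while a removal advances it by 3 (e.g., 4 -> 7), simultaneously
+ * invalidating concurrent observers and permanently marking the node.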
+ */ +template +int InternalKCAS::search( + const int tid, ObservedNode &oParent, ObservedNode &oNode, const K &key) { + assert(key <= maxKey); + + K currKey; + casword_t nodeVNumMark; + + ObservedNode *path = paths[tid].path; + path[0].node = root; + path[0].oVNumMark = root->vNumMark; + + Node *node = root->left; + + int currSize = 1; + + while (true) { + assert(currSize < MAX_PATH_SIZE - 1); + // We have hit a terminal node without finding our key, must validate + if (node == NULL) { + if (validatePath(tid, currSize, key, path)) { + oParent = path[currSize - 1]; + return RetCode::FAILURE; + } else { + return RetCode::RETRY; + } + } + + nodeVNumMark = node->vNumMark; + + currKey = node->key; + + path[currSize].node = node; + path[currSize].oVNumMark = nodeVNumMark; + currSize++; + + if (key > currKey) { + node = node->right; + } else if (key < currKey) { + node = node->left; + } // no validation required on finding a key + else { + oParent = path[currSize - 2]; + oNode = path[currSize - 1]; + return RetCode::SUCCESS; + } + } +} + +/* validatePath(const int tid, const int size, const int key, ObservedNode + * path[MAX_PATH_SIZE]) + * ### Validates all nodes in a path such that they are not marked and their + * version numbers have not changed ### validated a given path, ensuring that + * all version numbers of observed nodes still match the version numbers stored + * locally within nodes within the tree. This provides the caller with certainty + * that there was a time that this path existed in the tree Returns true for a + * valid path Returns false for an invalid path (some node version number + * changed) + */ +template +inline bool +InternalKCAS::validatePath( + const int tid, const int &size, const K &key, ObservedNode path[]) { + assert(size > 0 && size < MAX_PATH_SIZE); + + for (int i = 0; i < size; i++) { + ObservedNode oNode = path[i]; + if (oNode.node->vNumMark != oNode.oVNumMark || IS_MARKED(oNode.oVNumMark)) { + return false; + } + } + return true; +} + +template +inline V InternalKCAS::insertIfAbsent(const int tid, const K &key, + const V &value) { + ObservedNode oParent; + ObservedNode oNode; + + while (true) { + // auto guard = recmgr->getGuard(tid); + + int res; + while ((res = (search(tid, oParent, oNode, key))) == RetCode::RETRY) { + /* keep trying until we get a result */ + } + + if (res == RetCode::SUCCESS) { + return (V)oNode.node->value; + } + + assert(res == RetCode::FAILURE); + if (internalInsert(tid, oParent, key, value)) { + return 0; + } + } +} + +template +int InternalKCAS::internalInsert(const int tid, ObservedNode &oParent, + const K &key, const V &value) { + /* INSERT KCAS (K = 2-3) + * predecessor's version number*: vNumber -> vNumber + 1 + * parent's version number: vNumber -> vNumber + 1 + * parent's child pointer: NULL -> newNode + */ + + kcas::start(); + Node *parent = oParent.node; + + Node *newNode = createNode(tid, parent, key, value); + + if (key > parent->key) { + kcas::add(&parent->right, (Node *)NULL, newNode); + } else if (key < parent->key) { + kcas::add(&parent->left, (Node *)NULL, newNode); + } else { + // recmgr->reclaim(newNode); + return RetCode::RETRY; + } + + kcas::add(&parent->vNumMark, oParent.oVNumMark, oParent.oVNumMark + 2); + + if (kcas::execute()) { + fixHeightAndRebalance(tid, parent); + return RetCode::SUCCESS; + } + + // recmgr->reclaim(newNode); + + return RetCode::RETRY; +} + +template +inline V +InternalKCAS::erase( + const int tid, const K &key) { + ObservedNode oParent; + ObservedNode oNode; + + while (true) { + // auto 
guard = recmgr->getGuard(tid); + + int res = 0; + while ((res = (search(tid, oParent, oNode, key))) == RetCode::RETRY) { + /* keep trying until we get a result */ + } + + if (res == RetCode::FAILURE) { + return 0; + } + + assert(res == RetCode::SUCCESS); + if ((res = internalErase(tid, oParent, oNode, key))) { + return (V)oNode.node->value; + } + } +} + +template +int InternalKCAS::internalErase(const int tid, ObservedNode &oParent, + ObservedNode &oNode, const K &key) { + Node *parent = oParent.node; + Node *node = oNode.node; + + int numChildren = countChildren(tid, node); + + kcas::start(); + + if (IS_MARKED(oParent.oVNumMark) || IS_MARKED(oNode.oVNumMark)) { + return RetCode::RETRY; + } + + if (numChildren == 0) { + /* No-Child Delete + * Unlink node + */ + + if (key > parent->key) { + kcas::add(&parent->right, node, (Node *)NULL); + } else if (key < parent->key) { + kcas::add(&parent->left, node, (Node *)NULL); + } else { + return RetCode::RETRY; + } + + kcas::add(&parent->vNumMark, oParent.oVNumMark, oParent.oVNumMark + 2, + &node->vNumMark, oNode.oVNumMark, oNode.oVNumMark + 3); + + if (kcas::execute()) { + assert(IS_MARKED(node->vNumMark)); + // recmgr->reclaim(node); + fixHeightAndRebalance(tid, parent); + + return RetCode::SUCCESS; + } + + return RetCode::RETRY; + } else if (numChildren == 1) { + /* One-Child Delete + * Reroute parent pointer around removed node + */ + + Node *left = node->left; + Node *right = node->right; + Node *reroute; + + // determine which child will be the replacement + if (left != NULL) { + reroute = left; + } else if (right != NULL) { + reroute = right; + } else { + return RetCode::RETRY; + } + + casword_t rerouteVNum = reroute->vNumMark; + + if (IS_MARKED(rerouteVNum)) { + return RetCode::RETRY; + } + + if (key > parent->key) { + kcas::add(&parent->right, node, reroute); + } else if (key < parent->key) { + kcas::add(&parent->left, node, reroute); + } else { + return RetCode::RETRY; + } + + kcas::add(&reroute->parent, node, parent, &reroute->vNumMark, rerouteVNum, + rerouteVNum + 2, &node->vNumMark, oNode.oVNumMark, + oNode.oVNumMark + 3, &parent->vNumMark, oParent.oVNumMark, + oParent.oVNumMark + 2); + + if (kcas::execute()) { + assert(IS_MARKED(node->vNumMark)); + // recmgr->reclaim(node); + fixHeightAndRebalance(tid, parent); + + return RetCode::SUCCESS; + } + + return RetCode::RETRY; + } else if (numChildren == 2) { + /* Two-Child Delete + * Promotion of descendant successor to this node by replacing the key/value + * pair at the node + */ + + ObservedNode oSucc; + + // the (decendant) successor's key will be promoted + if (getSuccessor(tid, node, oSucc, key) == RetCode::RETRY) { + return RetCode::RETRY; + } + + if (oSucc.node == NULL) { + return RetCode::RETRY; + } + + Node *succ = oSucc.node; + Node *succParent = succ->parent; + + ObservedNode oSuccParent; + oSuccParent.node = succParent; + oSuccParent.oVNumMark = succParent->vNumMark; + + if (oSuccParent.node == NULL) { + return RetCode::RETRY; + } + + K succKey = succ->key; + + assert(succKey <= maxKey); + + if (IS_MARKED(oSuccParent.oVNumMark)) { + return RetCode::RETRY; + } + + Node *succRight = succ->right; + + if (succRight != NULL) { + casword_t succRightVNum = succRight->vNumMark; + + if (IS_MARKED(succRightVNum)) { + return RetCode::RETRY; + } + + kcas::add(&succRight->parent, succ, succParent, &succRight->vNumMark, + succRightVNum, succRightVNum + 2); + } + + if (succParent->right == succ) { + kcas::add(&succParent->right, succ, succRight); + } else if (succParent->left == succ) { + 
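+      // (the successor hangs off succParent's left: splice succRight in on
+      // that side, symmetric to the right-child case above)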
kcas::add(&succParent->left, succ, succRight); + } else { + return RetCode::RETRY; + } + + V nodeVal = node->value; + V succVal = succ->value; + + kcas::add(&node->value, nodeVal, succVal, &node->key, key, succKey, + &succ->vNumMark, oSucc.oVNumMark, oSucc.oVNumMark + 3, + &succParent->vNumMark, oSuccParent.oVNumMark, + oSuccParent.oVNumMark + 2); + + if (succParent != node) { + kcas::add(&node->vNumMark, oNode.oVNumMark, oNode.oVNumMark + 2); + } + + if (kcas::execute()) { + assert(IS_MARKED(succ->vNumMark)); + // recmgr->reclaim(succ); + // successor's parent is the only node that's height will have been + // impacted + fixHeightAndRebalance(tid, succParent); + return RetCode::SUCCESS; + } + + return RetCode::RETRY; + } + assert(false); + return RetCode::RETRY; +} + +template +void InternalKCAS:: + fixHeightAndRebalance(const int tid, Node *node) { + ObservedNode oNode; + ObservedNode oParent; + + int propRes; + + while (node != root) { + ObservedNode oRight; + ObservedNode oLeft; + ObservedNode oRightLeft; + ObservedNode oLeftRight; + ObservedNode oRightRight; + ObservedNode oLeftLeft; + + oNode.node = node; + oNode.oVNumMark = node->vNumMark; + + oParent.node = node->parent; + oParent.oVNumMark = oParent.node->vNumMark; + + if (IS_MARKED(oNode.oVNumMark)) { + return; + } + + Node *left = node->left; + if (left != NULL) { + oLeft.node = left; + oLeft.oVNumMark = left->vNumMark; + } + + Node *right = node->right; + if (right != NULL) { + oRight.node = right; + oRight.oVNumMark = right->vNumMark; + } + + int localBalance = getHeight(left) - getHeight(right); + + if (localBalance >= 2) { + if (left == NULL || IS_MARKED(oLeft.oVNumMark)) { + continue; + } + + Node *leftRight = left->right; + Node *leftLeft = left->left; + + if (leftRight != NULL) { + oLeftRight.node = leftRight; + oLeftRight.oVNumMark = leftRight->vNumMark; + } + + if (leftLeft != NULL) { + oLeftLeft.node = leftLeft; + oLeftLeft.oVNumMark = leftLeft->vNumMark; + } + + int leftBalance = getHeight(leftLeft) - getHeight(leftRight); + + if (leftBalance < 0) { + if (leftRight == NULL) { + continue; + } + if (rotateLeftRight(tid, oParent, oNode, oLeft, oLeftRight)) { + // node is now the lowest on the tree, so it must be rebalanced first + // cannot simply loop... 
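+          // After a double rotation, node, left, and leftRight may all have
+          // stale heights, and node is now the lowest of the three, so each
+          // is re-checked bottom-up before resuming from the old parent.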
+ fixHeightAndRebalance(tid, node); + fixHeightAndRebalance(tid, left); + fixHeightAndRebalance(tid, leftRight); + node = oParent.node; + } + } else { + if (rotateRight(tid, oParent, oNode, oLeft) == RetCode::SUCCESS) { + fixHeightAndRebalance(tid, node); + fixHeightAndRebalance(tid, left); + node = oParent.node; + } + } + } else if (localBalance <= -2) { + if (right == NULL || IS_MARKED(oRight.oVNumMark)) { + continue; + } + + Node *rightLeft = right->left; + Node *rightRight = right->right; + + if (rightLeft != NULL) { + oRightLeft.node = rightLeft; + oRightLeft.oVNumMark = rightLeft->vNumMark; + } + + if (rightRight != NULL) { + oRightRight.node = rightRight; + oRightRight.oVNumMark = rightRight->vNumMark; + } + + int rightBalance = getHeight(rightLeft) - getHeight(rightRight); + + if (rightBalance > 0) { + if (rightLeft == NULL) { + continue; + } + + if (rotateRightLeft(tid, oParent, oNode, oRight, oRightLeft)) { + fixHeightAndRebalance(tid, node); + fixHeightAndRebalance(tid, right); + fixHeightAndRebalance(tid, rightLeft); + node = oParent.node; + } + } else { + if (rotateLeft(tid, oParent, oNode, oRight) == RetCode::SUCCESS) { + fixHeightAndRebalance(tid, node); + fixHeightAndRebalance(tid, right); + node = oParent.node; + } + } + } else { + // no rebalance occurred? check if the height is still ok + if ((propRes = fixHeight(tid, oNode)) == RetCode::FAILURE) { + continue; + } else if (propRes == RetCode::SUCCESS_WITH_HEIGHT_UPDATE) { + node = node->parent; + } else { + return; + } + } + } + return; +} + +template +int InternalKCAS::fixHeight( + const int tid, ObservedNode &oNode) { + + Node *node = oNode.node; + Node *left = node->left; + Node *right = node->right; + + casword_t leftOVNumMark; + casword_t rightOVNumMark; + + kcas::start(); + + if (left != NULL) { + leftOVNumMark = left->vNumMark; + kcas::add(&left->vNumMark, leftOVNumMark, leftOVNumMark); + } + + if (right != NULL) { + rightOVNumMark = right->vNumMark; + kcas::add(&right->vNumMark, rightOVNumMark, rightOVNumMark); + } + + int oldHeight = node->height; + + int newHeight = 1 + max(getHeight(left), getHeight(right)); + + // Check if rebalance is actually necessary + if (oldHeight == newHeight) { + if (node->vNumMark == oNode.oVNumMark && + (left == NULL || left->vNumMark == leftOVNumMark) && + (right == NULL || right->vNumMark == rightOVNumMark)) { + return RetCode::UNNECCESSARY; + } else { + return RetCode::FAILURE; + } + } + + kcas::add(&node->height, oldHeight, newHeight, &node->vNumMark, + oNode.oVNumMark, oNode.oVNumMark + 2); + + if (kcas::execute()) { + return RetCode::SUCCESS_WITH_HEIGHT_UPDATE; + } + + return RetCode::FAILURE; +} + +template +int InternalKCAS::rotateRight(const int tid, ObservedNode &oParent, + ObservedNode &oNode, + ObservedNode &oLeft) { + Node *parent = oParent.node; + Node *node = oNode.node; + Node *left = oLeft.node; + + kcas::start(); + + /***Pointers to Parents and Children***/ + // could fail fast here, should consider + if (parent->right == node) { + kcas::add(&parent->right, node, left); + } else if (parent->left == node) { + kcas::add(&parent->left, node, left); + } else { + return RetCode::FAILURE; + } + + Node *leftRight = left->right; + if (leftRight != NULL) { + casword_t leftRightOVNumMark = leftRight->vNumMark; + kcas::add(&leftRight->parent, left, node, &leftRight->vNumMark, + leftRightOVNumMark, leftRightOVNumMark + 2); + } + + Node *leftLeft = left->left; + if (leftLeft != NULL) { + casword_t leftLeftOVNumMark = leftLeft->vNumMark; + kcas::add(&leftLeft->vNumMark, 
leftLeftOVNumMark, leftLeftOVNumMark); + } + + Node *right = node->right; + if (right != NULL) { + casword_t rightOVNumMark = right->vNumMark; + kcas::add(&right->vNumMark, rightOVNumMark, rightOVNumMark); + } + + int oldNodeHeight = node->height; + int oldLeftHeight = left->height; + + int newNodeHeight = 1 + max(getHeight(leftRight), getHeight(right)); + int newLeftHeight = 1 + max(getHeight(leftLeft), newNodeHeight); + + kcas::add(&left->parent, node, parent, &node->left, left, leftRight, + &left->right, leftRight, node, &node->parent, parent, left, + &node->height, oldNodeHeight, newNodeHeight, &left->height, + oldLeftHeight, newLeftHeight, &parent->vNumMark, oParent.oVNumMark, + oParent.oVNumMark + 2, &node->vNumMark, oNode.oVNumMark, + oNode.oVNumMark + 2, &left->vNumMark, oLeft.oVNumMark, + oLeft.oVNumMark + 2); + + if (kcas::execute()) + return RetCode::SUCCESS; + return RetCode::FAILURE; +} + +template +int InternalKCAS::rotateLeft(const int tid, ObservedNode &oParent, + ObservedNode &oNode, + ObservedNode &oRight) { + Node *parent = oParent.node; + Node *node = oNode.node; + Node *right = oRight.node; + + kcas::start(); + + /***Pointers to Parents and Children***/ + // could fail fast here, should consider + if (parent->right == node) { + kcas::add(&parent->right, node, right); + } else if (parent->left == node) { + kcas::add(&parent->left, node, right); + } else { + return RetCode::FAILURE; + } + + Node *rightLeft = right->left; + if (rightLeft != NULL) { + casword_t rightLeftOVNumMark = rightLeft->vNumMark; + kcas::add(&rightLeft->parent, right, node, &rightLeft->vNumMark, + rightLeftOVNumMark, rightLeftOVNumMark + 2); + } + + Node *rightRight = right->right; + if (rightRight != NULL) { + casword_t rightRightOVNumMark = rightRight->vNumMark; + kcas::add(&rightRight->vNumMark, rightRightOVNumMark, rightRightOVNumMark); + } + + Node *left = node->left; + if (left != NULL) { + casword_t leftOVNumMark = left->vNumMark; + kcas::add(&left->vNumMark, leftOVNumMark, leftOVNumMark); + } + + int oldNodeHeight = node->height; + int oldRightHeight = right->height; + + int newNodeHeight = 1 + max(getHeight(left), getHeight(rightLeft)); + int newRightHeight = 1 + max(newNodeHeight, getHeight(rightRight)); + + kcas::add(&right->parent, node, parent, &node->right, right, rightLeft, + &right->left, rightLeft, node, &node->parent, parent, right, + &node->height, oldNodeHeight, newNodeHeight, &right->height, + oldRightHeight, newRightHeight, &parent->vNumMark, + oParent.oVNumMark, oParent.oVNumMark + 2, &node->vNumMark, + oNode.oVNumMark, oNode.oVNumMark + 2, &right->vNumMark, + oRight.oVNumMark, oRight.oVNumMark + 2); + + if (kcas::execute()) + return RetCode::SUCCESS; + return RetCode::FAILURE; +} + +template +int InternalKCAS::rotateLeftRight(const int tid, ObservedNode &oParent, + ObservedNode &oNode, + ObservedNode &oLeft, + ObservedNode &oLeftRight) { + Node *parent = oParent.node; + Node *node = oNode.node; + Node *left = oLeft.node; + Node *leftRight = oLeftRight.node; + + kcas::start(); + + /***Pointers to Parents and Children***/ + // could fail fast here, should consider + if (parent->right == node) { + kcas::add(&parent->right, node, leftRight); + } else if (parent->left == node) { + kcas::add(&parent->left, node, leftRight); + } else { + return RetCode::FAILURE; + } + + Node *leftRightLeft = leftRight->left; + if (leftRightLeft != NULL) { + casword_t leftRightLeftOVNumMark = leftRightLeft->vNumMark; + kcas::add(&leftRightLeft->parent, leftRight, left, &leftRightLeft->vNumMark, + 
leftRightLeftOVNumMark, leftRightLeftOVNumMark + 2); + } + + Node *leftRightRight = leftRight->right; + if (leftRightRight != NULL) { + casword_t leftRightRightOVNumMark = leftRightRight->vNumMark; + kcas::add(&leftRightRight->parent, leftRight, node, + &leftRightRight->vNumMark, leftRightRightOVNumMark, + leftRightRightOVNumMark + 2); + } + + Node *right = node->right; + if (right != NULL) { + casword_t rightOVNumMark = right->vNumMark; + kcas::add(&right->vNumMark, rightOVNumMark, rightOVNumMark); + } + + Node *leftLeft = left->left; + if (leftLeft != NULL) { + casword_t leftLeftOVNumMark = leftLeft->vNumMark; + kcas::add(&leftLeft->vNumMark, leftLeftOVNumMark, leftLeftOVNumMark); + } + + int oldNodeHeight = node->height; + int oldLeftHeight = left->height; + int oldLeftRightHeight = leftRight->height; + + int newNodeHeight = 1 + max(getHeight(leftRightRight), getHeight(right)); + int newLeftHeight = 1 + max(getHeight(leftLeft), getHeight(leftRightLeft)); + int newLeftRightHeight = 1 + max(newNodeHeight, newLeftHeight); + + kcas::add(&leftRight->parent, left, parent, &leftRight->left, leftRightLeft, + left, &left->parent, node, leftRight, &leftRight->right, + leftRightRight, node, &node->parent, parent, leftRight, + &left->right, leftRight, leftRightLeft, &node->left, left, + leftRightRight, &node->height, oldNodeHeight, newNodeHeight, + &left->height, oldLeftHeight, newLeftHeight, &leftRight->height, + oldLeftRightHeight, newLeftRightHeight, &leftRight->vNumMark, + oLeftRight.oVNumMark, oLeftRight.oVNumMark + 2, &parent->vNumMark, + oParent.oVNumMark, oParent.oVNumMark + 2, &node->vNumMark, + oNode.oVNumMark, oNode.oVNumMark + 2, &left->vNumMark, + oLeft.oVNumMark, oLeft.oVNumMark + 2); + + if (kcas::execute()) + return RetCode::SUCCESS; + return RetCode::FAILURE; +} + +template +int InternalKCAS::rotateRightLeft(const int tid, ObservedNode &oParent, + ObservedNode &oNode, + ObservedNode &oRight, + ObservedNode &oRightLeft) { + Node *parent = oParent.node; + Node *node = oNode.node; + Node *right = oRight.node; + Node *rightLeft = oRightLeft.node; + + kcas::start(); + + if (parent->right == node) { + kcas::add(&parent->right, node, rightLeft); + } else if (parent->left == node) { + kcas::add(&parent->left, node, rightLeft); + } else { + return RetCode::FAILURE; + } + + Node *rightLeftRight = rightLeft->right; + if (rightLeftRight != NULL) { + casword_t rightLeftRightOVNumMark = rightLeftRight->vNumMark; + + if (IS_MARKED(rightLeftRightOVNumMark)) + return RetCode::FAILURE; + + kcas::add(&rightLeftRight->parent, rightLeft, right, + &rightLeftRight->vNumMark, rightLeftRightOVNumMark, + rightLeftRightOVNumMark + 2); + } + + Node *rightLeftLeft = rightLeft->left; + if (rightLeftLeft != NULL) { + casword_t rightLeftLeftOVNumMark = rightLeftLeft->vNumMark; + + if (IS_MARKED(rightLeftLeftOVNumMark)) + return RetCode::FAILURE; + + kcas::add(&rightLeftLeft->parent, rightLeft, node, &rightLeftLeft->vNumMark, + rightLeftLeftOVNumMark, rightLeftLeftOVNumMark + 2); + } + + Node *left = node->left; + if (left != NULL) { + casword_t leftOVNumMark = left->vNumMark; + kcas::add(&left->vNumMark, leftOVNumMark, leftOVNumMark); + } + + Node *rightRight = right->right; + if (rightRight != NULL) { + casword_t rightRightOVNumMark = rightRight->vNumMark; + kcas::add(&rightRight->vNumMark, rightRightOVNumMark, rightRightOVNumMark); + } + + int oldNodeHeight = node->height; + int oldRightHeight = right->height; + int oldRightLeftHeight = rightLeft->height; + + int newNodeHeight = 1 + 
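+      /* new heights are computed from the children each node will have
+       * after the rotation, so a successful KCAS never publishes stale
+       * heights */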
max(getHeight(rightLeftLeft), getHeight(left)); + int newRightHeight = + 1 + max(getHeight(rightRight), getHeight(rightLeftRight)); + int newRightLeftHeight = 1 + max(newNodeHeight, newRightHeight); + + kcas::add(&rightLeft->parent, right, parent, &rightLeft->right, + rightLeftRight, right, &right->parent, node, rightLeft, + &rightLeft->left, rightLeftLeft, node, &node->parent, parent, + rightLeft, &right->left, rightLeft, rightLeftRight, &node->right, + right, rightLeftLeft, &node->height, oldNodeHeight, newNodeHeight, + &right->height, oldRightHeight, newRightHeight, &rightLeft->height, + oldRightLeftHeight, newRightLeftHeight, &rightLeft->vNumMark, + oRightLeft.oVNumMark, oRightLeft.oVNumMark + 2, &parent->vNumMark, + oParent.oVNumMark, oParent.oVNumMark + 2, &node->vNumMark, + oNode.oVNumMark, oNode.oVNumMark + 2, &right->vNumMark, + oRight.oVNumMark, oRight.oVNumMark + 2); + + if (kcas::execute()) + return RetCode::SUCCESS; + return RetCode::FAILURE; +} + +template +inline int +InternalKCAS::countChildren( + const int tid, Node *node) { + return (node->left == NULL ? 0 : 1) + (node->right == NULL ? 0 : 1); +} + +template +long InternalKCAS::validateSubtree(Node *node, + long smaller, long larger, + std::unordered_set &keys, + ofstream &graph, ofstream &log, + bool &errorFound) { + + if (node == NULL) + return 0; + graph << "\"" << node << "\"" + << "[label=\"K: " << node->key << " - H: " << node->height << "\"];\n"; + + if (IS_MARKED(node->vNumMark)) { + log << "MARKED NODE! " << node->key << "\n"; + errorFound = true; + } + Node *nodeLeft = node->left; + Node *nodeRight = node->right; + + if (nodeLeft != NULL) { + graph << "\"" << node << "\" -> \"" << nodeLeft << "\""; + if (node->key < nodeLeft->key) { + assert(false); + graph << "[color=red]"; + } else { + graph << "[color=blue]"; + } + + graph << ";\n"; + } + + if (nodeRight != NULL) { + graph << "\"" << node << "\" -> \"" << nodeRight << "\""; + if (node->key > nodeRight->key) { + assert(false); + graph << "[color=red]"; + } else { + graph << "[color=green]"; + } + graph << ";\n"; + } + + Node *parent = node->parent; + graph << "\"" << node << "\" -> \"" << parent + << "\"" + "[color=grey];\n"; + // casword_t height = node->height; + + if (!(keys.count(node->key) == 0)) { + log << "DUPLICATE KEY! " << node->key << "\n"; + errorFound = true; + } + + if (!((nodeLeft == NULL || nodeLeft->parent == node) && + (nodeRight == NULL || nodeRight->parent == node))) { + log << "IMPROPER PARENT! " << node->key << "\n"; + errorFound = true; + } + + if ((node->key < smaller) || (node->key > larger)) { + log << "IMPROPER LOCAL TREE! " << node->key << "\n"; + errorFound = true; + } + + if (nodeLeft == NULL && nodeRight == NULL && getHeight(node) > 1) { + log << "Leaf with height > 1! " << node->key << "\n"; + errorFound = true; + } + + keys.insert(node->key); + + long lHeight = validateSubtree(node->left, smaller, node->key, keys, graph, + log, errorFound); + long rHeight = validateSubtree(node->right, node->key, larger, keys, graph, + log, errorFound); + + long ret = 1 + max(lHeight, rHeight); + + if (node->height != ret) { + log << "Node " << node->key << " with height " << ret + << " thinks it has height " << node->height << "\n"; + errorFound = true; + } + + if (abs(lHeight - rHeight) > 1) { + log << "Imbalanced Node! 
" << node->key << "(" << lHeight << ", " << rHeight + << ") - " << node->height << "\n"; + errorFound = true; + } + + return ret; +} + +template +bool InternalKCAS::validate() { + std::unordered_set keys = {}; + bool errorFound; + + rename("graph.dot", "graph_before.dot"); + ofstream graph; + graph.open("graph.dot"); + graph << "digraph G {\n"; + + ofstream log; + log.open("log.txt", std::ofstream::out | std::ofstream::app); + + auto t = std::time(nullptr); + auto tm = *std::localtime(&t); + log << "Run at: " << std::put_time(&tm, "%d-%m-%Y %H-%M-%S") << "\n"; + + // long ret = validateSubtree(root->left, minKey, maxKey, keys, graph, log, + // errorFound); + graph << "}"; + graph.close(); + + if (!errorFound) { + log << "Validated Successfully!\n"; + } + + log.close(); + + return !errorFound; +} + +template +void InternalKCAS::printDebuggingDetails() {} + +template +void InternalKCAS::freeSubtree(const int tid, + Node *node) { + if (node == NULL) + return; + freeSubtree(tid, node->left); + freeSubtree(tid, node->right); + // recmgr->reclaim(node); +} diff --git a/artifact/ds/baseline/int_bst_pathcas/internal_kcas_bst.h b/artifact/ds/baseline/int_bst_pathcas/internal_kcas_bst.h new file mode 100644 index 0000000..8ef7e20 --- /dev/null +++ b/artifact/ds/baseline/int_bst_pathcas/internal_kcas_bst.h @@ -0,0 +1,767 @@ +#pragma once + +#ifndef KCAS_TYPE +#define KCAS_HTM +#define KCAS_TYPE "KCAS_HTM" +#endif + +#include +#define MAX_KCAS 16 +#include "kcas.h" + +#include +#include +#include +#include +#include + +using namespace std; + +#define MAX_THREADS 200 +#define MAX_PATH_SIZE 16384 +#define PADDING_BYTES 128 +#define KCAS_MAX_K 16 + +#define IS_MARKED(word) (word & 0x1) + +template +struct Node : RecordManager::reclaimable_t { + casword key; + casword vNumMark; + casword *> left; + casword *> right; + casword value; +}; + +enum RetCode : int { + RETRY = 0, + UNNECCESSARY = 0, + FAILURE = -1, + SUCCESS = 1, + SUCCESS_WITH_HEIGHT_UPDATE = 2 +}; + +template +class InternalKCAS { +private: + /* + * ObservedNode acts as a Node-VersionNumber pair to track an "observed + * version number" of a given node. 
We can then be sure that a version number + * does not change after we have read it by comparing the current version + * number to this saved value NOTE: This is a thread-private structure, no + * fields need to be volatile + */ + struct ObservedNode { + ObservedNode() {} + + Node *node = NULL; + casword_t oVNumMark = -1; + }; + + struct PathContainer { + ObservedNode path[MAX_PATH_SIZE]; + volatile char padding[PADDING_BYTES]; + }; + + volatile char padding0[PADDING_BYTES]; + // Debugging, used to validate that no thread's parent can't be NULL, save for + // the root + bool init = false; + const int numThreads; + const int minKey; + const long long maxKey; + volatile char padding4[PADDING_BYTES]; + Node *root; + volatile char padding5[PADDING_BYTES]; + RecordManager *const recmgr; + volatile char padding7[PADDING_BYTES]; + PathContainer paths[MAX_THREADS]; + volatile char padding8[PADDING_BYTES]; + +public: + InternalKCAS(RecordManager *me, auto *cfg); + + ~InternalKCAS(); + + bool contains(const int tid, const K &key); + + V insertIfAbsent(const int tid, const K &key, const V &value); + + V erase(const int tid, const K &key); + + bool validate(); + + void printDebuggingDetails(); + + Node *getRoot(); + + void initThread(const int tid); + + void deinitThread(const int tid); + + RecordManager *const debugGetRecMgr() { return recmgr; } + // These are the functions that the benchmark will call + bool get(RecordManager *me, const K &key, V &val) { + return contains(me->tid, key); + } + + bool insert(RecordManager *me, const K &key, V &val) { + return 0 == insertIfAbsent(me->tid, key, val); + } + + bool remove(RecordManager *me, const K &key) { + return 0 != erase(me->tid, key); + } + +private: + Node *createNode(const int tid, K key, V value); + + void freeSubtree(const int tid, Node *node); + + long validateSubtree(Node *node, long smaller, + long larger, std::unordered_set &keys, + ofstream &graph, ofstream &log, bool &errorFound); + + int internalErase(const int tid, ObservedNode &parentObserved, + ObservedNode &nodeObserved, const K &key); + + int internalInsert(const int tid, ObservedNode &predecessorObserved, + ObservedNode &parentObserved, const K &key, + const V &value); + + int countChildren(const int tid, Node *node); + + int getSuccessor(const int tid, Node *node, + ObservedNode &succObserved, ObservedNode &oSuccParent, + const K &key); + + bool validatePath(const int tid, const int &size, const K &key, + ObservedNode *path); + + int search(const int tid, ObservedNode &predObserved, + ObservedNode &parentObserved, ObservedNode &nodeObserved, + const K &key); +}; + +template +Node * +InternalKCAS::createNode( + const int tid, K key, V value) { + Node *node = + new Node(); // recmgr->template allocate >(tid); + // No node, save for root, should have a NULL parent + node->key.setInitVal(key); + node->value.setInitVal(value); + node->vNumMark.setInitVal(0); + node->left.setInitVal(NULL); + node->right.setInitVal(NULL); + return node; +} + +template +InternalKCAS::InternalKCAS( + RecordManager *_recmgr, auto *cfg) + : numThreads(_numThreads), minKey(_minKey), maxKey(_maxKey), + recmgr(_recmgr) { + + // root = createNode(0, (maxKey + 1 & 0x00FFFFFFFFFFFFFF), NULL); + root = createNode(0, maxKey, 0); + init = true; +} + +template +InternalKCAS::~InternalKCAS() { + // auto guard = recmgr->getGuard(tid); + freeSubtree(0, root); + // delete recmgr; +} + +template +inline Node * +InternalKCAS::getRoot() { + return root->left; +} + +template +void InternalKCAS::initThread(const int tid) { + // 
recmgr->initThread(tid); +} + +template +void InternalKCAS::deinitThread(const int tid) { + // recmgr->deinitThread(tid); +} + +/* getSuccessor(const int tid, Node * node, ObservedNode &succObserved, int key) + * ### Gets the successor of a given node in it's subtree ### + * returns the successor of a given node stored within an ObservedNode with the + * observed version number. + * Returns an integer, 1 indicating the process was successful, 0 indicating a + * retry + */ +template +inline int +InternalKCAS::getSuccessor( + const int tid, Node *node, ObservedNode &oSucc, + ObservedNode &oSuccParent, const K &key) { + ObservedNode path[MAX_PATH_SIZE]; + + while (true) { + Node *succ = node->right; + path[0].node = node; + path[0].oVNumMark = node->vNumMark; + int currSize = 1; + + while (succ != NULL) { + assert(currSize < MAX_PATH_SIZE - 1); + path[currSize].node = succ; + path[currSize].oVNumMark = succ->vNumMark; + currSize++; + succ = succ->left; + } + + if (currSize < 2) { + return RetCode::RETRY; + } + + oSuccParent = path[currSize - 2]; + oSucc = path[currSize - 1]; + if (IS_MARKED(oSuccParent.oVNumMark) || IS_MARKED(oSucc.oVNumMark)) { + return RetCode::RETRY; + } else { + return RetCode::SUCCESS; + } + } +} + +template +inline bool +InternalKCAS::contains( + const int tid, const K &key) { + assert(key <= maxKey); + int result; + ObservedNode oNode; + ObservedNode oParent; + ObservedNode oPred; + + while ((result = search(tid, oPred, oParent, oNode, key)) == + RetCode::RETRY) { /* keep trying until we get a result */ + } + return result == RetCode::SUCCESS; +} + +/* search(const int tid, ObservedNode &predObserved, ObservedNode + * &parentObserved, ObservedNode &nodeObserved, const int &key) A proposed + * successor-predecessor pair is generated by searching for a given key, if the + * key is not found, the path is then validated to ensure it was not missed. + * Where appropriate, the predecessor (predObserved), parent (parentObserved) + * and node (nodeObserved) are provided to the caller. 
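+ * Return values (per the code below): SUCCESS when the key is found,
+ * FAILURE when the key is proven absent (the terminal path validated), and
+ * RETRY when a concurrent update invalidated the traversal.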
+ */ +template +int InternalKCAS::search( + const int tid, ObservedNode &oPred, ObservedNode &oParent, + ObservedNode &oNode, const K &key) { + assert(key <= maxKey); + + K currKey; + casword_t nodeVNumMark; + + ObservedNode *path = paths[tid].path; + path[0].node = root; + path[0].oVNumMark = root->vNumMark; + + Node *node = root->left; + + ObservedNode *oPredPtr = NULL; + ObservedNode *oSuccPtr = &path[0]; + + int currSize = 1; + + while (true) { + assert(currSize < MAX_PATH_SIZE - 1); + // We have hit a terminal node without finding our key, must validate + if (node == NULL) { + if (oPredPtr != NULL) { + oPred = *oPredPtr; + + // The path could be valid, but we could be in the wrong sub-tree + if (key <= oPredPtr->node->key || key >= oSuccPtr->node->key) { + return RetCode::RETRY; + } + // The path could be valid, but we could be in the wrong sub-tree + } else if (key >= oSuccPtr->node->key) { + return RetCode::RETRY; + } + + if (validatePath(tid, currSize, key, path)) { + oParent = path[currSize - 1]; + return RetCode::FAILURE; + } else { + return RetCode::RETRY; + } + } + + nodeVNumMark = node->vNumMark; + + currKey = node->key; + + path[currSize].node = node; + path[currSize].oVNumMark = nodeVNumMark; + currSize++; + + if (key > currKey) { + node = node->right; + oPredPtr = &path[currSize - 1]; + } else if (key < currKey) { + node = node->left; + oSuccPtr = &path[currSize - 1]; + } + // no validation required on finding a key + else { + if (oPredPtr != NULL) { + oPred = *oPredPtr; + } + + oParent = path[currSize - 2]; + oNode = path[currSize - 1]; + return RetCode::SUCCESS; + } + } +} + +/* validatePath(const int tid, const int size, const int key, ObservedNode + * path[MAX_PATH_SIZE]) + * ### Validates all nodes in a path such that they are not marked and their + * version numbers have not changed ### validated a given path, ensuring that + * all version numbers of observed nodes still match the version numbers stored + * locally within nodes within the tree. 
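+ * Concretely: the path is valid iff, for every ObservedNode o on it,
+ * o.node->vNumMark == o.oVNumMark and !IS_MARKED(o.oVNumMark).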
This provides the caller with certainty + * that there was a time that this path existed in the tree Returns true for a + * valid path Returns false for an invalid path (some node version number + * changed) + */ +template +inline bool +InternalKCAS::validatePath( + const int tid, const int &size, const K &key, ObservedNode *path) { + assert(size > 0); + + for (int i = 0; i < size; i++) { + ObservedNode oNode = path[i]; + if (oNode.node->vNumMark != oNode.oVNumMark || IS_MARKED(oNode.oVNumMark)) { + return false; + } + } + return true; +} + +template +inline V InternalKCAS::insertIfAbsent(const int tid, const K &key, + const V &value) { + ObservedNode oParent; + ObservedNode oNode; + ObservedNode oPred; + // auto guard = recmgr->getGuard(tid); + + while (true) { + + int res; + while ((res = (search(tid, oPred, oParent, oNode, key))) == + RetCode::RETRY) { /* keep trying until we get a result */ + } + + if (res == RetCode::SUCCESS) { + return (V)oNode.node->value; + } + + assert(res == RetCode::FAILURE); + if (internalInsert(tid, oPred, oParent, key, value)) { + return 0; + } + } +} + +template +int InternalKCAS::internalInsert(const int tid, ObservedNode &oPred, + ObservedNode &oParent, const K &key, + const V &value) { + /* INSERT KCAS (K = 2-3) + * predecessor's version number*: vNumber -> vNumber + 1 + * parent's version number: vNumber -> vNumber + 1 + * parent's child pointer: NULL -> newNode + */ + + kcas::start(); + Node *parent = oParent.node; + + Node *pred = oPred.node; + + if (pred != NULL) { + if (pred->key == key) { + return RetCode::RETRY; + } else if (pred != parent) { + kcas::add(&pred->vNumMark, oPred.oVNumMark, oPred.oVNumMark); + } + } + + Node *newNode = createNode(tid, key, value); + + if (key > parent->key) { + kcas::add(&parent->right, (Node *)NULL, newNode); + } else if (key < parent->key) { + kcas::add(&parent->left, (Node *)NULL, newNode); + } else { + return RetCode::RETRY; + } + + kcas::add(&parent->vNumMark, oParent.oVNumMark, oParent.oVNumMark + 2); + + if (kcas::execute()) { + return RetCode::SUCCESS; + } + + // recmgr->reclaim(newNode); + + return RetCode::RETRY; +} + +template +inline V +InternalKCAS::erase( + const int tid, const K &key) { + ObservedNode oPred; + ObservedNode oParent; + ObservedNode oNode; + // auto guard = recmgr->getGuard(tid); + + while (true) { + int res = 0; + while ((res = (search(tid, oPred, oParent, oNode, key))) == + RetCode::RETRY) { /* keep trying until we get a result */ + } + + if (res == RetCode::FAILURE) { + return 0; + } + + assert(res == RetCode::SUCCESS); + if ((res = internalErase(tid, oParent, oNode, key))) { + return (V)oNode.node->value; + } + } +} + +template +int InternalKCAS::internalErase(const int tid, ObservedNode &oParent, + ObservedNode &oNode, const K &key) { + Node *parent = oParent.node; + Node *node = oNode.node; + + int numChildren = countChildren(tid, node); + + kcas::start(); + + if (IS_MARKED(oParent.oVNumMark) || IS_MARKED(oNode.oVNumMark)) { + return RetCode::RETRY; + } + + if (numChildren == 0) { + /* ERASE KCAS - NO CHILDREN (K = 4) + * node's mark: false -> true + * parent's child pointer: node -> NULL + * parent's version number: vNumber -> vNumber + 1 + * node's version number: vNumber -> vNumber + 1 + */ + + if (key > parent->key) { + kcas::add(&parent->right, node, (Node *)NULL); + } else if (key < parent->key) { + kcas::add(&parent->left, node, (Node *)NULL); + } else { + return RetCode::RETRY; + } + + kcas::add(&parent->vNumMark, oParent.oVNumMark, oParent.oVNumMark + 2, + &node->vNumMark, 
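+              /* +3 = +2 (version bump) plus +1 (set the low-order mark bit):
+               * one KCAS both logically deletes the node and invalidates all
+               * outstanding observations of it */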
oNode.oVNumMark, oNode.oVNumMark + 3); + + if (kcas::execute()) { + assert(IS_MARKED(node->vNumMark)); + // recmgr->reclaim(node); + + return RetCode::SUCCESS; + } + + return RetCode::RETRY; + } else if (numChildren == 1) { + /* ERASE KCAS - 1 CHILD (K = 6) + * reroute child's version number: vNumber -> vNumber + 1 + * reroute child's parent: node -> parent + * node's mark: false -> true + * parent's child pointer: node -> reroute child + * parent's version number: vNumber -> vNumber + 1 + * node's version number: vNumber -> vNumber + 1 + */ + Node *left = node->left; + Node *right = node->right; + Node *reroute; + + // determine which child will be the replacement + if (left != NULL) { + reroute = left; + } else if (right != NULL) { + reroute = right; + } else { + return RetCode::RETRY; + } + + casword_t rerouteVNum = reroute->vNumMark; + + if (IS_MARKED(rerouteVNum)) { + return RetCode::RETRY; + } + + if (key > parent->key) { + kcas::add(&parent->right, node, reroute); + } else if (key < parent->key) { + kcas::add(&parent->left, node, reroute); + } else { + return RetCode::RETRY; + } + + kcas::add(&reroute->vNumMark, rerouteVNum, rerouteVNum + 2, &node->vNumMark, + oNode.oVNumMark, oNode.oVNumMark + 3, &parent->vNumMark, + oParent.oVNumMark, oParent.oVNumMark + 2); + + if (kcas::execute()) { + assert(IS_MARKED(node->vNumMark)); + // recmgr->reclaim(node); + + return RetCode::SUCCESS; + } + + return RetCode::RETRY; + } else if (numChildren == 2) { + /* ERASE KCAS - 2 CHILD (K = 6 or 8) + * successor's right child version number*: vNumber -> vNumber + 1 + * successor's right child parent*: node -> parent + * node's key key -> successor's key + * successor's parent's child pointer: successor -> successor's + * right child successor's mark: false -> true + * successor's version number: vNumber -> vNumber + 1 + * successor's parent's version number: vNumber -> vNumber + 1 + * node's version number: vNumber -> vNumber + 1 + * node's value: value -> succ's value + */ + + ObservedNode oSucc; + ObservedNode oSuccParent; + + // the (decendant) successor's key will be promoted + if (getSuccessor(tid, node, oSucc, oSuccParent, key) == RetCode::RETRY) { + return RetCode::RETRY; + } + + if (oSucc.node == NULL) { + return RetCode::RETRY; + } + + Node *succ = oSucc.node; + Node *succParent = oSuccParent.node; + + if (oSuccParent.node == NULL) { + return RetCode::RETRY; + } + + K succKey = succ->key; + + assert(succKey <= maxKey); + + if (IS_MARKED(oSuccParent.oVNumMark)) { + return RetCode::RETRY; + } + + Node *succRight = succ->right; + + if (succRight != NULL) { + casword_t succRightVNum = succRight->vNumMark; + + if (IS_MARKED(succRightVNum)) { + return RetCode::RETRY; + } + + kcas::add(&succRight->vNumMark, succRightVNum, succRightVNum + 2); + } + + if (succParent->right == succ) { + kcas::add(&succParent->right, succ, succRight); + } else if (succParent->left == succ) { + kcas::add(&succParent->left, succ, succRight); + } else { + return RetCode::RETRY; + } + + V nodeVal = node->value; + V succVal = succ->value; + + kcas::add(&node->value, nodeVal, succVal, &node->key, key, succKey, + &succ->vNumMark, oSucc.oVNumMark, oSucc.oVNumMark + 3, + &succParent->vNumMark, oSuccParent.oVNumMark, + oSuccParent.oVNumMark + 2); + + if (succParent != node) { + kcas::add(&node->vNumMark, oNode.oVNumMark, oNode.oVNumMark + 2); + } + + if (kcas::execute()) { + assert(IS_MARKED(succ->vNumMark)); + // recmgr->reclaim(succ); + return RetCode::SUCCESS; + } + + return RetCode::RETRY; + } + assert(false); + return 
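+      /* unreachable: countChildren() only returns 0, 1, or 2 */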
RetCode::RETRY; +} + +template +inline int +InternalKCAS::countChildren( + const int tid, Node *node) { + return (node->left == NULL ? 0 : 1) + (node->right == NULL ? 0 : 1); +} + +template +long InternalKCAS::validateSubtree(Node *node, + long smaller, long larger, + std::unordered_set &keys, + ofstream &graph, ofstream &log, + bool &errorFound) { + + if (node == NULL) + return 0; + graph << "\"" << node << "\"" + << "[label=\"K: " << node->key << "\"];\n"; + + if (IS_MARKED(node->vNumMark)) { + log << "MARKED NODE! " << node->key << "\n"; + errorFound = true; + } + Node *nodeLeft = node->left; + Node *nodeRight = node->right; + + if (nodeLeft != NULL) { + graph << "\"" << node << "\" -> \"" << nodeLeft << "\""; + if (node->key < nodeLeft->key) { + assert(false); + graph << "[color=red]"; + } else { + graph << "[color=blue]"; + } + + graph << ";\n"; + } + + if (nodeRight != NULL) { + graph << "\"" << node << "\" -> \"" << nodeRight << "\""; + if (node->key > nodeRight->key) { + assert(false); + graph << "[color=red]"; + } else { + graph << "[color=green]"; + } + graph << ";\n"; + } + + if (!(keys.count(node->key) == 0)) { + log << "DUPLICATE KEY! " << node->key << "\n"; + errorFound = true; + } + + if ((node->key < smaller) || (node->key > larger)) { + log << "IMPROPER LOCAL TREE! " << node->key << "\n"; + errorFound = true; + } + + keys.insert(node->key); + long ret = 1 + max(validateSubtree(node->left, smaller, node->key, keys, + graph, log, errorFound), + validateSubtree(node->right, node->key, larger, keys, + graph, log, errorFound)); + + return ret; +} + +template +bool InternalKCAS::validate() { + std::unordered_set keys = {}; + bool errorFound = false; + + rename("graph.dot", "graph_before.dot"); + ofstream graph; + graph.open("graph.dot"); + graph << "digraph G {\n"; + + ofstream log; + log.open("log.txt", std::ofstream::out | std::ofstream::app); + + auto t = std::time(nullptr); + auto tm = *std::localtime(&t); + log << "Run at: " << std::put_time(&tm, "%d-%m-%Y %H-%M-%S") << "\n"; + + // long ret = validateSubtree(root->left, minKey, maxKey, keys, graph, log, + // errorFound); + graph << "}"; + graph.close(); + + if (!errorFound) { + log << "Validated Successfully!\n"; + } + + log.close(); + + return !errorFound; +} + +template +void InternalKCAS::printDebuggingDetails() {} + +template +void InternalKCAS::freeSubtree(const int tid, + Node *node) { + if (node == NULL) + return; + freeSubtree(tid, node->left); + freeSubtree(tid, node->right); + // recmgr->reclaim(tid,node); +} diff --git a/artifact/ds/baseline/int_bst_pathcas/kcas.h b/artifact/ds/baseline/int_bst_pathcas/kcas.h new file mode 100644 index 0000000..81d19d2 --- /dev/null +++ b/artifact/ds/baseline/int_bst_pathcas/kcas.h @@ -0,0 +1,104 @@ +#pragma once +#include // Include this to use uintptr_t +#define casword_t uintptr_t // Move the definition of casword_t here +#define CASWORD_BITS_TYPE casword_t + +#ifndef likely +#define likely(x) __builtin_expect((x), 1) +#endif +#ifndef unlikely +#define unlikely(x) __builtin_expect((x), 0) +#endif + +template struct casword { +public: + CASWORD_BITS_TYPE volatile bits; + +public: + casword(); + + T setInitVal(T other); + + operator T(); + + T operator->(); + + T getValue(); + + casword_t getValueUnsafe(bool &isPtr); + + void addToDescriptor(T oldVal, T newVal); +}; + +#if defined KCAS_LOCKFREE +#include "kcas_reuse_impl.h" +#elif defined KCAS_HTM +#include "kcas_reuse_htm_impl.h" +#elif defined KCAS_HTM_FULL +#include "kcas_reuse_htm_full_impl.h" +#elif defined KCAS_VALIDATE 
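+// Typical usage of the kcas:: wrappers declared later in this header
+// (illustrative sketch; MAX_KCAS is supplied by the including client):
+//   kcas::start();
+//   kcas::add(&node->field, expectedVal, newVal); // repeat per location
+//   if (kcas::execute()) { /* all locations updated atomically */ }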
+#include "kcas_validate.h" +#elif defined KCAS_VALIDATE_HTM +#include "kcas_validate_htm.h" +#else +#error must define one of KCAS_LOCKFREE KCAS_HTM KCAS_HTM_FULL +#endif + +namespace kcas { +#if defined KCAS_LOCKFREE +KCASLockFree instance; +#elif defined KCAS_HTM +KCASHTM instance; +#elif defined KCAS_HTM_FULL +KCASHTM_FULL instance; +#elif defined KCAS_VALIDATE +KCASValidate instance; +#elif defined KCAS_VALIDATE_HTM +KCASValidateHTM instance; +#endif + +void writeInitPtr(casword_t volatile *addr, casword_t const newval) { + return instance.writeInitPtr(addr, newval); +} + +void writeInitVal(casword_t volatile *addr, casword_t const newval) { + return instance.writeInitVal(addr, newval); +} + +casword_t readPtr(casword_t volatile *addr) { return instance.readPtr(addr); } + +casword_t readVal(casword_t volatile *addr) { return instance.readVal(addr); } + +bool execute() { return instance.execute(); } + +inline kcasptr_t getDescriptor() { return instance.getDescriptor(); } + +bool start() { + instance.start(); + return true; +} + +template void add(casword *caswordptr, T oldVal, T newVal) { + return instance.add(caswordptr, oldVal, newVal); +} + +template +void add(casword *caswordptr, T oldVal, T newVal, Args... args) { + instance.add(caswordptr, oldVal, newVal, args...); +} + +#if defined KCAS_VALIDATE || defined KCAS_VALIDATE_HTM + +inline bool validate() { return instance.validate(); } + +inline bool validateAndExecute() { return instance.validateAndExecute(); } + +template inline casword_t visit(NodePtrType node) { + assert(node != NULL); + return instance.visit(node); +} +#endif + +}; // namespace kcas + +#include "casword.h" diff --git a/artifact/ds/baseline/int_bst_pathcas/kcas_reuse_htm_impl.h b/artifact/ds/baseline/int_bst_pathcas/kcas_reuse_htm_impl.h new file mode 100644 index 0000000..2b8bf10 --- /dev/null +++ b/artifact/ds/baseline/int_bst_pathcas/kcas_reuse_htm_impl.h @@ -0,0 +1,575 @@ +#pragma once + +#include +#include +#include +#include +#include + +using namespace std; + +/** + * Note: this algorithm supports a limited number of threads (print LAST_TID to + * see how many). It should be several thousand, at least. The alg can be + * tweaked to support more. + */ + +#define BOOL_CAS __sync_bool_compare_and_swap +#define VAL_CAS __sync_val_compare_and_swap + +/** + * + * Descriptor reuse macros + * + */ + +/** + * seqbits_t corresponds to the seqBits field of the descriptor. + * it contains the mutable fields of the descriptor and a sequence number. + * the width, offset and mask for the sequence number is defined below. + * this sequence number width, offset and mask are also shared by tagptr_t. + * + * in particular, for any tagptr_t x and seqbits_t y, the sequence numbers + * in x and y are equal iff x&MASK_SEQ == y&MASK_SEQ (despite differing types). + * + * tagptr_t consists of a triple . + * these three fields are defined by the TAGPTR_ macros below. 
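+ *
+ * Layout sketch: the low TAGPTR_OFFSET_TID bits hold the user tag (e.g.,
+ * RDCSS_TAGBIT or KCAS_TAGBIT), bits [TAGPTR_OFFSET_TID, OFFSET_SEQ) hold
+ * the owner's thread id, and bits [OFFSET_SEQ, OFFSET_SEQ + WIDTH_SEQ) hold
+ * the sequence number.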
+ */ + +typedef intptr_t tagptr_t; +typedef intptr_t seqbits_t; +#include + +#ifndef WIDTH_SEQ +#define WIDTH_SEQ 48 +#endif +#define OFFSET_SEQ 14 +#define MASK_SEQ \ + ((uintptr_t)((1LL << WIDTH_SEQ) - 1) \ + << OFFSET_SEQ) /* cast to avoid signed bit shifting */ +#define UNPACK_SEQ(tagptrOrSeqbits) \ + (((uintptr_t)(tagptrOrSeqbits)) >> OFFSET_SEQ) + +#define TAGPTR_OFFSET_USER 0 +#define TAGPTR_OFFSET_TID 3 +#define TAGPTR_MASK_USER \ + ((1 << TAGPTR_OFFSET_TID) - 1) /* assumes TID is next field after USER */ +#define TAGPTR_MASK_TID (((1 << OFFSET_SEQ) - 1) & (~(TAGPTR_MASK_USER))) +#define TAGPTR_UNPACK_TID(tagptr) \ + ((int)((((tagptr_t)(tagptr)) & TAGPTR_MASK_TID) >> TAGPTR_OFFSET_TID)) +#define TAGPTR_UNPACK_PTR(descArray, tagptr) \ + (&(descArray)[TAGPTR_UNPACK_TID((tagptr))]) +#define TAGPTR_NEW(tid, seqBits, userBits) \ + ((tagptr_t)(((UNPACK_SEQ(seqBits)) << OFFSET_SEQ) | \ + ((tid) << TAGPTR_OFFSET_TID) | \ + (tagptr_t)(userBits) << TAGPTR_OFFSET_USER)) +// assert: there is no thread with tid DUMMY_TID that ever calls TAGPTR_NEW +#define LAST_TID (TAGPTR_MASK_TID >> TAGPTR_OFFSET_TID) +#define TAGPTR_STATIC_DESC(id) ((tagptr_t)TAGPTR_NEW(LAST_TID - 1 - id, 0)) +#define TAGPTR_DUMMY_DESC(id) ((tagptr_t)TAGPTR_NEW(LAST_TID, id << OFFSET_SEQ)) + +#define comma , + +#define SEQBITS_UNPACK_FIELD(seqBits, mask, offset) \ + ((((seqbits_t)(seqBits)) & (mask)) >> (offset)) +// TODO: make more efficient version "SEQBITS_CAS_BIT" +// TODO: change sequence # unpacking to masking for quick comparison +// note: if there is only one subfield besides seq#, then the third if-block is +// redundant, and you should just return false if the cas fails, since the only +// way the cas fails and the field being cas'd contains still old is if the +// sequence number has changed. 
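+// SEQBITS_CAS_FIELD (summary): CAS the subfield selected by (mask, offset)
+// from `oldval` to `val`, giving up -- with successBit = false -- as soon as
+// the sequence number no longer matches snapSeqBits (i.e., the descriptor
+// was reused) or the subfield no longer holds oldval.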
+#define SEQBITS_CAS_FIELD(successBit, fldSeqBits, snapSeqBits, oldval, val, \ + mask, offset) \ + { \ + seqbits_t __v = (fldSeqBits); \ + while (1) { \ + if (UNPACK_SEQ(__v) != UNPACK_SEQ((snapSeqBits))) { \ + (successBit) = false; \ + break; \ + } \ + if (((successBit) = __sync_bool_compare_and_swap( \ + &(fldSeqBits), (__v & ~(mask)) | (oldval), \ + (__v & ~(mask)) | ((val) << (offset))))) { \ + break; \ + } \ + __v = (fldSeqBits); \ + if (SEQBITS_UNPACK_FIELD(__v, (mask), (offset)) != (oldval)) { \ + (successBit) = false; \ + break; \ + } \ + } \ + } +// TODO: change sequence # unpacking to masking for quick comparison +// note: SEQBITS_FAA_FIELD would be very similar to SEQBITS_CAS_FIELD; i think +// one would simply delete the last if block and change the new val from +// (val)<seqBits before (at least the reading \ + portion of) the memcpy */ \ + (UNPACK_SEQ(__src->seqBits) == UNPACK_SEQ((tagptr))); \ + }) +#define DESC_READ_FIELD(successBit, fldSeqBits, tagptr, mask, offset) \ + ({ \ + seqbits_t __seqBits = (fldSeqBits); \ + successBit = (__seqBits & MASK_SEQ) == ((tagptr)&MASK_SEQ); \ + SEQBITS_UNPACK_FIELD(__seqBits, (mask), (offset)); \ + }) +#define DESC_NEW(descArray, macro_seqBitsNew, tid) \ + &(descArray)[(tid)]; \ + { /* note: only the process invoking this following macro can change the \ + sequence# */ \ + seqbits_t __v = (descArray)[(tid)].seqBits; \ + (descArray)[(tid)].seqBits = macro_seqBitsNew(__v); \ + /*__sync_synchronize();*/ \ + } +#define DESC_INITIALIZED(descArray, tid) \ + (descArray)[(tid)].seqBits = (descArray)[(tid)].seqBits + (1 << OFFSET_SEQ); + +#define DESC_INIT_ALL(descArray, macro_seqBitsNew) \ + { \ + for (int i = 0; i < (LAST_TID - 1); ++i) { \ + (descArray)[i].seqBits = macro_seqBitsNew(0); \ + } \ + } + +/** + * + * KCAS implementation + * + */ + +#define kcastagptr_t uintptr_t +#define rdcsstagptr_t uintptr_t +#define rdcssptr_t rdcssdesc_t * +#define kcasptr_t kcasdesc_t * +#define RDCSS_TAGBIT 0x1 +#define KCAS_TAGBIT 0x2 + +#define KCAS_STATE_UNDECIDED 0 +#define KCAS_STATE_SUCCEEDED 4 +#define KCAS_STATE_FAILED 8 + +#define KCAS_LEFTSHIFT 2 +#define HTM_READ_DESCRIPTOR 20 +#define HTM_BAD_OLD_VAL 30 +#define MAX_RETRIES 5 + +#define KCAS_MAX_THREADS 500 + +void *volatile thread_ids[KCAS_MAX_THREADS] = {}; + +class TIDGenerator { +public: + int myslot = -1; + + TIDGenerator() { + int i; + while (true) { + i = 0; + while (thread_ids[i]) { + ++i; + } + + assert(i < KCAS_MAX_THREADS); + if (__sync_bool_compare_and_swap(&thread_ids[i], 0, this)) { + myslot = i; + break; + } + } + } + + ~TIDGenerator() { thread_ids[myslot] = 0; } + + operator int() { return myslot; } + + int getId() { return myslot; } + + void explicitRelease() { thread_ids[myslot] = 0; } +}; + +thread_local TIDGenerator kcas_tid; + +struct rdcssdesc_t { + volatile seqbits_t seqBits; + casword_t volatile *addr1; + casword_t old1; + casword_t volatile *addr2; + casword_t old2; + casword_t new2; + const static int size = sizeof(seqBits) + sizeof(addr1) + sizeof(old1) + + sizeof(addr2) + sizeof(old2) + sizeof(new2); + volatile char padding[128 + ((64 - size % 64) % + 64)]; // add padding to prevent false sharing +}; + +struct kcasentry_t { // just part of kcasdesc_t, not a standalone descriptor + casword_t volatile *addr; + casword_t oldval; + casword_t newval; +}; + +template class kcasdesc_t { +public: + volatile seqbits_t seqBits; + casword_t numEntries; + kcasentry_t entries[MAX_K]; + const static int size = + sizeof(seqBits) + sizeof(numEntries) + sizeof(entries); + volatile char 
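+  /* i.e., round the struct up to a 64-byte multiple, plus two extra cache
+   * lines of separation */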
padding[128 + ((64 - size % 64) % + 64)]; // add padding to prevent false sharing + + inline void addValAddr(casword_t volatile *addr, casword_t oldval, + casword_t newval) { + entries[numEntries].addr = addr; + entries[numEntries].oldval = oldval << KCAS_LEFTSHIFT; + entries[numEntries].newval = newval << KCAS_LEFTSHIFT; + ++numEntries; + assert(numEntries <= MAX_K); + } + + inline void addPtrAddr(casword_t volatile *addr, casword_t oldval, + casword_t newval) { + entries[numEntries].addr = addr; + entries[numEntries].oldval = oldval; + entries[numEntries].newval = newval; + ++numEntries; + assert(numEntries <= MAX_K); + } +}; + +inline static bool isRdcss(casword_t val) { return (val & RDCSS_TAGBIT); } + +inline static bool isKcas(casword_t val) { return (val & KCAS_TAGBIT); } + +template class KCASHTM { +public: + /** + * Data definitions + */ +private: + // descriptor reduction algorithm +#define KCAS_SEQBITS_OFFSET_STATE 0 +#define KCAS_SEQBITS_MASK_STATE 0xf +#define KCAS_SEQBITS_NEW(seqBits) \ + ((((seqBits)&MASK_SEQ) + (1 << OFFSET_SEQ)) | \ + (KCAS_STATE_UNDECIDED << KCAS_SEQBITS_OFFSET_STATE)) +#define RDCSS_SEQBITS_NEW(seqBits) (((seqBits)&MASK_SEQ) + (1 << OFFSET_SEQ)) + volatile char __padding_desc[128]; + kcasdesc_t kcasDescriptors[LAST_TID + 1] __attribute__((aligned(64))); + rdcssdesc_t rdcssDescriptors[LAST_TID + 1] __attribute__((aligned(64))); + volatile char __padding_desc3[128]; + + /** + * Function declarations + */ +public: + KCASHTM(); + void writeInitPtr(casword_t volatile *addr, casword_t const newval); + void writeInitVal(casword_t volatile *addr, casword_t const newval); + casword_t readPtr(casword_t volatile *addr); + casword_t readVal(casword_t volatile *addr); + bool execute(); + + kcasptr_t getDescriptor(); + bool start(); + casword_t rdcssRead(casword_t volatile *addr); + void helpOther(kcastagptr_t tagptr); + void deinitThread(); + template void add(casword *caswordptr, T oldVal, T newVal); + template + void add(casword *caswordptr, T oldVal, T newVal, Args... 
args); + +private: + casword_t rdcss(rdcssptr_t ptr, rdcsstagptr_t tagptr); + bool help(kcastagptr_t tagptr, kcasptr_t ptr, bool helpingOther); + void rdcssHelp(rdcsstagptr_t tagptr, rdcssptr_t snapshot, bool helpingOther); + void rdcssHelpOther(rdcsstagptr_t tagptr); +}; + +template +void KCASHTM::rdcssHelp(rdcsstagptr_t tagptr, rdcssptr_t snapshot, + bool helpingOther) { + bool readSuccess; + casword_t v = + DESC_READ_FIELD(readSuccess, *snapshot->addr1, snapshot->old1, + KCAS_SEQBITS_MASK_STATE, KCAS_SEQBITS_OFFSET_STATE); + if (!readSuccess) + v = KCAS_STATE_SUCCEEDED; // return; + + if (v == KCAS_STATE_UNDECIDED) { + BOOL_CAS(snapshot->addr2, (casword_t)tagptr, snapshot->new2); + } else { + // the "fuck it i'm done" action (the same action you'd take if the kcas + // descriptor hung around indefinitely) + BOOL_CAS(snapshot->addr2, (casword_t)tagptr, snapshot->old2); + } +} + +template void KCASHTM::rdcssHelpOther(rdcsstagptr_t tagptr) { + rdcssdesc_t newSnapshot; + const int sz = rdcssdesc_t::size; + if (DESC_SNAPSHOT(rdcssdesc_t, rdcssDescriptors, &newSnapshot, tagptr, sz)) { + rdcssHelp(tagptr, &newSnapshot, true); + } +} + +template +casword_t KCASHTM::rdcss(rdcssptr_t ptr, rdcsstagptr_t tagptr) { + casword_t r; + do { + r = VAL_CAS(ptr->addr2, ptr->old2, (casword_t)tagptr); + if (unlikely(isRdcss(r))) { + rdcssHelpOther((rdcsstagptr_t)r); + } else + break; + } while (true); + if (r == ptr->old2) + rdcssHelp(tagptr, ptr, false); // finish our own operation + return r; +} + +template +casword_t KCASHTM::rdcssRead(casword_t volatile *addr) { + casword_t r; + do { + r = *addr; + if (unlikely(isRdcss(r))) { + rdcssHelpOther((rdcsstagptr_t)r); + } else + break; + } while (true); + return r; +} + +template KCASHTM::KCASHTM() { + DESC_INIT_ALL(kcasDescriptors, KCAS_SEQBITS_NEW); + DESC_INIT_ALL(rdcssDescriptors, RDCSS_SEQBITS_NEW); +} + +template void KCASHTM::helpOther(kcastagptr_t tagptr) { + kcasdesc_t newSnapshot; + const int sz = kcasdesc_t::size; + // cout<<"size of kcas descriptor is "<)<<" and + // sz="<, kcasDescriptors, &newSnapshot, tagptr, + sz)) { + help(tagptr, &newSnapshot, true); + } +} + +template +bool KCASHTM::help(kcastagptr_t tagptr, kcasptr_t snapshot, + bool helpingOther) { + // phase 1: "locking" addresses for this kcas + int newstate; + + // read state field + kcasptr_t ptr = TAGPTR_UNPACK_PTR(kcasDescriptors, tagptr); + bool successBit; + int state = + DESC_READ_FIELD(successBit, ptr->seqBits, tagptr, KCAS_SEQBITS_MASK_STATE, + KCAS_SEQBITS_OFFSET_STATE); + if (!successBit) { + assert(helpingOther); + return false; + } + + if (state == KCAS_STATE_UNDECIDED) { + newstate = KCAS_STATE_SUCCEEDED; + for (int i = helpingOther; i < snapshot->numEntries; i++) { + retry_entry: + // prepare rdcss descriptor and run rdcss + rdcssdesc_t *rdcssptr = + DESC_NEW(rdcssDescriptors, RDCSS_SEQBITS_NEW, kcas_tid.getId()); + rdcssptr->addr1 = (casword_t *)&ptr->seqBits; + rdcssptr->old1 = tagptr; // pass the sequence number (as part of tagptr) + rdcssptr->old2 = snapshot->entries[i].oldval; + rdcssptr->addr2 = snapshot->entries[i].addr; // p stopped here (step 2) + rdcssptr->new2 = (casword_t)tagptr; + DESC_INITIALIZED(rdcssDescriptors, kcas_tid.getId()); + + casword_t val; + val = rdcss(rdcssptr, TAGPTR_NEW(kcas_tid.getId(), rdcssptr->seqBits, + RDCSS_TAGBIT)); + + // check for failure of rdcss and handle it + if (isKcas(val)) { + // if rdcss failed because of a /different/ kcas, we help it + if (val != (casword_t)tagptr) { + helpOther((kcastagptr_t)val); + goto retry_entry; + } + 
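+        /* otherwise val == tagptr: a helper already installed our own
+         * descriptor at this address, so the entry is effectively locked
+         * and we can move on */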
} else { + if (val != snapshot->entries[i].oldval) { + newstate = KCAS_STATE_FAILED; + break; + } + } + } + SEQBITS_CAS_FIELD(successBit, ptr->seqBits, snapshot->seqBits, + KCAS_STATE_UNDECIDED, newstate, KCAS_SEQBITS_MASK_STATE, + KCAS_SEQBITS_OFFSET_STATE); + } + // phase 2 (all addresses are now "locked" for this kcas) + state = DESC_READ_FIELD(successBit, ptr->seqBits, tagptr, + KCAS_SEQBITS_MASK_STATE, KCAS_SEQBITS_OFFSET_STATE); + if (!successBit) + return false; + + bool succeeded = (state == KCAS_STATE_SUCCEEDED); + + for (int i = 0; i < snapshot->numEntries; i++) { + casword_t newval = + succeeded ? snapshot->entries[i].newval : snapshot->entries[i].oldval; + BOOL_CAS(snapshot->entries[i].addr, (casword_t)tagptr, newval); + } + + return succeeded; +} + +// TODO: replace crappy bubblesort with something fast for large MAX_K (maybe +// even use insertion sort for small MAX_K) +template static void kcasdesc_sort(kcasptr_t ptr) { + kcasentry_t temp; + bool swapped = false; + for (int i = 0; i < ptr->numEntries; i++) { + for (int j = 0; j < ptr->numEntries - i - 1; j++) { + if (ptr->entries[j].addr > ptr->entries[j + 1].addr) { + temp = ptr->entries[j]; + ptr->entries[j] = ptr->entries[j + 1]; + ptr->entries[j + 1] = temp; + swapped = true; + } + } + if (!swapped) + break; + } +} + +template bool KCASHTM::execute() { + assert(kcas_tid.getId() != -1); + + auto desc = &kcasDescriptors[kcas_tid.getId()]; + // sort entries in the kcas descriptor to guarantee progress + + DESC_INITIALIZED(kcasDescriptors, kcas_tid.getId()); + kcastagptr_t tagptr = + TAGPTR_NEW(kcas_tid.getId(), desc->seqBits, KCAS_TAGBIT); + + for (int i = 0; i < MAX_RETRIES; i++) { + int status; + if ((status = _xbegin()) == _XBEGIN_STARTED) { + for (int j = 0; j < desc->numEntries; j++) { + casword_t val = *desc->entries[j].addr; + if (val != desc->entries[j].oldval) { + if (isKcas(val)) + _xabort(HTM_READ_DESCRIPTOR); + _xabort(HTM_BAD_OLD_VAL); + } + } + for (int j = 0; j < desc->numEntries; j++) { + *desc->entries[j].addr = desc->entries[j].newval; + } + _xend(); + return true; + } else { + if (_XABORT_EXPLICIT & status) { + if (_XABORT_CODE(status) == HTM_READ_DESCRIPTOR) { + break; + } else if (_XABORT_CODE(status) == HTM_BAD_OLD_VAL) { + return false; + } + } + } + } + + kcasdesc_sort(desc); + return help(tagptr, desc, false); +} + +template +inline casword_t KCASHTM::readPtr(casword_t volatile *addr) { + casword_t r; + do { + r = rdcssRead(addr); + if (unlikely(isKcas(r))) { + helpOther((kcastagptr_t)r); + } else + break; + } while (true); + return r; +} + +template +inline casword_t KCASHTM::readVal(casword_t volatile *addr) { + return ((casword_t)readPtr(addr)) >> KCAS_LEFTSHIFT; +} + +template +inline void KCASHTM::writeInitPtr(casword_t volatile *addr, + casword_t const newval) { + *addr = newval; +} + +template +inline void KCASHTM::writeInitVal(casword_t volatile *addr, + casword_t const newval) { + writeInitPtr(addr, newval << KCAS_LEFTSHIFT); +} + +template bool KCASHTM::start() { + // allocate a new kcas descriptor + kcasptr_t ptr = DESC_NEW(kcasDescriptors, KCAS_SEQBITS_NEW, kcas_tid.getId()); + ptr->numEntries = 0; + return true; +} + +template inline kcasptr_t KCASHTM::getDescriptor() { + return &kcasDescriptors[kcas_tid.getId()]; +} + +template void KCASHTM::deinitThread() { + kcas_tid.explicitRelease(); +} + +template +template +inline void KCASHTM::add(casword *caswordptr, T oldVal, T newVal) { + caswordptr->addToDescriptor(oldVal, newVal); +} + +template +template +void KCASHTM::add(casword 
*caswordptr, T oldVal, T newVal,
+                             Args... args) {
+  caswordptr->addToDescriptor(oldVal, newVal);
+  add(args...);
+}
diff --git a/artifact/ds/baseline/lazylist_omap.h b/artifact/ds/baseline/lazylist_omap.h
new file mode 100644
index 0000000..371b17c
--- /dev/null
+++ b/artifact/ds/baseline/lazylist_omap.h
@@ -0,0 +1,158 @@
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+
+/// An ordered map, implemented as a singly-linked list, using the lazy list
+/// algorithm. This map supports get(), insert(), and remove() operations.
+///
+/// @param K The type of the keys stored in this map
+/// @param V The type of the values stored in this map
+/// @param DESCRIPTOR A thread descriptor type, for safe memory reclamation
+template <typename K, typename V, typename DESCRIPTOR>
+class lazylist_omap {
+  /// A simple test-and-test-and-set lock
+  ///
+  /// NB: The original code used a pthread_spinlock_t, which was causing
+  ///     excessive latency. This implementation is faster, because it
+  ///     uses XCHG without a function call.
+  struct lock_t {
+    std::atomic<bool> lock_ = {false}; // An atomic bool to serve as the lock
+
+    /// Acquire the lock
+    void acquire() {
+      while (true) {
+        if (!lock_.exchange(true))
+          break;
+        while (lock_.load()) {
+        }
+      }
+    }
+
+    /// Release the lock
+    void release() { lock_.store(false); }
+  };
+
+  struct node_t : DESCRIPTOR::reclaimable_t {
+    const K key;
+    V val;
+    std::atomic<node_t *> next;
+    lock_t lock;
+    node_t(K _key, V _val) : key(_key), val(_val), next(nullptr) {}
+    ~node_t() {}
+  };
+
+public:
+  node_t *head; // A pointer to the list head sentinel
+  node_t *tail; // A pointer to the list tail sentinel
+
+  /// Default-construct a list by constructing and connecting two sentinel nodes
+  lazylist_omap(DESCRIPTOR *me, auto *cfg) {
+    head = new node_t(DUMMY_KEY, DUMMY_VAL);
+    tail = new node_t(DUMMY_KEY, DUMMY_VAL);
+    head->next = tail;
+  }
+
+private:
+  bool is_marked_ref(uintptr_t ptr) { return ptr & 1; }
+
+  uintptr_t unset_mark(uintptr_t ptr) { return ptr & (UINTPTR_MAX - 1); }
+
+  uintptr_t set_mark(uintptr_t ptr) { return ptr | 1; }
+
+  inline node_t *get_unmarked_ref(node_t *ptr) {
+    return (node_t *)unset_mark((uintptr_t)ptr);
+  }
+
+  inline node_t *get_marked_ref(node_t *ptr) {
+    return (node_t *)set_mark((uintptr_t)ptr);
+  }
+
+  /*
+   * Checking that both curr and pred are unmarked and that pred's next
+   * pointer points to curr to verify that the entries are adjacent and present
+   * in the list.
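+   * This is the standard lazy-list validation step (Heller et al.), executed
+   * while holding both pred's and curr's locks.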
+ */ + inline int parse_validate(node_t *pred, node_t *curr) { + return (!is_marked_ref((uintptr_t)pred->next.load()) && + !is_marked_ref((uintptr_t)curr->next.load()) && + (pred->next == curr)); + } + +public: + bool get(DESCRIPTOR *me, const K &key, V &val) { + node_t *curr = head; + while (curr->key < key && curr != tail) + curr = get_unmarked_ref(curr->next); + V v = curr->val; + auto res = ((curr != head) && (curr->key == key) && + !is_marked_ref((uintptr_t)curr->next.load())); + if (res) + val = v; + return res; + } + + // a failed validate is counted as an abort + bool insert(DESCRIPTOR *me, const K &key, const V &val) { + node_t *curr, *pred; + int result, validated, notVal; + while (true) { + pred = head; + curr = get_unmarked_ref(pred->next); + while (curr->key < key && curr != tail) { + pred = curr; + curr = get_unmarked_ref(curr->next); + } + pred->lock.acquire(); + curr->lock.acquire(); + validated = parse_validate(pred, curr); + notVal = (curr->key != key || curr == tail); + result = (validated && notVal); + if (result) { + node_t *newnode = new node_t(key, val); + newnode->next = curr; + pred->next = newnode; + } + curr->lock.release(); + pred->lock.release(); + if (validated) + return result; + } + } + + /* + * Logically remove an element by setting a mark bit to 1 + * before removing it physically. + * + * NB. it is not safe to free the element after physical deletion as a + * pre-empted find operation may currently be parsing the element. + * TODO: must implement a stop-the-world garbage collector to correctly + * free the memory. + */ + // a failed validate is counted as an abort + bool remove(DESCRIPTOR *me, const K &key) { + node_t *pred, *curr; + int result, validated, isVal; + while (1) { + pred = head; + curr = get_unmarked_ref(pred->next); + while (curr->key < key && curr != tail) { + pred = curr; + curr = get_unmarked_ref(curr->next); + } + pred->lock.acquire(); + curr->lock.acquire(); + validated = parse_validate(pred, curr); + isVal = key == curr->key && curr != tail; + result = validated && isVal; + if (result) { + curr->next = get_marked_ref(curr->next); + pred->next = get_unmarked_ref(curr->next); + me->reclaim(curr); + } + curr->lock.release(); + pred->lock.release(); + if (validated) + return result; + } + } +}; diff --git a/artifact/ds/baseline/lfskiplist_omap.h b/artifact/ds/baseline/lfskiplist_omap.h new file mode 100644 index 0000000..862232a --- /dev/null +++ b/artifact/ds/baseline/lfskiplist_omap.h @@ -0,0 +1,561 @@ +/****************************************************************************** + * Skip lists, allowing concurrent update by use of CAS primitives. + * + * Copyright (c) 2001-2003, K A Fraser + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * A really simple random-number generator. Crappy linear congruential + * taken from glibc, but has at least a 2^32 period. + */ + +#define rand_next(_ptst) ((_ptst)->rand = ((_ptst)->rand * 1103515245) + 12345) + +struct ptst_t { + unsigned long rand; +}; + +extern pthread_key_t ptst_key; + +typedef unsigned long long tick_t; +#define RDTICK() \ + ({ \ + tick_t __t; \ + __asm__ __volatile__("rdtsc" : "=A"(__t)); \ + __t; \ + }) + +/* + * POINTER MARKING + */ + +#define get_marked_ref(_p) ((void *)(((unsigned long)(_p)) | 1)) +#define get_unmarked_ref(_p) ((void *)(((unsigned long)(_p)) & ~1)) +#define is_marked_ref(_p) (((unsigned long)(_p)) & 1) + +/* Number of unique block sizes we can deal with. */ +#define MAX_SIZES 20 + +typedef unsigned long setkey_t; +typedef void *setval_t; + +/************************************* + * INTERNAL DEFINITIONS + */ + +/* Fine for 2^NUM_LEVELS nodes. */ +#define NUM_LEVELS 20 + +/* Internal key values with special meanings. */ +#define SENTINEL_KEYMIN (1UL) /* Key value of first dummy node. */ +#define SENTINEL_KEYMAX (~0UL) /* Key value of last dummy node. */ + +/* + * Used internally by set access functions, so that callers can use + * key values 0 and 1, without knowing these have special meanings. + */ +#define CALLER_TO_INTERNAL_KEY(_k) ((_k) + 2) + +pthread_key_t ptst_key; + +/* + * Enter/leave a critical region. A thread gets a state handle for + * use during critical regions. + */ +void critical_exit(ptst_t *) {} + +ptst_t *critical_enter(void) { + ptst_t *ptst; + + ptst = (ptst_t *)pthread_getspecific(ptst_key); + if (ptst == NULL) { + + ptst = (ptst_t *)malloc(sizeof(*ptst)); + if (ptst == NULL) + exit(1); + memset(ptst, 0, sizeof(*ptst)); + // ptst->gc = gc_init(); + ptst->rand = RDTICK(); + + pthread_setspecific(ptst_key, ptst); + } + + // gc_enter(ptst); + return (ptst); +} + +static void ptst_destructor(ptst_t *ptst) {} + +void _init_ptst_subsystem(void) { + if (pthread_key_create(&ptst_key, (void (*)(void *))ptst_destructor)) { + exit(1); + } +} + +/* + * SKIP LIST + */ + +struct node_t { + std::atomic level; +#define LEVEL_MASK 0x0ff +#define READY_FOR_FREE 0x100 + setkey_t k; + std::atomic v; + std::atomic next[1]; +}; + +struct set_t { + node_t head; +}; + +/* + * PRIVATE FUNCTIONS + */ + +/* + * Random level generator. Drop-off rate is 0.5 per level. + * Returns value 1 <= level <= NUM_LEVELS. + */ +static int get_level(ptst_t *ptst) { + unsigned long r = rand_next(ptst); + int l = 1; + r = (r >> 4) & ((1 << (NUM_LEVELS - 1)) - 1); + while ((r & 1)) { + l++; + r >>= 1; + } + return (l); +} + +/* + * Allocate a new node, and initialise its @level field. + * NB. 
Initialisation will eventually be pushed into garbage collector, + * because of dependent read reordering. + */ +static node_t *alloc_node(ptst_t *ptst) { + int l; + node_t *n; + l = get_level(ptst); + // NB: Removed call to gc_alloc, because everything should use the same + // allocator + // n = (node_t *)gc_alloc(ptst, gc_id[l - 1]); + n = (node_t *)malloc(sizeof(*n) + l * sizeof(node_t *)); + n->level.store(l, std::memory_order_relaxed); + return (n); +} + +/* Free a node to the garbage collector. */ +static void free_node(ptst_t *ptst, node_t *n) { + // NB: For now, just leak it... + // gc_free(ptst, (void *)n, gc_id[(n->level & LEVEL_MASK) - 1]); +} + +/* + * Search for first non-deleted node, N, with key >= @k at each level in @l. + * RETURN VALUES: + * Array @pa: @pa[i] is non-deleted predecessor of N at level i + * Array @na: @na[i] is N itself, which should be pointed at by @pa[i] + * MAIN RETURN VALUE: same as @na[0]. + */ +static node_t *strong_search_predecessors(set_t *l, setkey_t k, node_t **pa, + node_t **na) { + node_t *x, *x_next, *old_x_next, *y, *y_next; + setkey_t y_k; + int i; + +retry: + // RMB(); // NB: This fence was unnecessary + + x = &l->head; + for (i = NUM_LEVELS - 1; i >= 0; i--) { + /* We start our search at previous level's unmarked predecessor. */ + x_next = x->next[i].load(); + /* If this pointer's marked, so is @pa[i+1]. May as well retry. */ + if (is_marked_ref(x_next)) + goto retry; + + for (y = x_next;; y = y_next) { + /* Shift over a sequence of marked nodes. */ + for (;;) { + y_next = y->next[i].load(); + if (!is_marked_ref(y_next)) + break; + y = (node_t *)get_unmarked_ref(y_next); + } + + y_k = y->k; + if (y_k >= k) + break; + + /* Update estimate of predecessor at this level. */ + x = y; + x_next = y_next; + } + + /* Swing forward pointer over any marked nodes. */ + if (x_next != y) { + old_x_next = x_next; + x->next[i].compare_exchange_strong(old_x_next, y); + if (old_x_next != x_next) + goto retry; + } + + if (pa) + pa[i] = x; + if (na) + na[i] = y; + } + + return (y); +} + +/* This function does not remove marked nodes. Use it optimistically. */ +node_t *weak_search_predecessors(set_t *l, setkey_t k, node_t **pa, + node_t **na) { + node_t *x, *x_next; + setkey_t x_next_k; + int i; + + x = &l->head; + for (i = NUM_LEVELS - 1; i >= 0; i--) { + for (;;) { + x_next = x->next[i].load(); + x_next = (node_t *)get_unmarked_ref(x_next); + + x_next_k = x_next->k; + if (x_next_k >= k) + break; + + x = x_next; + } + + if (pa) + pa[i] = x; + if (na) + na[i] = x_next; + } + + return (x_next); +} + +/* + * Mark @x deleted at every level in its list from @level down to level 1. + * When all forward pointers are marked, node is effectively deleted. + * Future searches will properly remove node by swinging predecessors' + * forward pointers. 
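+ * (Levels are marked top-down, from `level - 1` to 0, so a node vanishes
+ * from the index levels before it vanishes from the bottom-level list.)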
+ */ +static void mark_deleted(node_t *x, int level) { + node_t *x_next; + + while (--level >= 0) { + x_next = x->next[level].load(); + while (!is_marked_ref(x_next)) { + x->next[level].compare_exchange_strong(x_next, + (node_t *)get_marked_ref(x_next)); + } + } +} + +static int check_for_full_delete(node_t *x) { + int level = x->level.load(); + return ((level & READY_FOR_FREE) || + !x->level.compare_exchange_strong(level, level | READY_FOR_FREE)); +} + +static void do_full_delete(ptst_t *ptst, set_t *l, node_t *x, int level) { + int k = x->k; + (void)strong_search_predecessors(l, k, NULL, NULL); + free_node(ptst, x); +} + +/* + * PUBLIC FUNCTIONS + */ + +set_t *set_alloc(void) { + set_t *l; + node_t *n; + int i; + + n = (node_t *)malloc(sizeof(*n) + (NUM_LEVELS - 1) * sizeof(node_t *)); + n->k = SENTINEL_KEYMAX; + + /* + * Set the forward pointers of final node to other than NULL, + * otherwise READ_FIELD() will continually execute costly barriers. + * Note use of 0xfe -- that doesn't look like a marked value! + */ + for (i = 0; i < NUM_LEVELS; i++) { + n->next[i].store((node_t *)0xfe); + } + + l = (set_t *)malloc(sizeof(*l) + (NUM_LEVELS - 1) * sizeof(node_t *)); + l->head.k = SENTINEL_KEYMIN; + l->head.level.store(NUM_LEVELS, std::memory_order_relaxed); + for (i = 0; i < NUM_LEVELS; i++) { + l->head.next[i].store(n); + } + + return (l); +} + +bool set_update(set_t *l, setkey_t k, setval_t &v, int overwrite) { + setval_t ov, new_ov; + ptst_t *ptst; + node_t *preds[NUM_LEVELS]; + node_t *succs[NUM_LEVELS]; + node_t *pred, *succ, *_new = NULL, *new_next, *old_next; + int i, level; + bool result = false; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + succ = weak_search_predecessors(l, k, preds, succs); + +retry: + ov = NULL; + result = false; + + if (succ->k == k) { + /* Already a @k node in the list: update its mapping. */ + new_ov = succ->v; + // NB: Removed overwrite ability, for compatibility with harness + // do { + if ((ov = new_ov) == NULL) { + /* Finish deleting the node, then retry. */ + level = succ->level; + mark_deleted(succ, level & LEVEL_MASK); + succ = strong_search_predecessors(l, k, preds, succs); + goto retry; + } + // } while (overwrite && ((new_ov = CASPO(&succ->v, ov, v)) != ov)); + + if (_new != NULL) + free_node(ptst, _new); + goto out; + } + + result = true; + + /* Not in the list, so initialise a new node for insertion. */ + if (_new == NULL) { + _new = alloc_node(ptst); + _new->k = k; + _new->v.store(v, std::memory_order_relaxed); + } + level = _new->level; + + /* If successors don't change, this saves us some CAS operations. */ + for (i = 0; i < level; i++) { + _new->next[i].store(succs[i], std::memory_order_relaxed); + } + + /* We've committed when we've inserted at level 1. */ + // WMB_NEAR_CAS(); /* make sure node fully initialised before inserting */ + old_next = succ; + preds[0]->next[0].compare_exchange_strong(old_next, _new); + if (old_next != succ) { + succ = strong_search_predecessors(l, k, preds, succs); + goto retry; + } + + /* Insert at each of the other levels in turn. */ + i = 1; + while (i < level) { + pred = preds[i]; + succ = succs[i]; + + /* Someone *can* delete @_new under our feet! */ + new_next = _new->next[i].load(); + if (is_marked_ref(new_next)) + goto success; + + /* Ensure forward pointer of new node is up to date. 
*/ + if (new_next != succ) { + old_next = new_next; + _new->next[i].compare_exchange_strong(old_next, succ); + if (is_marked_ref(old_next)) + goto success; + assert(old_next == new_next); + } + + /* Ensure we have unique key values at every level. */ + if (succ->k == k) + goto new_world_view; + assert((pred->k < k) && (succ->k > k)); + + /* Replumb predecessor's forward pointer. */ + old_next = succ; + pred->next[i].compare_exchange_strong(old_next, _new); + if (old_next != succ) { + new_world_view: + // RMB(); /* get up-to-date view of the world. */ + (void)strong_search_predecessors(l, k, preds, succs); + continue; + } + + /* Succeeded at this level. */ + i++; + } + +success: + /* Ensure node is visible at all levels before punting deletion. */ + // WEAK_DEP_ORDER_WMB(); + if (check_for_full_delete(_new)) { + // MB(); /* make sure we see all marks in @new. */ + do_full_delete(ptst, l, _new, level - 1); + } +out: + critical_exit(ptst); + return (result); +} + +bool set_remove(set_t *l, setkey_t k, setval_t &v) { + setval_t new_v; + ptst_t *ptst; + node_t *preds[NUM_LEVELS], *x; + int level, i; + bool result = false; + + k = CALLER_TO_INTERNAL_KEY(k); + v = NULL; + + ptst = critical_enter(); + + x = weak_search_predecessors(l, k, preds, NULL); + + if (x->k > k) + goto out; + level = x->level; + level = level & LEVEL_MASK; + + /* Once we've marked the value field, the node is effectively deleted. */ + new_v = x->v; + for (;;) { + v = new_v; + if (v == NULL) + goto out; + + x->v.compare_exchange_strong(new_v, NULL); + if (new_v == v) + break; + } + + result = true; + + /* Committed to @x: mark lower-level forward pointers. */ + // WEAK_DEP_ORDER_WMB(); /* enforce above as linearisation point */ + mark_deleted(x, level); + + /* + * We must swing predecessors' pointers, or we can end up with + * an unbounded number of marked but not fully deleted nodes. + * Doing this creates a bound equal to number of threads in the system. + * Furthermore, we can't legitimately call 'free_node' until all shared + * references are gone. + */ + for (i = level - 1; i >= 0; i--) { + node_t *tmp = x; + preds[i]->next[i].compare_exchange_strong( + tmp, (node_t *)get_unmarked_ref(x->next[i].load())); + if (tmp != x) { + if ((i != (level - 1)) || check_for_full_delete(x)) { + // MB(); /* make sure we see node at all levels. 
*/
+        do_full_delete(ptst, l, x, i);
+      }
+      goto out;
+    }
+  }
+
+  free_node(ptst, x);
+
+out:
+  critical_exit(ptst);
+  return (result);
+}
+
+bool set_lookup(set_t *l, setkey_t k, setval_t &v) {
+  ptst_t *ptst;
+  node_t *x;
+
+  k = CALLER_TO_INTERNAL_KEY(k);
+
+  ptst = critical_enter();
+  bool res = false;
+
+  x = weak_search_predecessors(l, k, NULL, NULL);
+  if (x->k == k) {
+    v = x->v;
+    res = true;
+  }
+  critical_exit(ptst);
+
+  return res;
+}
+
+template <typename K, typename V, typename DESCRIPTOR> class fraser_skiplist {
+  set_t *sl;
+
+public:
+  fraser_skiplist(DESCRIPTOR *me, auto *cfg) {
+    _init_ptst_subsystem();
+    sl = set_alloc();
+  }
+
+  ~fraser_skiplist() {}
+
+  bool get(DESCRIPTOR *me, const K &k, V &v) { return set_lookup(sl, k, v); }
+
+  bool insert(DESCRIPTOR *me, const K &key, V &v) {
+    K k = key;
+    return set_update(sl, k, v, 0);
+  }
+
+  bool remove(DESCRIPTOR *me, const K &k) {
+    V v;
+    return set_remove(sl, k, v);
+  }
+};
diff --git a/artifact/ds/handSTM/dlist_carumap.h b/artifact/ds/handSTM/dlist_carumap.h
new file mode 100644
index 0000000..280c49f
--- /dev/null
+++ b/artifact/ds/handSTM/dlist_carumap.h
@@ -0,0 +1,499 @@
+#pragma once
+
+#include <bit>
+#include <cstdint>
+#include <functional>
+#include <utility>
+
+/// An unordered map, implemented as a resizable array of lists (closed
+/// addressing, resizable). This map supports get(), insert() and remove()
+/// operations.
+///
+/// This implementation is based loosely on Liu's nonblocking resizable hash
+/// table from PODC 2014. At the current time, we do not support the heuristic
+/// for contracting the list, but we do support expanding the list.
+///
+/// @tparam K The type of the keys stored in this map
+/// @tparam V The type of the values stored in this map
+/// @tparam HANDSTM The thread's descriptor type, for interacting with STM
+template <typename K, typename V, typename HANDSTM> class dlist_carumap {
+  using WOSTM = typename HANDSTM::WOSTM;
+  using ROSTM = typename HANDSTM::ROSTM;
+  using STM = typename HANDSTM::STM;
+  using ownable_t = typename HANDSTM::ownable_t;
+  template <typename T> using FIELD = typename HANDSTM::template xField<T>;
+
+  /// A list node. It has prev and next pointers, but no key or value. It's
+  /// useful for sentinels, so that K and V don't have to be default
+  /// constructable.
+  struct node_t : ownable_t {
+    FIELD<node_t *> prev; // Pointer to predecessor
+    FIELD<node_t *> next; // Pointer to successor
+
+    /// Construct a node
+    node_t() : ownable_t(), prev(nullptr), next(nullptr) {}
+
+    /// Destructor is a no-op, but it needs to be virtual because of inheritance
+    virtual ~node_t() {}
+  };
+
+  /// We need to know if buckets have been rehashed to a new table. We do this
+  /// by making the head of each bucket a `sentinel_t`, and adding a `closed`
+  /// bool. Note that the tail of each bucket's list is just a node_t.
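+  ///
+  /// A bucket's lifecycle is thus, roughly: created empty in a fresh table,
+  /// filled by inserts, then drained into the next table by a rehash that
+  /// finishes by setting `closed`, after which the bucket is never used again.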
+  struct sentinel_t : node_t {
+    /// Track if this sentinel is for a bucket that has been rehashed
+    FIELD<bool> closed;
+
+    /// Construct a sentinel_t
+    sentinel_t() : node_t(), closed(false) {}
+
+    /// Destructor is a no-op, but it needs to be virtual because of inheritance
+    virtual ~sentinel_t() {}
+  };
+
+  /// A list node that also has a key and value
+  struct data_t : node_t {
+    const K key;  // The key of this key/value pair
+    FIELD<V> val; // The value of this key/value pair
+
+    /// Construct a data_t
+    ///
+    /// @param _key The key that is stored in this node
+    /// @param _val The value that is stored in this node
+    data_t(const K &_key, const V &_val) : node_t(), key(_key), val(_val) {}
+
+    /// Destructor is a no-op, but it needs to be virtual because of inheritance
+    virtual ~data_t() {}
+  };
+
+  /// An array of lists, along with its size
+  ///
+  /// NB: to avoid indirection, the array is inlined into the tbl_t. To make
+  /// this compatible with SMR, tbl_t must be ownable.
+  class tbl_t : public ownable_t {
+    using bucket_t = FIELD<sentinel_t *>;
+
+    /// Construct a table
+    ///
+    /// @param _size The desired size of the table
+    tbl_t(uint64_t _size) : size(_size) {}
+
+  public:
+    const uint64_t size; // The size of the table
+    bucket_t tbl[];      // The buckets of the table
+
+    /// Allocate a tbl_t of size `size`
+    ///
+    /// @param size The desired size
+    /// @param tx The calling operation's descriptor
+    ///
+    /// @return A table, all of whose buckets are set to null
+    static tbl_t *make(uint64_t size, WOSTM &tx) {
+      tbl_t *tbl =
+          tx.LOG_NEW((tbl_t *)malloc(sizeof(tbl_t) + size * sizeof(bucket_t)));
+      auto ret = new (tbl) tbl_t(size);
+      for (size_t i = 0; i < size; ++i)
+        ret->tbl[i].set(tx, ret, nullptr);
+      return ret;
+    }
+
+    /// Destructor is a no-op, but it needs to be virtual because of inheritance
+    virtual ~tbl_t() {}
+  };
+
+  ownable_t *tbl_orec;    // An orec for protecting `active` and `frozen`
+  FIELD<tbl_t *> active;  // The active table
+  FIELD<tbl_t *> frozen;  // The frozen table
+  std::hash<K> _pre_hash; // A weak hash function for converting keys to ints
+  const uint64_t RESIZE_THRESHOLD; // Max bucket size before resizing
+
+  /// Given a key, determine the bucket into which it should go. As in the Liu
+  /// hash, we do not change the hash function when we resize, we just change
+  /// the number of bits to use
+  ///
+  /// @param key The key to hash
+  /// @param size The size of the table into which this should be hashed
+  ///
+  /// @return An integer in [0, size)
+  uint64_t table_hash(HANDSTM *me, const K &key, const uint64_t size) const {
+    return me->hash(_pre_hash(key)) % size;
+  }
+
+public:
+  /// Default construct a map as having a valid active table.
+  ///
+  /// NB: Calls std::terminate if the provided size is not a power of 2.
+  ///
+  /// @param me The operation that is creating this umap
+  /// @param cfg A config object with `buckets` and `resize_threshold`
+  dlist_carumap(HANDSTM *me, auto *cfg)
+      : tbl_orec(new ownable_t()), RESIZE_THRESHOLD(cfg->resize_threshold) {
+    // Enforce a power-of-2 initial size
+    if (std::popcount(cfg->buckets) != 1)
+      throw("cfg->buckets should be power of 2");
+
+    // Create an initial active table in which all of the buckets are
+    // initialized but empty (null <- head <-> tail -> null).
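+    //
+    // For example (a sketch, with cfg->buckets == 4): tbl[0..3] each point to
+    // a head <-> tail pair, and `frozen` stays null, so no operation consults
+    // the frozen table until the first resize().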
+ BEGIN_WO(me); + auto n = tbl_t::make(cfg->buckets, wo); + for (size_t i = 0; i < cfg->buckets; ++i) + n->tbl[i].set(wo, n, create_list(wo)); + // NB: since all buckets are initialized, nobody will ever go to the + // frozen table, so we can leave it as null + active.set(wo, tbl_orec, n); + frozen.set(wo, tbl_orec, nullptr); + } + +private: + /// Create a dlist with head and tail sentinels + /// + /// @param tx A writing TM context. Even though this code can't fail, we need + /// the context in order to use tm_field correctly. + /// + /// @return A pointer to the head sentinel of the list + sentinel_t *create_list(WOSTM &tx) { + // NB: By default, a node's prev and next will be nullptr, which is what we + // want for head->prev and tail->next. + auto head = tx.LOG_NEW(new sentinel_t()); + auto tail = tx.LOG_NEW(new node_t()); + head->next.set(tx, head, tail); + tail->prev.set(tx, tail, head); + return head; + } + + /// `resize()` is an internal method for changing the size of the active + /// table. Strictly speaking, it should be called `expand`, because for now we + /// only support expansion, not contraction. When `insert()` discovers that + /// it has made a bucket "too big", it will linearize its insertion, then call + /// resize(). + /// + /// At a high level, `resize()` is supposed to be open-nested and not to incur + /// any blocking, except due to orec conflicts. We accomplish this through + /// laziness and stealing. resize() finishes the /last/ resize, moves the + /// `active` table to `frozen`, and installs a new `active` table. Subsequent + /// operations will do most of the migrating. Note that resize() returns once + /// *anyone* resizes the table. + /// + /// @param me The calling thread's descriptor + /// @param a_tbl The active table, to resize + void resize(HANDSTM *me, tbl_t *a_tbl) { + tbl_t *ft = nullptr; // The frozen table + while (true) { + // If ft is null, then there's no frozen table, so just install a new + // active table and all is good. + { + BEGIN_WO(me); + // If someone else initiated a resize, then this attempt can end + // immediately + auto new_at = active.get(wo, tbl_orec); + if (new_at != a_tbl) + return; + + // If the frozen table is clean, just do a swap and we're done + ft = frozen.get(wo, tbl_orec); + if (ft == nullptr) { + // Make and initialize a table that is twice as big, move active to + // frozen, and make the new table active. + auto new_tbl = tbl_t::make(a_tbl->size * 2, wo); + frozen.set(wo, tbl_orec, a_tbl); + active.set(wo, tbl_orec, new_tbl); + return; + } + } + + // There is still an incomplete migration from frozen to active. Migrate + // everything out of frozen, remove the frozen table, and retry + prepare_resize(me, ft, a_tbl); + } + } + + /// Finish one lazy resize, so that another may begin. + /// + /// This really just boils down to migrating everything from `frozen` to + /// `active` and then nulling `frozen` and reclaiming it. + /// + /// NB: This code takes the "frozen" and "active" tables as arguments. + /// Consequently, we don't care about arbitrary delays. If a thread calls + /// this, rehashes half the table, and then suspends, another thread can + /// rehash everything else and install a new active table. When the first + /// thread wakes, it'll find a bunch of empty buckets, and it'll be safe. 
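+  ///
+  /// Put differently, migration is idempotent: rehash_expand_bucket() returns
+  /// false for any bucket whose `closed` flag is already set, so two threads
+  /// racing on the same bucket cannot migrate its nodes twice.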
+  ///
+  /// @param me The calling thread's descriptor
+  /// @param f_tbl The "frozen table", really the "source" table
+  /// @param a_tbl The "active table", really the "destination" table
+  void prepare_resize(HANDSTM *me, tbl_t *f_tbl, tbl_t *a_tbl) {
+    // NB: Right now, next_index == completed. If we randomized the start
+    // point, concurrent calls to prepare_resize() would contend less
+    uint64_t next_index = 0; // Next bucket to migrate
+    uint64_t completed = 0;  // Number of buckets migrated
+
+    // Migrate all data from `frozen` to `active`
+    while (completed != f_tbl->size) {
+      BEGIN_WO(me);
+
+      // Try to rehash the next bucket. If it was already rehashed, there's a
+      // chance that the current resize phase is finished, so check
+      auto bkt = f_tbl->tbl[next_index].get(wo, f_tbl);
+      if (!rehash_expand_bucket(me, bkt, next_index, f_tbl->size, a_tbl, wo)) {
+        // NB: A "finished" concurrent resize will change `active`, but if
+        // another thread just got past this loop and uninstalled `frozen`,
+        // that's also cause for early return.
+        if (active.get(wo, tbl_orec) != a_tbl || !frozen.get(wo, tbl_orec))
+          return;
+      }
+
+      // Move to the next bucket
+      ++next_index;
+      ++completed;
+    }
+
+    // Try to uninstall the `frozen` table, since it has been emptied.
+    {
+      BEGIN_WO(me);
+      if (frozen.get(wo, tbl_orec) != f_tbl)
+        return;
+      frozen.set(wo, tbl_orec, nullptr);
+    }
+
+    // Reclaim `f_tbl`'s buckets, then `f_tbl` itself
+    //
+    // NB: This needs to be a transaction because of the API for reclamation,
+    // but there shouldn't be conflicts.
+    {
+      BEGIN_WO(me);
+      for (size_t i = 0; i < f_tbl->size; i++) {
+        // reclaim head and tail of each bucket
+        auto head = f_tbl->tbl[i].get(wo, f_tbl);
+        auto tail = head->next.get(wo, head);
+        wo.reclaim(head);
+        wo.reclaim(tail);
+      }
+      wo.reclaim(f_tbl);
+    }
+  }
+
+  /// Get a pointer to the bucket in the active table that holds `key`. This
+  /// may cause some rehashing to happen.
+  ///
+  /// NB: The pattern here is unconventional. get_bucket() is the first step in
+  /// a writer transaction. If it doesn't rehash, then the caller
+  /// continues its operation. If it does rehash, then the caller
+  /// commits and restarts, which is a poor-man's open-nested transaction.
+  /// If it encounters an inconsistency, the caller will be aborted.
+  ///
+  /// @param me The calling thread's descriptor
+  /// @param key The key whose bucket is sought
+  /// @param tx An active WOSTM transaction
+  ///
+  /// @return On success, a pointer to the head of a bucket; nullptr on any
+  /// rehash.
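+  ///
+  /// A typical caller therefore loops along these lines (sketch):
+  ///
+  ///   while (true) {
+  ///     BEGIN_WO(me);
+  ///     auto bucket = get_bucket(me, key, wo);
+  ///     if (!bucket)
+  ///       continue; // commit the rehash, then retry
+  ///     ... // operate on `bucket`, then return
+  ///   }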
+  node_t *get_bucket(HANDSTM *me, const K &key, WOSTM &tx) {
+    // Get the head of the appropriate bucket in the active table
+    //
+    // NB: Validate or else a_tbl[a_idx] could be out of bounds
+    auto a_tbl = active.get(tx, tbl_orec);
+    auto a_idx = table_hash(me, key, a_tbl->size);
+    if (auto a_bucket = a_tbl->tbl[a_idx].get(tx, a_tbl))
+      return a_bucket; // not null --> no resize needed
+
+    // Find the bucket in the frozen table that needs rehashing
+    auto f_tbl = frozen.get(tx, tbl_orec);
+    auto f_idx = table_hash(me, key, f_tbl->size);
+    auto f_bucket = f_tbl->tbl[f_idx].get(tx, f_tbl);
+
+    // Rehash it, tell caller to commit so the rehash appears to be open nested
+    //
+    // NB: if the rehash fails, it's due to someone else rehashing, which is OK
+    rehash_expand_bucket(me, f_bucket, f_idx, f_tbl->size, a_tbl, tx);
+    return nullptr;
+  }
+
+  /// Re-hash one list in the frozen table into two lists in the active table
+  ///
+  /// @param me The calling thread's descriptor
+  /// @param f_list A pointer to an (acquired!) list head in the frozen table
+  /// @param f_idx The index of f_list in the frozen table
+  /// @param f_size The size of the frozen table
+  /// @param a_tbl A reference to the active table
+  /// @param tx An active WOSTM transaction
+  ///
+  /// @return true if the frozen bucket was rehashed into `a_tbl`;
+  ///         false if the bucket had already been rehashed (closed)
+  bool rehash_expand_bucket(HANDSTM *me, sentinel_t *f_list, uint64_t f_idx,
+                            uint64_t f_size, tbl_t *a_tbl, WOSTM &tx) {
+    // Stop if this bucket is already rehashed
+    if (f_list->closed.get(tx, f_list))
+      return false;
+
+    // Shuffle nodes from f_list into two new lists that will go into `a_tbl`
+    auto l1 = create_list(tx), l2 = create_list(tx);
+    auto curr = f_list->next.get(tx, f_list);
+    while (curr->next.get(tx, curr) != nullptr) {
+      auto next = curr->next.get(tx, curr);
+      auto data = static_cast<data_t *>(curr);
+      auto dest = table_hash(me, data->key, a_tbl->size) == f_idx ? l1 : l2;
+      auto succ = dest->next.get(tx, dest);
+      dest->next.set(tx, dest, data);
+      data->next.set(tx, data, succ);
+      data->prev.set(tx, data, dest);
+      succ->prev.set(tx, succ, data);
+      curr = next;
+    }
+    // curr is tail, set head->tail
+    f_list->next.set(tx, f_list, curr);
+    // put the lists into the active table, close the frozen bucket
+    a_tbl->tbl[f_idx].set(tx, a_tbl, l1);
+    a_tbl->tbl[f_idx + f_size].set(tx, a_tbl, l2);
+    f_list->closed.set(tx, f_list, true);
+    return true;
+  }
+
+  /// Given the head sentinel of a list, search through the list to find the
+  /// node with key `key`, if such a node exists in the list.
If it doesn't,
+  /// then return the head pointer, along with a count of non-sentinel nodes in
+  /// the list
+  ///
+  /// @param key The key for which we are searching
+  /// @param head The start of the list to search
+  /// @param tx An active WOSTM transaction
+  ///
+  /// @return {head, count} if the key was not found
+  ///         {node, 0} if the key was found at `node`
+  std::pair<node_t *, uint64_t>
+  list_get_or_head(const K &key, sentinel_t *head, WOSTM &tx) {
+    // Get the head's successor; on any inconsistency, it'll abort
+    auto curr = head->next.get(tx, head);
+    uint64_t count = 0; // Number of nodes encountered during the loop
+    while (true) {
+      // if we reached the tail, return the head
+      if (curr->next.get(tx, curr) == nullptr)
+        return {head, count};
+
+      // return curr if it has a matching key
+      if (static_cast<data_t *>(curr)->key == key)
+        return {curr, 0};
+
+      // read `next` consistently
+      auto next = curr->next.get(tx, curr);
+      curr = next;
+      ++count;
+    }
+  }
+
+public:
+  /// Search the data structure for a node with key `key`. If not found, return
+  /// false. If found, return true, and set `val` to the value associated with
+  /// `key`.
+  ///
+  /// @param me The calling thread's descriptor
+  /// @param key The key to search
+  /// @param val A ref parameter for returning key's value, if found
+  ///
+  /// @return True if the key is found, false otherwise. The reference
+  /// parameter `val` is only valid when the return value is true.
+  bool get(HANDSTM *me, const K &key, V &val) {
+    while (true) {
+      BEGIN_WO(me);
+      // Get the bucket in `active` where `key` should be. Returns `nullptr` if
+      // it did some resizing, in which case we should commit the resize, then
+      // try again.
+      auto bucket = get_bucket(me, key, wo);
+      if (!bucket)
+        continue;
+
+      // Find the node in `bucket` that matches `key`. If it can't be found,
+      // we'll get the head node.
+      auto [node, _] =
+          list_get_or_head(key, static_cast<sentinel_t *>(bucket), wo);
+
+      // If we got back the head, return false, otherwise read out the data
+      if (node == bucket)
+        return false;
+      data_t *dn = static_cast<data_t *>(node);
+      val = dn->val.get(wo, dn);
+      return true;
+    }
+  }
+
+  /// Create a mapping from the provided `key` to the provided `val`, but only
+  /// if no such mapping already exists. This method does *not* have upsert
+  /// behavior for keys already present.
+  ///
+  /// @param me The calling thread's descriptor
+  /// @param key The key for the mapping to create
+  /// @param val The value for the mapping to create
+  ///
+  /// @return True if the value was inserted, false otherwise.
+  bool insert(HANDSTM *me, const K &key, V &val) {
+    // If we discover that a bucket becomes too full, we'll insert, linearize,
+    // and then resize in a new transaction before returning.
+    tbl_t *a_tbl = nullptr;
+    while (true) {
+      BEGIN_WO(me);
+      auto bucket = get_bucket(me, key, wo);
+      if (!bucket)
+        continue;
+
+      // Find the node in `bucket` that matches `key`. If it can't be found,
+      // we'll get the head node.
+      auto [node, count] =
+          list_get_or_head(key, static_cast<sentinel_t *>(bucket), wo);
+
+      // If we didn't get the head, the key already exists, so return false
+      if (node != bucket)
+        return false;
+
+      auto next = node->next.get(wo, node);
+
+      // Stitch in a new node
+      data_t *new_dn = wo.LOG_NEW(new data_t(key, val));
+      new_dn->next.set(wo, new_dn, next);
+      new_dn->prev.set(wo, new_dn, node);
+      node->next.set(wo, node, new_dn);
+      next->prev.set(wo, next, new_dn);
+      if (count >= RESIZE_THRESHOLD) {
+        a_tbl = active.get(wo, tbl_orec);
+        break; // need to resize!
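+        // NB: the new node is already stitched in, so this transaction
+        // linearizes the insert; the resize below happens afterward, in
+        // separate transactions.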
+      }
+      return true;
+    }
+
+    resize(me, a_tbl);
+    return true;
+  }
+
+  /// Clear the mapping involving the provided `key`.
+  ///
+  /// @param me The calling thread's descriptor
+  /// @param key The key for the mapping to eliminate
+  ///
+  /// @return True if the key was found and removed, false otherwise
+  bool remove(HANDSTM *me, const K &key) {
+    while (true) {
+      BEGIN_WO(me);
+      // Get the bucket in `active` where `key` should be. Returns `nullptr` if
+      // it did some resizing, in which case we should commit the resize, then
+      // try again.
+      auto bucket = get_bucket(me, key, wo);
+      if (!bucket)
+        continue;
+
+      // Find the node in `bucket` that matches `key`. If it can't be found,
+      // we'll get the head node.
+      //
+      // NB: This is a big transaction, so the active table can't have changed
+      auto [node, __] =
+          list_get_or_head(key, static_cast<sentinel_t *>(bucket), wo);
+
+      // If we got back the head, return false
+      if (node == bucket)
+        return false;
+
+      // unstitch it
+      auto pred = node->prev.get(wo, node), succ = node->next.get(wo, node);
+      pred->next.set(wo, pred, succ);
+      succ->prev.set(wo, succ, pred);
+      wo.reclaim(node);
+      return true;
+    }
+  }
+};
diff --git a/artifact/ds/handSTM/dlist_omap.h b/artifact/ds/handSTM/dlist_omap.h
new file mode 100644
index 0000000..03cafe3
--- /dev/null
+++ b/artifact/ds/handSTM/dlist_omap.h
@@ -0,0 +1,172 @@
+#pragma once
+
+/// An ordered map, implemented as a doubly-linked list. This map supports
+/// get(), insert(), and remove() operations.
+///
+/// @param K The type of the keys stored in this map
+/// @param V The type of the values stored in this map
+/// @param HANDSTM A thread descriptor type, for safe memory reclamation
+template <typename K, typename V, typename HANDSTM> class dlist_omap {
+  using WOSTM = typename HANDSTM::WOSTM;
+  using ROSTM = typename HANDSTM::ROSTM;
+  using STM = typename HANDSTM::STM;
+  using ownable_t = typename HANDSTM::ownable_t;
+  template <typename T> using FIELD = typename HANDSTM::template xField<T>;
+
+  /// A list node. It has prev and next pointers, but no key or value. It's
+  /// useful for sentinels, so that K and V don't have to be default
+  /// constructable.
+  struct node_t : ownable_t {
+    FIELD<node_t *> prev; // Pointer to predecessor
+    FIELD<node_t *> next; // Pointer to successor
+
+    /// Construct a node
+    node_t() : prev(nullptr), next(nullptr) {}
+
+    /// Destructor is a no-op, but it needs to be virtual because of inheritance
+    virtual ~node_t() {}
+  };
+
+  /// A list node that also has a key and value. Note that keys are const.
+  struct data_t : public node_t {
+    const K key;  // The key of this pair
+    FIELD<V> val; // The value of this pair
+
+    /// Construct a data_t
+    ///
+    /// @param _key The key that is stored in this node
+    /// @param _val The value that is stored in this node
+    data_t(const K &_key, const V &_val) : node_t(), key(_key), val(_val) {}
+
+    /// Destructor is a no-op, but it needs to be virtual because of inheritance
+    virtual ~data_t() {}
+  };
+
+  node_t *const head; // The list head pointer
+  node_t *const tail; // The list tail pointer
+
+public:
+  /// Default construct a list by constructing and connecting two sentinel
+  /// nodes
+  ///
+  /// @param me The operation that is constructing the list
+  /// @param cfg A configuration object
+  dlist_omap(HANDSTM *me, auto *cfg) : head(new node_t()), tail(new node_t()) {
+    BEGIN_WO(me);
+    head->next.set(wo, head, tail);
+    tail->prev.set(wo, tail, head);
+  }
+
+private:
+  /// get_leq is an inclusive predecessor query that returns the largest node
+  /// whose key is <= the provided key.
It can return the head sentinel, but
+  /// not the tail sentinel.
+  ///
+  /// @param tx The active transaction
+  /// @param key The key for which we are doing a predecessor query.
+  ///
+  /// @return The node that was found
+  node_t *get_leq(STM &tx, const K key) {
+    // Start at the head; read the next now, to avoid reading it in multiple
+    // iterations of the loop
+    node_t *curr = head;
+    auto *next = curr->next.get(tx, curr);
+
+    // Starting at `next`, search for key.
+    while (true) {
+      // Case 1: `next` is tail --> stop the search at curr
+      if (next == tail)
+        return curr;
+
+      // read next's `next` and `key`
+      auto next_next = next->next.get(tx, next);
+      auto nkey = static_cast<data_t *>(next)->key;
+
+      // Case 2: `next` is a data node: stop if next->key >= key
+      if (nkey > key)
+        return curr;
+      if (nkey == key)
+        return next;
+
+      // Case 3: keep traversing to `next`
+      curr = next;
+      next = next_next;
+    }
+  }
+
+public:
+  /// Search the data structure for a node with key `key`. If not found, return
+  /// false. If found, return true, and set `val` to the value associated with
+  /// `key`.
+  ///
+  /// @param me The thread context
+  /// @param key The key to search
+  /// @param val A ref parameter for returning key's value, if found
+  ///
+  /// @return True if the key is found, false otherwise. The reference
+  /// parameter `val` is only valid when the return value is true.
+  bool get(HANDSTM *me, const K &key, V &val) {
+    BEGIN_RO(me); // RO tx(me);
+    // get_leq will use a read-only transaction to find the largest node with
+    // a key <= `key`.
+    auto n = get_leq(ro, key);
+
+    // Since we have EBR, we can read n.key without validating and fast-fail
+    // on key-not-found
+    if (n == head || static_cast<data_t *>(n)->key != key)
+      return false;
+
+    // NB: given EBR, we don't need to worry about n._obj being deleted, so
+    // we don't need to validate before looking at the value
+    data_t *dn = static_cast<data_t *>(n);
+    val = dn->val.get(ro, dn);
+    return true;
+  }
+
+  /// Create a mapping from the provided `key` to the provided `val`, but only
+  /// if no such mapping already exists. This method does *not* have upsert
+  /// behavior for keys already present.
+  ///
+  /// @param me The thread context
+  /// @param key The key for the mapping to create
+  /// @param val The value for the mapping to create
+  ///
+  /// @return True if the value was inserted, false otherwise.
+  bool insert(HANDSTM *me, const K &key, V &val) {
+    BEGIN_WO(me); // WO tx(me);
+
+    auto n = get_leq(wo, key);
+    if (n != head && static_cast<data_t *>(n)->key == key)
+      return false;
+
+    auto next = n->next.get(wo, n);
+
+    // stitch in a new node
+    data_t *new_dn = wo.LOG_NEW(new data_t(key, val));
+    new_dn->next.set(wo, new_dn, next);
+    new_dn->prev.set(wo, new_dn, n);
+    n->next.set(wo, n, new_dn);
+    next->prev.set(wo, next, new_dn);
+    return true;
+  }
+
+  /// Clear the mapping involving the provided `key`.
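+  ///
+  /// Removal is a four-pointer unstitch inside a single write transaction:
+  /// pred->next = succ and succ->prev = pred, followed by wo.reclaim(n),
+  /// which defers the actual free until it is safe (e.g., via epoch-based
+  /// reclamation).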
+  ///
+  /// @param me The thread context
+  /// @param key The key for the mapping to eliminate
+  ///
+  /// @return True if the key was found and removed, false otherwise
+  bool remove(HANDSTM *me, const K &key) {
+    BEGIN_WO(me); // WO tx(me);
+
+    auto n = get_leq(wo, key);
+    if (n == head || static_cast<data_t *>(n)->key != key)
+      return false;
+
+    // unstitch it
+    auto pred = n->prev.get(wo, n), succ = n->next.get(wo, n);
+    pred->next.set(wo, pred, succ);
+    succ->prev.set(wo, succ, pred);
+    wo.reclaim(n);
+    return true;
+  }
+};
diff --git a/artifact/ds/handSTM/ibst_omap.h b/artifact/ds/handSTM/ibst_omap.h
new file mode 100644
index 0000000..7a6f63a
--- /dev/null
+++ b/artifact/ds/handSTM/ibst_omap.h
@@ -0,0 +1,267 @@
+#pragma once
+
+/// An ordered map, implemented as an unbalanced, internal binary search tree.
+/// This map supports get(), insert(), and remove() operations.
+///
+/// @param K The type of the keys stored in this map
+/// @param V The type of the values stored in this map
+/// @param HANDSTM A thread descriptor type, for safe memory reclamation
+template <typename K, typename V, typename HANDSTM> class ibst_omap {
+  using WOSTM = typename HANDSTM::WOSTM;
+  using ROSTM = typename HANDSTM::ROSTM;
+  using STM = typename HANDSTM::STM;
+  using ownable_t = typename HANDSTM::ownable_t;
+  template <typename T> using FIELD = typename HANDSTM::template xField<T>;
+
+  /// An easy-to-remember way of indicating the left and right children
+  enum DIRS { LEFT = 0, RIGHT = 1 };
+
+  /// node_t is the base type for all tree nodes. It doesn't have key/value
+  /// fields.
+  struct node_t : ownable_t {
+    /// The node's children. Be sure to use LEFT and RIGHT to index it
+    FIELD<node_t *> children[2];
+
+    /// Construct a node_t. This should only be called from a writer
+    /// transaction
+    ///
+    /// @param _left The left child of this node
+    /// @param _right The right child of this node
+    node_t(WOSTM &wo, node_t *_left = nullptr, node_t *_right = nullptr) {
+      children[LEFT].set(wo, this, _left);
+      children[RIGHT].set(wo, this, _right);
+    }
+  };
+
+  /// A pair holding a child node and its parent
+  struct ret_pair_t {
+    node_t *child;  // The child
+    node_t *parent; // The parent of that child
+  };
+
+  /// Our tree uses a sentinel root node, so that we always have a valid node
+  /// for which to compute an orec. The sentinel's *LEFT* child is the true
+  /// root of the tree. That is, logically sentinel has the value "TOP".
+  node_t *sentinel;
+
+  /// data_t is the type for all internal and leaf nodes in the data structure.
+  /// It extends the base type with a key and value.
+ /// + /// NB: keys are *not* const, because we want to overwrite nodes instead of + /// swapping them + struct data_t : public node_t { + FIELD key; // The key stored in this node + FIELD val; // The value stored in this node + + /// Construct a node + /// + /// @param _left left child of the node + /// @param _right right child of the node + /// @param _key the key of the node + /// @param _val the value of the node + data_t(WOSTM &wo, node_t *_left, node_t *_right, const K &_key, V &_val) + : node_t(wo, _left, _right) { + key.set(wo, this, _key); + val.set(wo, this, _val); + } + }; + +public: + /// Default construct an empty tree + /// + /// @param me The operation that is constructing the list + /// @param cfg A configuration object + ibst_omap(HANDSTM *me, auto *cfg) { + // NB: Even though the constructor is operating on private data, it needs a + // TM context in order to use tm_fields + BEGIN_WO(me); + sentinel = new node_t(wo); + } + +private: + /// Search for a `key` in the tree, and return the node holding it, as well + /// as the node's parent. If the key is not found, return null, and the + /// node that ought to be parent of the (not found) `key`. + /// + /// NB: The caller is responsible for clearing the checkpoint stack before + /// calling get_node(). + /// + /// @param key The key to search for + /// + /// @return {found, parent} if `key` is in the tree + /// {nullptr, parent} if `key` is not in the tree + ret_pair_t get_node(STM &tx, const K &key) { + // Traverse downward to the target node: + node_t *parent = sentinel; + node_t *child = parent->children[LEFT].get(tx, parent); + + // Traverse downward from the parent until we find null child or `key` + while (true) { + // nullptr == not found, so stop. We know parent was valid, so we can + // just return it + if (!child) + return {nullptr, parent}; + + // It's time to move downward. Read fields of child and grandchild + // + // NB: we may not use grandchild, but it's better to read it here + auto child_key = static_cast(child)->key.get(tx, child); + auto grandchild = + child->children[(key < child_key) ? LEFT : RIGHT].get(tx, child); + + // If the child key matches, return {child, parent}. We know both are + // valid (parent came from stack; we just checked child) + // + // NB: the snapshotting code requires that no node with matching key + // goes into `snapshots` + if (child_key == key) + return {child, parent}; + + // Otherwise traverse downward + parent = child; + child = grandchild; + } + } + + /// Given a node and its orec value, find the tree node that holds the key + /// that logically succeeds it (i.e., the leftmost descendent of the right + /// child) + /// + /// NB: The caller must ensure that `node` has a valid right child before + /// calling this method + /// + /// @param me The active CCDS operation + /// @param node An object and orec value to use as the starting point + /// + /// @return {{found, orec}, {parent, orec}} if no inconsistency occurs + /// {{nullptr, 0}, {nullptr, 0}} on any consistency violation + ret_pair_t get_succ_pair(STM &tx, node_t *node) { + // Read the right child + node_t *parent = node, *child = node->children[RIGHT].get(tx, node); + + // Find the leftmost non-null node in the tree rooted at child + while (true) { + auto next = child->children[LEFT].get(tx, child); + // If next is null, `child` is the successor. 
Otherwise keep traversing + if (!next) + return {child, parent}; + parent = child; + child = next; + } + } + +public: + /// Search the data structure for a node with key `key`. If not found, return + /// false. If found, return true, and set `val` to the value associated with + /// `key`. + /// + /// @param me The calling thread's HANDSTM + /// @param key The key to search + /// @param val A ref parameter for returning key's value, if found + /// + /// @return True if the key is found, false otherwise. The reference + /// parameter `val` is only valid when the return value is true. + bool get(HANDSTM *me, const K &key, V &val) { + BEGIN_RO(me); + // Get the node that holds `key`, if it is present, and also its parent. + // If it isn't present, we'll get a null pointer. That corresponds to a + // consistent read of the parent, which means we already linearized and + // we're done + auto [curr, _] = get_node(ro, key); + if (curr == nullptr) + return false; + + // read the value + auto dn = static_cast(curr); + val = dn->val.get(ro, dn); + return true; + } + + /// Create a mapping from the provided `key` to the provided `val`, but only + /// if no such mapping already exists. This method does *not* have upsert + /// behavior for keys already present. + /// + /// @param me The calling thread's HANDSTM + /// @param key The key for the mapping to create + /// @param val The value for the mapping to create + /// + /// @return True if the value was inserted, false otherwise. + bool insert(HANDSTM *me, const K &key, V &val) { + BEGIN_WO(me); + auto [child, parent] = get_node(wo, key); + if (child) + return false; + // We must have a null child and a valid parent. If it's sentinel, we + // must insert as LEFT. Otherwise, compute which child to set. + auto cID = (parent == sentinel ? LEFT : RIGHT) & + (key > static_cast(parent)->key.get(wo, parent)); + auto new_child = new data_t(wo, nullptr, nullptr, key, val); + parent->children[cID].set(wo, parent, new_child); + return true; + } + + /// Clear the mapping involving the provided `key`. + /// + /// @param me The calling thread's HANDSTM + /// @param key The key for the mapping to eliminate + /// + /// @return True if the key was found and removed, false otherwise + bool remove(HANDSTM *me, const K &key) { + BEGIN_WO(me); + auto [target, parent] = get_node(wo, key); + if (target == nullptr) + return false; + + // Read the target node's children + data_t *t_child[2]; + t_child[RIGHT] = + static_cast(target->children[RIGHT].get(wo, target)); + t_child[LEFT] = + static_cast(target->children[LEFT].get(wo, target)); + + // If either child is null, and if the parent is still valid, then we can + // unstitch the target, link the parent to a grandchild and we're done. + if (!t_child[LEFT] || !t_child[RIGHT]) { + // Acquire the (possibly null) grandchild to link to the parent + auto gID = t_child[LEFT] ? LEFT : RIGHT; + + // Which child of the parent is target? + auto cID = + parent->children[LEFT].get(wo, parent) == target ? LEFT : RIGHT; + + // Unstitch and reclaim + parent->children[cID].set(wo, parent, t_child[gID]); + wo.reclaim(target); + return true; + } + + // `target` has two children. WLOG, the leftmost descendent of the right + // child is `target`'s successor, and must have at most one child. We + // want to put that node's key and value into `target`, and then remove + // that node by setting its parent's LEFT to its RIGHT (which might be + // null). 
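+    //
+    // For example, removing 5 from the tree below copies successor 6's
+    // key/value into 5's node, then unstitches the original 6 (which has no
+    // left child) by linking its parent 8 to its right child 7:
+    //
+    //        5              6
+    //       / \            / \
+    //      2   8    ==>   2   8
+    //         /              /
+    //        6              7
+    //         \
+    //          7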
+ auto [succ, s_parent] = get_succ_pair(wo, target); + + // If target's successor is target's right child, then target._ver must + // equal s_parent._ver. As long as we lock target before we try + // to lock s_parent, we'll get the check for free. + + // Copy `succ`'s key/value into `target` + static_cast(target)->key.set( + wo, target, static_cast(succ)->key.get(wo, succ)); + static_cast(target)->val.set( + wo, target, static_cast(succ)->val.get(wo, succ)); + + // Unstitch `succ` by setting its parent's left to its right + // Case 1: there are intermediate nodes between target and successor + if (s_parent != target) + s_parent->children[LEFT].set(wo, s_parent, + succ->children[RIGHT].get(wo, succ)); + // Case 2: target is successor's parent + else + s_parent->children[RIGHT].set(wo, s_parent, + succ->children[RIGHT].get(wo, succ)); + wo.reclaim(succ); + return true; + } +}; diff --git a/artifact/ds/handSTM/iht_carumap.h b/artifact/ds/handSTM/iht_carumap.h new file mode 100644 index 0000000..fcab57c --- /dev/null +++ b/artifact/ds/handSTM/iht_carumap.h @@ -0,0 +1,286 @@ +#pragma once + +/// iht_umap is an HANDSTM implementation of the interlocked hash table. There +/// is one significant simplification: +/// +/// - It *does not* employ the max-depth trick for ensuring constant time +/// access. The worst-case asymptotic complexity is thus O(log(log(N))). +/// That's probably small enough that nobody will ever care. +/// +/// - Note that TM makes it very easy to use the trick where the type of a node +/// is embedded in the type, rather than in the pointer to the type. This is +/// more like the original IHT, less like our baseline version. +template class iht_carumap { + using WOSTM = typename HANDSTM::WOSTM; + using ROSTM = typename HANDSTM::ROSTM; + using STM = typename HANDSTM::STM; + using ownable_t = typename HANDSTM::ownable_t; + template using FIELD = typename HANDSTM::template xField; + + /// Common parent for EList and PList types. It uses a bool as a proxy for + /// RTTI for distinguishing between PLists and ELists + /// + /// NB: In golang, the lock would go in Base. + struct Base : ownable_t { + const bool isEList; // Is this an EList (true) or a PList (false) + + // Construct the base type by setting its `isElist` field + Base(bool _isEList) : isEList(_isEList) {} + }; + + /// EList (ElementList) stores a bunch of K/V pairs + /// + /// NB: We construct with a factory, so the pairs can be a C-style variable + /// length array field. + struct EList : Base { + /// The key/value pair. We don't structure split, so that we can have the + /// array as a field. 
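+    ///
+    /// (An EList is allocated as malloc(sizeof(EList) + size * sizeof(pair_t)),
+    /// the usual C-style flexible-array-member idiom; see make() below.)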
+ struct pair_t { + FIELD key; // A key + FIELD val; // A value + }; + + FIELD count; // # live elements + pair_t pairs[]; // The K/V pairs stored in this EList + + private: + /// Force construction via the make_elist factory + EList() : Base(true), count(0) {} + + public: + /// Construct a EList that can hold up to `size` elements + static EList *make(WOSTM &wo, size_t size) { + EList *e = new (wo.LOG_NEW( + (ownable_t *)malloc(sizeof(EList) + size * sizeof(pair_t)))) EList(); + return e; + } + + /// Insert into an EList, without checking if there is enough room + void unchecked_insert(WOSTM &wo, const K &key, const V &val) { + auto c = count.get(wo, this); + pairs[c].key.set(wo, this, key); + pairs[c].val.set(wo, this, val); + count.set(wo, this, c + 1); + } + }; + + /// PList (PointerList) stores a bunch of pointers and their associated locks + /// + /// NB: We construct with a factory, so the pairs can be a C-style variable + /// length array field. This means that `depth` and `count` can't be + /// const, but that's OK. + struct PList : Base { + /// A wrapper around a pointer to a Base object + struct bucket_t { + FIELD base; // pointer to P/E List + }; + + bucket_t buckets[]; // The pointers stored in this PList + + private: + /// Force construction via the make_plist factory + PList() : Base(false) {} + + public: + /// Construct a PList at depth `depth` that can hold up to `size` elements + static PList *make(WOSTM &wo, size_t size) { + PList *p = new (wo.LOG_NEW((ownable_t *)malloc( + sizeof(PList) + size * sizeof(bucket_t)))) PList(); + for (size_t i = 0; i < size; ++i) + p->buckets[i].base.set(wo, p, nullptr); + return p; + } + }; + + const size_t elist_size; // The size of all ELists + const size_t plist_size; // The size of the root PList + PList *root; // The root PList + std::hash pre_hash; // A low-quality hash function from K to size_t + + /// For the time being, we re-hash a key at each level, xor-ing in the level + /// so that keys are unlikely to collide repeatedly. + /// + /// TODO: This shouldn't be too expensive, but we can probably do better. + uint64_t level_hash(HANDSTM *me, const K &key, size_t level) { + return me->hash(level ^ pre_hash(key)); + } + + /// Given a PList where the `index`th bucket is a full EList, create a new + /// PList that is twice the size of `parent` and hash the full EList's + /// elements into it. This only takes O(1) time. 
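+  ///
+  /// (It is O(1) in the size of the map because a full EList holds at most
+  /// elist_size pairs, a constant fixed at construction from cfg->chunksize.)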
+ /// + /// @param parent The PList whose bucket needs rehashing + /// @param pcount The number of elements in `parent` + /// @param pdepth The depth of `parent` + /// @param pidx The index in `parent` of the bucket to rehash + PList *rehash(HANDSTM *me, WOSTM &wo, PList *parent, size_t pcount, + size_t pdepth, size_t pidx) { + // Make a new PList that is twice as big, with all locks set to E_UNLOCKED + auto p = PList::make(wo, pcount * 2); + + // hash everything from the full EList into it + auto source = + static_cast(parent->buckets[pidx].base.get(wo, parent)); + auto c = source->count.get(wo, source); + for (size_t i = 0; i < c; ++i) { + auto k = source->pairs[i].key.get(wo, source); + auto b = level_hash(me, k, pdepth + 1) % pcount; + auto base = p->buckets[b].base.get(wo, p); + if (base == nullptr) { + base = EList::make(wo, elist_size); + p->buckets[b].base.set(wo, p, base); + } + EList *dest = static_cast(base); + dest->unchecked_insert(wo, k, source->pairs[i].val.get(wo, source)); + } + + // The caller locked the pointer to the EList, so we can reclaim the EList + wo.reclaim(source); + return p; + } + +public: + /// Construct an IHT by configuring the constants and building the root PList + /// + /// @param me Unused thread descriptor + /// @param cfg A configuration object with `chunksize` and `buckets` fields, + /// for setting the EList size and root PList size. + iht_carumap(HANDSTM *me, auto *cfg) + : elist_size(cfg->chunksize), plist_size(cfg->buckets) { + BEGIN_WO(me); + root = PList::make(wo, plist_size); + } + + /// Search for a key in the map. If found, return `true` and set the ref + /// parameter `val` to the associated value. Otherwise return `false`. + /// + /// @param me Thread context + /// @param key The key to search for + /// @param val The value (pass-by-ref) that was found + bool get(HANDSTM *me, const K &key, V &val) { + BEGIN_RO(me); + auto curr = root; // Start at the root PList + size_t depth = 1, count = plist_size; + while (true) { + auto bucket = level_hash(me, key, depth) % count; + // If it's null, fail + auto b = curr->buckets[bucket].base.get(ro, curr); + if (b == nullptr) + return false; + + // If it's a PList, keep traversing + if (!b->isEList) { + curr = static_cast(b); + ++depth; + count *= 2; + continue; + } + + // If it's not null, do a linear search of the keys + auto e = static_cast(b); + auto c = e->count.get(ro, e); + for (size_t i = 0; i < c; ++i) { + if (e->pairs[i].key.get(ro, e) == key) { + val = e->pairs[i].val.get(ro, e); + return true; + } + } + // Not found + return false; + } + } + + /// Search for a key in the map. If found, remove it and its associated value + /// and return `true`. Otherwise return `false`. 
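+  ///
+  /// A found pair is removed by overwriting it with the EList's last pair and
+  /// decrementing count, which keeps the pairs array dense.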
+ /// + /// @param me Thread context + /// @param key The key to search for + bool remove(HANDSTM *me, const K &key) { + BEGIN_WO(me); + auto curr = root; // Start at the root PList + size_t depth = 1, count = plist_size; + while (true) { + auto bucket = level_hash(me, key, depth) % count; + // If it's null, fail + auto b = curr->buckets[bucket].base.get(wo, curr); + if (b == nullptr) + return false; + + // If it's a PList, keep traversing + if (!b->isEList) { + curr = static_cast(b); + ++depth; + count *= 2; + continue; + } + + // If it's not null, do a linear search of the keys + auto e = static_cast(b); + auto c = e->count.get(wo, e); + for (size_t i = 0; i < c; ++i) { + if (e->pairs[i].key.get(wo, e) == key) { + // remove the K/V pair by overwriting, but only if there's >1 key + if (c > 1) { + e->pairs[i].key.set(wo, e, e->pairs[c - 1].key.get(wo, e)); + e->pairs[i].val.set(wo, e, e->pairs[c - 1].val.get(wo, e)); + } + e->count.set(wo, e, c - 1); + return true; + } + } + + // Not found + return false; + } + } + + /// Insert a new key/value pair into the map, but only if the key is not + /// already present. Return `true` if a mapping was added, `false` otherwise. + /// + /// @param me Thread context + /// @param key The key to try to insert + /// @param val The value to try to insert + bool insert(HANDSTM *me, const K key, const V val) { + BEGIN_WO(me); + auto curr = root; // Start at the root PList + size_t depth = 1, count = plist_size; + while (true) { + auto bucket = level_hash(me, key, depth) % count; + // If it's null, make a new EList, insert, and we're done + auto b = curr->buckets[bucket].base.get(wo, curr); + if (b == nullptr) { + auto e = EList::make(wo, elist_size); + e->unchecked_insert(wo, key, val); + curr->buckets[bucket].base.set(wo, curr, e); + return true; + } + + // If it's a PList, keep traversing + if (!b->isEList) { + curr = static_cast(b); + ++depth; + count *= 2; + continue; + } + + // If It's not null, do a linear search of the keys, return false if found + auto e = static_cast(b); + auto c = e->count.get(wo, e); + for (size_t i = 0; i < c; ++i) { + if (e->pairs[i].key.get(wo, e) == key) + return false; + } + + // Not found: insert if room + if (c < elist_size) { + e->unchecked_insert(wo, key, val); + return true; + } + + // Otherwise expand and keep traversing, because pathological hash + // collisions are always possible. + curr->buckets[bucket].base.set( + wo, curr, rehash(me, wo, curr, count, depth, bucket)); + } + } +}; diff --git a/artifact/ds/handSTM/rbtree_omap.h b/artifact/ds/handSTM/rbtree_omap.h new file mode 100644 index 0000000..835cd95 --- /dev/null +++ b/artifact/ds/handSTM/rbtree_omap.h @@ -0,0 +1,365 @@ +#pragma once + +/// An ordered map, implemented as a balanced, internal binary search tree. This +/// map supports get(), insert(), and remove() operations. 
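+///
+/// The implementation maintains the classic red/black invariants (a red node
+/// has no red child; every root-to-null path has the same number of black
+/// nodes), with the recolor/rotate cases of insert and remove written out
+/// longhand against the STM field API.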
+/// +/// @param K The type of the keys stored in this map +/// @param V The type of the values stored in this map +/// @param HANDSTM A thread descriptor type, for safe memory reclamation +/// @param dummy_key A default key to use +/// @param dummy_val A default value to use +template +class rbtree_omap { + using WOSTM = typename HANDSTM::WOSTM; + using ROSTM = typename HANDSTM::ROSTM; + using STM = typename HANDSTM::STM; + using ownable_t = typename HANDSTM::ownable_t; + template using FIELD = typename HANDSTM::template xField; + + static const int RED = 0; // Enum for red + static const int BLACK = 1; // Enum for black + + /// nodes in a red/black tree + struct node_t : ownable_t { + FIELD key; // Key stored at this node + FIELD val; // Value stored at this node + FIELD color; // color (RED or BLACK) + FIELD parent; // pointer to parent + FIELD ID; // 0/1 for left/right child + FIELD child[2]; // L/R children + + /// basic constructor + node_t(WOSTM &wo, int color, K key, V val, node_t *parent, long ID, + node_t *child0, node_t *child1) + : key(key), val(val), color(color), parent(parent), ID(ID) { + child[0].set(wo, this, child0); + child[1].set(wo, this, child1); + } + }; + + node_t *sentinel; // The (sentinel) root node of the tree + +public: + /// Construct a list by creating a sentinel node at the head + rbtree_omap(HANDSTM *me, auto *) { + BEGIN_WO(me); + sentinel = new node_t(wo, BLACK, dummy_key, dummy_val, nullptr, 0, nullptr, + nullptr); + } + + // binary search for the node that has v as its value + bool get(HANDSTM *me, const K &key, V &val) const { + BEGIN_RO(me); + node_t *curr = sentinel->child[0].get(ro, sentinel); + while (curr != nullptr && curr->key.get(ro, curr) != key) + curr = curr->child[(key < curr->key.get(ro, curr)) ? 0 : 1].get(ro, curr); + bool res = (curr != nullptr) && (curr->key.get(ro, curr) == key); + if (res) + val = curr->val.get(ro, curr); + return res; + } + + // insert a node with k/v as its pair if no such key exists in the tree + bool insert(HANDSTM *me, const K &key, V &val) { + bool res = false; + { + BEGIN_WO(me); + // find insertion point + node_t *curr = sentinel; + int cID = 0; + node_t *child = curr->child[cID].get(wo, curr); + while (child != nullptr) { + long ckey = child->key.get(wo, child); + if (ckey == key) + return false; + cID = key < ckey ? 
0 : 1; + curr = child; + child = curr->child[cID].get(wo, curr); + } + + // make a red node and connect it to `curr` + res = true; + child = new node_t(wo, RED, key, val, curr, cID, nullptr, nullptr); + curr->child[cID].set(wo, curr, child); + + // balance the tree + while (true) { + // Get the parent, grandparent, and their relationship + node_t *parent = child->parent.get(wo, child); + int pID = parent->ID.get(wo, parent); + node_t *gparent = parent->parent.get(wo, parent); + + // Easy exit condition: no more propagation needed + if ((gparent == sentinel) || (BLACK == parent->color.get(wo, parent))) + break; + + // If parent's sibling is also red, we push red up to grandparent + node_t *psib = gparent->child[1 - pID].get(wo, gparent); + if ((psib != nullptr) && (RED == psib->color.get(wo, psib))) { + parent->color.set(wo, parent, BLACK); + psib->color.set(wo, psib, BLACK); + gparent->color.set(wo, gparent, RED); + child = gparent; + continue; // restart loop at gparent level + } + + int cID = child->ID.get(wo, child); + if (cID != pID) { + // set child's child to parent's cID'th child + node_t *baby = child->child[1 - cID].get(wo, child); + parent->child[cID].set(wo, parent, baby); + if (baby != nullptr) { + baby->parent.set(wo, baby, parent); + baby->ID.set(wo, baby, cID); + } + // move parent into baby's position as a child of child + child->child[1 - cID].set(wo, child, parent); + parent->parent.set(wo, parent, child); + parent->ID.set(wo, parent, 1 - cID); + // move child into parent's spot as pID'th child of gparent + gparent->child[pID].set(wo, gparent, child); + child->parent.set(wo, child, gparent); + child->ID.set(wo, child, pID); + // now swap child with curr and fall through + node_t *temp = child; + child = parent; + parent = temp; + } + + parent->color.set(wo, parent, BLACK); + gparent->color.set(wo, gparent, RED); + // promote parent + node_t *ggparent = gparent->parent.get(wo, gparent); + int gID = gparent->ID.get(wo, gparent); + node_t *ochild = parent->child[1 - pID].get(wo, parent); + // make gparent's pIDth child ochild + gparent->child[pID].set(wo, gparent, ochild); + if (ochild != nullptr) { + ochild->parent.set(wo, ochild, gparent); + ochild->ID.set(wo, ochild, pID); + } + // make gparent the 1-pID'th child of parent + parent->child[1 - pID].set(wo, parent, gparent); + gparent->parent.set(wo, gparent, parent); + gparent->ID.set(wo, gparent, 1 - pID); + // make parent the gIDth child of ggparent + ggparent->child[gID].set(wo, ggparent, parent); + parent->parent.set(wo, parent, ggparent); + parent->ID.set(wo, parent, gID); + } + + // now just set the root to black + node_t *root = sentinel->child[0].get(wo, sentinel); + if (root->color.get(wo, root) != BLACK) + root->color.set(wo, root, BLACK); + } + + return res; + } + + // remove the node with k as its key if it exists in the tree + bool remove(HANDSTM *me, const K &key) { + BEGIN_WO(me); + // find key + node_t *curr = sentinel->child[0].get(wo, sentinel); + + while (curr != nullptr) { + int ckey = curr->key.get(wo, curr); + if (ckey == key) + break; + curr = curr->child[key < ckey ? 
0 : 1].get(wo, curr); + } + + // if we didn't find v, we're done + if (curr == nullptr) + return false; + + // If `curr` has two children, we need to swap it with its successor + if ((curr->child[1].get(wo, curr) != nullptr) && + ((curr->child[0].get(wo, curr)) != nullptr)) { + node_t *leftmost = curr->child[1].get(wo, curr); + while (leftmost->child[0].get(wo, leftmost) != nullptr) + leftmost = leftmost->child[0].get(wo, leftmost); + curr->key.set(wo, curr, leftmost->key.get(wo, leftmost)); + curr->val.set(wo, curr, leftmost->val.get(wo, leftmost)); + curr = leftmost; + } + + // extract x from the tree and prep it for deletion + node_t *parent = curr->parent.get(wo, curr); + node_t *child = + curr->child[(curr->child[0].get(wo, curr) != nullptr) ? 0 : 1].get( + wo, curr); + int xID = curr->ID.get(wo, curr); + parent->child[xID].set(wo, parent, child); + if (child != nullptr) { + child->parent.set(wo, child, parent); + child->ID.set(wo, child, xID); + } + + // fix black height violations + if ((BLACK == curr->color.get(wo, curr)) && (child != nullptr)) { + if (RED == child->color.get(wo, child)) { + curr->color.set(wo, curr, RED); + child->color.set(wo, child, BLACK); + } + } + + // rebalance... be sure to save the deletion target! + node_t *to_delete = curr; + while (true) { + parent = curr->parent.get(wo, curr); + if ((parent == sentinel) || (RED == curr->color.get(wo, curr))) + break; + int cID = curr->ID.get(wo, curr); + node_t *sibling = parent->child[1 - cID].get(wo, parent); + + // we'd like y's sibling s to be black + // if it's not, promote it and recolor + if (RED == sibling->color.get(wo, sibling)) { + /* + Bp Bs + / \ / \ + By Rs => Rp B2 + / \ / \ + B1 B2 By B1 + */ + parent->color.set(wo, parent, RED); + sibling->color.set(wo, sibling, BLACK); + // promote sibling + node_t *gparent = parent->parent.get(wo, parent); + int pID = parent->ID.get(wo, parent); + node_t *nephew = sibling->child[cID].get(wo, sibling); + // set nephew as 1-cID child of parent + parent->child[1 - cID].set(wo, parent, nephew); + nephew->parent.set(wo, nephew, parent); + nephew->ID.set(wo, nephew, 1 - cID); + // make parent the cID child of the sibling + sibling->child[cID].set(wo, sibling, parent); + parent->parent.set(wo, parent, sibling); + parent->ID.set(wo, parent, cID); + // make sibling the pID child of gparent + gparent->child[pID].set(wo, gparent, sibling); + sibling->parent.set(wo, sibling, gparent); + sibling->ID.set(wo, sibling, pID); + // reset sibling + sibling = nephew; + } + + // Handle when the far nephew is red + node_t *n = sibling->child[1 - cID].get(wo, sibling); + if ((n != nullptr) && (RED == (n->color.get(wo, n)))) { + /* + ?p ?s + / \ / \ + By Bs => Bp Bn + / \ / \ + ?1 Rn By ?1 + */ + sibling->color.set(wo, sibling, parent->color.get(wo, parent)); + parent->color.set(wo, parent, BLACK); + n->color.set(wo, n, BLACK); + // promote sibling + node_t *gparent = parent->parent.get(wo, parent); + int pID = parent->ID.get(wo, parent); + node_t *nephew = sibling->child[cID].get(wo, sibling); + // make nephew the 1-cID child of parent + parent->child[1 - cID].set(wo, parent, nephew); + if (nephew != nullptr) { + nephew->parent.set(wo, nephew, parent); + nephew->ID.set(wo, nephew, 1 - cID); + } + // make parent the cID child of the sibling + sibling->child[cID].set(wo, sibling, parent); + parent->parent.set(wo, parent, sibling); + parent->ID.set(wo, parent, cID); + // make sibling the pID child of gparent + gparent->child[pID].set(wo, gparent, sibling); + sibling->parent.set(wo, sibling, 
gparent); + sibling->ID.set(wo, sibling, pID); + break; // problem solved + } + + n = sibling->child[cID].get(wo, sibling); + if ((n != nullptr) && (RED == (n->color.get(wo, n)))) { + /* + ?p ?p + / \ / \ + By Bs => By Bn + / \ \ + Rn B1 Rs + \ + B1 + */ + sibling->color.set(wo, sibling, RED); + n->color.set(wo, n, BLACK); + // promote n + node_t *gneph = n->child[1 - cID].get(wo, n); + // make gneph the cID child of sibling + sibling->child[cID].set(wo, sibling, gneph); + if (gneph != nullptr) { + gneph->parent.set(wo, gneph, sibling); + gneph->ID.set(wo, gneph, cID); + } + // make sibling the 1-cID child of n + n->child[1 - cID].set(wo, n, sibling); + sibling->parent.set(wo, sibling, n); + sibling->ID.set(wo, sibling, 1 - cID); + // make n the 1-cID child of parent + parent->child[1 - cID].set(wo, parent, n); + n->parent.set(wo, n, parent); + n->ID.set(wo, n, 1 - cID); + // swap sibling and `n` + node_t *temp = sibling; + sibling = n; + n = temp; + + // now the far nephew is red... copy of code from above + sibling->color.set(wo, sibling, parent->color.get(wo, parent)); + parent->color.set(wo, parent, BLACK); + n->color.set(wo, n, BLACK); + // promote sibling + node_t *gparent = parent->parent.get(wo, parent); + int pID = parent->ID.get(wo, parent); + node_t *nephew = sibling->child[cID].get(wo, sibling); + // make nephew the 1-cID child of parent + parent->child[1 - cID].set(wo, parent, nephew); + if (nephew != nullptr) { + nephew->parent.set(wo, nephew, parent); + nephew->ID.set(wo, nephew, 1 - cID); + } + // make parent the cID child of the sibling + sibling->child[cID].set(wo, sibling, parent); + parent->parent.set(wo, parent, sibling); + parent->ID.set(wo, parent, cID); + // make sibling the pID child of gparent + gparent->child[pID].set(wo, gparent, sibling); + sibling->parent.set(wo, sibling, gparent); + sibling->ID.set(wo, sibling, pID); + + break; // problem solved + } + + /* + ?p ?p + / \ / \ + Bx Bs => Bp Rs + / \ / \ + B1 B2 B1 B2 + */ + + sibling->color.set(wo, sibling, RED); // propagate upwards + + // advance to parent and balance again + curr = parent; + } + + // if curr was red, this fixes the balance + curr->color.set(wo, curr, BLACK); + + // free the node and return + wo.reclaim(to_delete); + + return true; + } +}; diff --git a/artifact/ds/handSTM/rbtree_omap_opt.h b/artifact/ds/handSTM/rbtree_omap_opt.h new file mode 100644 index 0000000..13d6633 --- /dev/null +++ b/artifact/ds/handSTM/rbtree_omap_opt.h @@ -0,0 +1,385 @@ +#pragma once + +/// An ordered map, implemented as a balanced, internal binary search tree. This +/// map supports get(), insert(), and remove() operations. 
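+///
+/// This variant differs from rbtree_omap in its use of the descriptor's
+/// specialized accessors (set_cap, get_mine, re_get, get_in_seq), which
+/// appear intended to skip redundant instrumentation on fields the current
+/// transaction already owns, has captured, or has just read.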
+/// +/// @param K The type of the keys stored in this map +/// @param V The type of the values stored in this map +/// @param HANDSTM A thread descriptor type, for safe memory reclamation +/// @param dummy_key A default key to use +/// @param dummy_val A default value to use +template +class rbtree_omap_opt { + using WOSTM = typename HANDSTM::WOSTM; + using ROSTM = typename HANDSTM::ROSTM; + using STM = typename HANDSTM::STM; + using ownable_t = typename HANDSTM::ownable_t; + template using FIELD = typename HANDSTM::template xField; + + static const int RED = 0; // Enum for red + static const int BLACK = 1; // Enum for black + + /// nodes in a red/black tree + struct node_t : ownable_t { + FIELD key; // Key stored at this node + FIELD val; // Value stored at this node + FIELD color; // color (RED or BLACK) + FIELD parent; // pointer to parent + FIELD ID; // 0/1 for left/right child + FIELD child[2]; // L/R children + + /// basic constructor + node_t(WOSTM &wo, int color, K key, V val, node_t *parent, long ID, + node_t *child0, node_t *child1) + : key(key), val(val), color(color), parent(parent), ID(ID) { + child[0].set_cap(wo, this, child0); + child[1].set_cap(wo, this, child1); + } + }; + + node_t *sentinel; // The (sentinel) root node of the tree + +public: + /// Construct a list by creating a sentinel node at the head + rbtree_omap_opt(HANDSTM *me, auto *) { + BEGIN_WO(me); + sentinel = new node_t(wo, BLACK, dummy_key, dummy_val, nullptr, 0, nullptr, + nullptr); + } + + // binary search for the node that has v as its value + bool get(HANDSTM *me, const K &key, V &val) const { + BEGIN_RO(me); + node_t *curr = sentinel->child[0].get(ro, sentinel); + K k = key; + while (true) { + if (curr == nullptr) + break; + k = curr->key.get(ro, curr); + if (k == key) + break; + curr = curr->child[key < k ? 0 : 1].re_get(ro, curr); + } + bool res = (curr != nullptr) && (k == key); + if (res) + val = curr->val.re_get(ro, curr); + return res; + } + + // insert a node with k/v as its pair if no such key exists in the tree + bool insert(HANDSTM *me, const K &key, V &val) { + bool res = false; + { + BEGIN_WO(me); + // find insertion point + node_t *curr = sentinel; + int cID = 0; + node_t *child = curr->child[cID].get(wo, curr); + while (child != nullptr) { + auto ckey = child->key.get(wo, child); + if (ckey == key) + return false; + cID = key < ckey ? 
0 : 1; + curr = child; + child = curr->child[cID].re_get(wo, curr); + } + + // make a red node and connect it to `curr` + res = true; + child = new node_t(wo, RED, key, val, curr, cID, nullptr, nullptr); + curr->child[cID].set(wo, curr, child); + + // balance the tree + while (true) { + // Get the parent, grandparent, and their relationship + // NB: child is captured or owned + node_t *parent = child->parent.get_mine(wo, child); + int pID = parent->ID.get(wo, parent); + auto parentcolor = parent->color.get_in_seq(wo, parent); + node_t *gparent = parent->parent.re_get(wo, parent); + + // Easy exit condition: no more propagation needed + if ((gparent == sentinel) || (BLACK == parentcolor)) + break; + + // If parent's sibling is also red, we push red up to grandparent + node_t *psib = gparent->child[1 - pID].get(wo, gparent); + if ((psib != nullptr) && (RED == psib->color.get(wo, psib))) { + parent->color.set(wo, parent, BLACK); + psib->color.set(wo, psib, BLACK); + gparent->color.set(wo, gparent, RED); + child = gparent; + continue; // restart loop at gparent level + } + + int cID = child->ID.get_mine(wo, child); + if (cID != pID) { + // set child's child to parent's cID'th child + node_t *baby = child->child[1 - cID].get_mine(wo, child); + parent->child[cID].set(wo, parent, baby); + if (baby != nullptr) { + baby->parent.set(wo, baby, parent); + baby->ID.set_mine(wo, baby, cID); + } + // move parent into baby's position as a child of child + child->child[1 - cID].set_mine(wo, child, parent); + parent->parent.set_mine(wo, parent, child); + parent->ID.set_mine(wo, parent, 1 - cID); + // move child into parent's spot as pID'th child of gparent + gparent->child[pID].set(wo, gparent, child); + child->parent.set_mine(wo, child, gparent); + child->ID.set_mine(wo, child, pID); + // now swap child with parent and fall through + node_t *temp = child; + child = parent; + parent = temp; + } + + parent->color.set(wo, parent, BLACK); + gparent->color.set(wo, gparent, RED); + // promote parent + node_t *ggparent = gparent->parent.get_mine(wo, gparent); + int gID = gparent->ID.get_mine(wo, gparent); + node_t *ochild = parent->child[1 - pID].get_mine(wo, parent); + // make gparent's pIDth child ochild + gparent->child[pID].set_mine(wo, gparent, ochild); + if (ochild != nullptr) { + ochild->parent.set(wo, ochild, gparent); + ochild->ID.set_mine(wo, ochild, pID); + } + // make gparent the 1-pID'th child of parent + parent->child[1 - pID].set_mine(wo, parent, gparent); + gparent->parent.set_mine(wo, gparent, parent); + gparent->ID.set_mine(wo, gparent, 1 - pID); + // make parent the gIDth child of ggparent + ggparent->child[gID].set(wo, ggparent, parent); + parent->parent.set_mine(wo, parent, ggparent); + parent->ID.set_mine(wo, parent, gID); + } + + // now just set the root to black + node_t *root = sentinel->child[0].get(wo, sentinel); + if (root->color.get(wo, root) != BLACK) + root->color.set(wo, root, BLACK); + } + + return res; + } + + // remove the node with k as its key if it exists in the tree + bool remove(HANDSTM *me, const K &key) { + BEGIN_WO(me); + // find key + node_t *curr = sentinel->child[0].get(wo, sentinel); + + while (curr != nullptr) { + auto ckey = curr->key.get(wo, curr); + if (ckey == key) + break; + curr = curr->child[key < ckey ? 
0 : 1].re_get(wo, curr); + } + + // if we didn't find v, we're done + if (curr == nullptr) + return false; + + // If `curr` has two children, we need to swap it with its successor + if ((curr->child[1].get(wo, curr) != nullptr) && + ((curr->child[0].get(wo, curr)) != nullptr)) { + auto lchild = curr->child[0].get_in_seq(wo, curr); + auto rchild = curr->child[1].re_get(wo, curr); + if ((lchild != nullptr) && (rchild != nullptr)) { + node_t *leftmost = rchild; + while (true) { + auto next = leftmost->child[0].get(wo, leftmost); + if (next == nullptr) + break; + leftmost = next; + } + auto lk = leftmost->key.get_in_seq(wo, leftmost); + auto lv = leftmost->val.re_get(wo, leftmost); + curr->key.set(wo, curr, lk); + curr->val.set_mine(wo, curr, lv); + curr = leftmost; + } + } + + // extract x from the tree and prep it for deletion + node_t *parent = curr->parent.get(wo, curr); + node_t *lchild = curr->child[0].get_in_seq(wo, curr); + node_t *rchild = curr->child[1].get_in_seq(wo, curr); + node_t *child = (lchild != nullptr) ? lchild : rchild; + int xID = curr->ID.re_get(wo, curr); + parent->child[xID].set(wo, parent, child); + if (child != nullptr) { + child->parent.set(wo, child, parent); + child->ID.set_mine(wo, child, xID); + } + + // fix black height violations + if ((BLACK == curr->color.re_get(wo, curr)) && (child != nullptr)) { + if (RED == child->color.get_mine(wo, child)) { + curr->color.set(wo, curr, RED); + child->color.set_mine(wo, child, BLACK); + } + } + + // rebalance... be sure to save the deletion target! + node_t *to_delete = curr; + while (true) { + parent = curr->parent.get(wo, curr); + if ((parent == sentinel) || (RED == curr->color.re_get(wo, curr))) + break; + int cID = curr->ID.re_get(wo, curr); + node_t *sibling = parent->child[1 - cID].get(wo, parent); + + // we'd like y's sibling s to be black + // if it's not, promote it and recolor + if (RED == sibling->color.get(wo, sibling)) { + /* + Bp Bs + / \ / \ + By Rs => Rp B2 + / \ / \ + B1 B2 By B1 + */ + parent->color.set(wo, parent, RED); + sibling->color.set(wo, sibling, BLACK); + // promote sibling + node_t *gparent = parent->parent.get_mine(wo, parent); + int pID = parent->ID.get_mine(wo, parent); + node_t *nephew = sibling->child[cID].get_mine(wo, sibling); + // set nephew as 1-cID child of parent + parent->child[1 - cID].set_mine(wo, parent, nephew); + nephew->parent.set(wo, nephew, parent); + nephew->ID.set_mine(wo, nephew, 1 - cID); + // make parent the cID child of the sibling + sibling->child[cID].set_mine(wo, sibling, parent); + parent->parent.set_mine(wo, parent, sibling); + parent->ID.set_mine(wo, parent, cID); + // make sibling the pID child of gparent + gparent->child[pID].set(wo, gparent, sibling); + sibling->parent.set_mine(wo, sibling, gparent); + sibling->ID.set_mine(wo, sibling, pID); + // reset sibling + sibling = nephew; + } + + // Handle when the far nephew is red + node_t *n = sibling->child[1 - cID].re_get(wo, sibling); + if ((n != nullptr) && (RED == (n->color.get(wo, n)))) { + /* + ?p ?s + / \ / \ + By Bs => Bp Bn + / \ / \ + ?1 Rn By ?1 + */ + sibling->color.set(wo, sibling, parent->color.re_get(wo, parent)); + parent->color.set(wo, parent, BLACK); + n->color.set(wo, n, BLACK); + // promote sibling + node_t *gparent = parent->parent.get_mine(wo, parent); + int pID = parent->ID.get_mine(wo, parent); + node_t *nephew = sibling->child[cID].get_mine(wo, sibling); + // make nephew the 1-cID child of parent + parent->child[1 - cID].set_mine(wo, parent, nephew); + if (nephew != nullptr) { + 
nephew->parent.set(wo, nephew, parent); + nephew->ID.set_mine(wo, nephew, 1 - cID); + } + // make parent the cID child of the sibling + sibling->child[cID].set_mine(wo, sibling, parent); + parent->parent.set_mine(wo, parent, sibling); + parent->ID.set_mine(wo, parent, cID); + // make sibling the pID child of gparent + gparent->child[pID].set(wo, gparent, sibling); + sibling->parent.set_mine(wo, sibling, gparent); + sibling->ID.set_mine(wo, sibling, pID); + break; // problem solved + } + + n = sibling->child[cID].re_get(wo, sibling); + if ((n != nullptr) && (RED == (n->color.get(wo, n)))) { + /* + ?p ?p + / \ / \ + By Bs => By Bn + / \ \ + Rn B1 Rs + \ + B1 + */ + sibling->color.set(wo, sibling, RED); + n->color.set(wo, n, BLACK); + // promote n + node_t *gneph = n->child[1 - cID].get_mine(wo, n); + // make gneph the cID child of sibling + sibling->child[cID].set_mine(wo, sibling, gneph); + if (gneph != nullptr) { + gneph->parent.set(wo, gneph, sibling); + gneph->ID.set_mine(wo, gneph, cID); + } + // make sibling the 1-cID child of n + n->child[1 - cID].set_mine(wo, n, sibling); + sibling->parent.set_mine(wo, sibling, n); + sibling->ID.set_mine(wo, sibling, 1 - cID); + // make n the 1-cID child of parent + parent->child[1 - cID].set(wo, parent, n); + n->parent.set_mine(wo, n, parent); + n->ID.set_mine(wo, n, 1 - cID); + // swap sibling and `n` + node_t *temp = sibling; + sibling = n; + n = temp; + + // now the far nephew is red... copy of code from above + sibling->color.set_mine(wo, sibling, + parent->color.get_mine(wo, parent)); + parent->color.set_mine(wo, parent, BLACK); + n->color.set_mine(wo, n, BLACK); + // promote sibling + node_t *gparent = parent->parent.get_mine(wo, parent); + int pID = parent->ID.get_mine(wo, parent); + node_t *nephew = sibling->child[cID].get_mine(wo, sibling); + // make nephew the 1-cID child of parent + parent->child[1 - cID].set_mine(wo, parent, nephew); + if (nephew != nullptr) { + nephew->parent.set(wo, nephew, parent); + nephew->ID.set_mine(wo, nephew, 1 - cID); + } + // make parent the cID child of the sibling + sibling->child[cID].set_mine(wo, sibling, parent); + parent->parent.set_mine(wo, parent, sibling); + parent->ID.set_mine(wo, parent, cID); + // make sibling the pID child of gparent + gparent->child[pID].set(wo, gparent, sibling); + sibling->parent.set_mine(wo, sibling, gparent); + sibling->ID.set_mine(wo, sibling, pID); + + break; // problem solved + } + + /* + ?p ?p + / \ / \ + Bx Bs => Bp Rs + / \ / \ + B1 B2 B1 B2 + */ + + sibling->color.set(wo, sibling, RED); // propagate upwards + + // advance to parent and balance again + curr = parent; + } + + // if curr was red, this fixes the balance + curr->color.set(wo, curr, BLACK); + + // free the node and return + wo.reclaim(to_delete); + + return true; + } +}; diff --git a/artifact/ds/handSTM/rbtree_tl2_omap.h b/artifact/ds/handSTM/rbtree_tl2_omap.h new file mode 100644 index 0000000..b95d4cc --- /dev/null +++ b/artifact/ds/handSTM/rbtree_tl2_omap.h @@ -0,0 +1,437 @@ +/* ============================================================================= + * + * rbtree.h + * -- Red-black balanced binary search tree + * + * ============================================================================= + * + * Copyright (C) Sun Microsystems Inc., 2006. All Rights Reserved. + * Authors: Dave Dice, Nir Shavit, Ori Shalev. + * + * STM: Transactional Locking for Disjoint Access Parallelism + * + * Transactional Locking II, + * Dave Dice, Ori Shalev, Nir Shavit + * DISC 2006, Sept 2006, Stockholm, Sweden. 
+ */ + +#pragma once + +// NB: The tl2 rbtree does not have a sentinel that points to the root. +// Consequently, we will make this an ownable_t, so it can have an orec to +// protect the root pointer. +template +class rbtree_tl2_omap { + using WOSTM = typename HANDSTM::WOSTM; + using ROSTM = typename HANDSTM::ROSTM; + using STM = typename HANDSTM::STM; + using ownable_t = typename HANDSTM::ownable_t; + template using FIELD = typename HANDSTM::template xField; + + static const int RED = 0; // Enum for red + static const int BLACK = 1; // Enum for black + + struct node_t : ownable_t { + FIELD key; + FIELD val; + FIELD p; + FIELD l; + FIELD r; + FIELD c; + char dummy[64]; + + node_t(WOSTM &wo, const K &_key, V &_val) { + key.set(wo, this, _key); + val.set(wo, this, _val); + p.set(wo, this, nullptr); + l.set(wo, this, nullptr); + r.set(wo, this, nullptr); + c.set(wo, this, RED); + } + }; + + ownable_t root_orec; + FIELD root; + + char dummy[64]; + + node_t *lookup(STM &tx, K k) { + node_t *p = this->root.get(tx, &root_orec); + while (p != nullptr) { + if (k == p->key.get(tx, p)) { + return p; + } + p = (k < p->key.get(tx, p)) ? leftOf(tx, p) : rightOf(tx, p); + } + return nullptr; + } + + void rotateLeft(WOSTM &wo, node_t *x) { + node_t *r = rightOf(wo, x); + node_t *rl = leftOf(wo, r); + setRight(wo, x, rl); + if (rl != nullptr) { + setParent(wo, rl, x); + } + + node_t *xp = parentOf(wo, x); + setParent(wo, r, xp); + if (xp == nullptr) { + this->root.set(wo, &root_orec, r); + } else if (leftOf(wo, xp) == x) { + setLeft(wo, xp, r); + } else { + setRight(wo, xp, r); + } + setLeft(wo, r, x); + setParent(wo, x, r); + } + + void rotateRight(WOSTM &wo, node_t *x) { + node_t *l = leftOf(wo, x); + node_t *lr = rightOf(wo, l); + setLeft(wo, x, lr); + if (lr != nullptr) { + setParent(wo, lr, x); + } + node_t *xp = parentOf(wo, x); + setParent(wo, l, xp); + if (xp == nullptr) { + this->root.set(wo, &root_orec, l); + } else if (rightOf(wo, xp) == x) { + setRight(wo, xp, l); + } else { + setLeft(wo, xp, l); + } + setRight(wo, l, x); + setParent(wo, x, l); + } + + node_t *parentOf(WOSTM &wo, node_t *n) { + return (n ? n->p.get(wo, n) : nullptr); + } + + node_t *leftOf(STM &tx, node_t *n) { return (n ? n->l.get(tx, n) : nullptr); } + + node_t *rightOf(STM &tx, node_t *n) { + return (n ? n->r.get(tx, n) : nullptr); + } + + int colorOf(WOSTM &wo, node_t *n) { return (n ? 
n->c.get(wo, n) : BLACK); } + + void setColor(WOSTM &wo, node_t *n, int c) { + if (n != nullptr) { + n->c.set(wo, n, c); + } + } + + void setParent(WOSTM &wo, node_t *n, node_t *p) { + if (n != nullptr) { + n->p.set(wo, n, p); + } + } + + void setLeft(WOSTM &wo, node_t *n, node_t *l) { + if (n != nullptr) { + n->l.set(wo, n, l); + } + } + + void setRight(WOSTM &wo, node_t *n, node_t *r) { + if (n != nullptr) { + n->r.set(wo, n, r); + } + } + + void fixAfterInsertion(WOSTM &wo, node_t *x) { + setColor(wo, x, RED); + while (x != nullptr && x != this->root.get(wo, &root_orec)) { + node_t *xp = parentOf(wo, x); + if (colorOf(wo, xp) != RED) { + break; + } + + if (parentOf(wo, x) == leftOf(wo, parentOf(wo, parentOf(wo, x)))) { + node_t *y = rightOf(wo, parentOf(wo, parentOf(wo, x))); + if (colorOf(wo, y) == RED) { + setColor(wo, parentOf(wo, x), BLACK); + setColor(wo, y, BLACK); + setColor(wo, parentOf(wo, parentOf(wo, x)), RED); + x = parentOf(wo, parentOf(wo, x)); + } else { + if (x == rightOf(wo, parentOf(wo, x))) { + x = parentOf(wo, x); + rotateLeft(wo, x); + } + setColor(wo, parentOf(wo, x), BLACK); + setColor(wo, parentOf(wo, parentOf(wo, x)), RED); + if (parentOf(wo, parentOf(wo, x)) != nullptr) { + rotateRight(wo, parentOf(wo, parentOf(wo, x))); + } + } + } else { + node_t *y = leftOf(wo, parentOf(wo, parentOf(wo, x))); + if (colorOf(wo, y) == RED) { + setColor(wo, parentOf(wo, x), BLACK); + setColor(wo, y, BLACK); + setColor(wo, parentOf(wo, parentOf(wo, x)), RED); + x = parentOf(wo, parentOf(wo, x)); + } else { + if (x == leftOf(wo, parentOf(wo, x))) { + x = parentOf(wo, x); + rotateRight(wo, x); + } + setColor(wo, parentOf(wo, x), BLACK); + setColor(wo, parentOf(wo, parentOf(wo, x)), RED); + if (parentOf(wo, parentOf(wo, x)) != nullptr) { + rotateLeft(wo, parentOf(wo, parentOf(wo, x))); + } + } + } + } + node_t *rt = this->root.get(wo, &root_orec); + if (colorOf(wo, rt) != BLACK) { + setColor(wo, rt, BLACK); + } + } + + node_t *insertOrGet(WOSTM &wo, K k, V v, node_t *n) { + node_t *t = this->root.get(wo, &root_orec); + if (t == nullptr) { + if (n == nullptr) { + return nullptr; + } + setColor(wo, n, BLACK); + this->root.set(wo, &root_orec, n); + return nullptr; + } + + for (;;) { + if (k == t->key.get(wo, t)) { + return t; + } else if (k < t->key.get(wo, t)) { + node_t *tl = leftOf(wo, t); + if (tl != nullptr) { + t = tl; + } else { + setParent(wo, n, t); + setLeft(wo, t, n); + fixAfterInsertion(wo, n); + return nullptr; + } + } else { + node_t *tr = rightOf(wo, t); + if (tr != nullptr) { + t = tr; + } else { + setParent(wo, n, t); + setRight(wo, t, n); + fixAfterInsertion(wo, n); + return nullptr; + } + } + } + } + + node_t *successor(WOSTM &wo, node_t *t) { + if (t == nullptr) { + return nullptr; + } else if (rightOf(wo, t) != nullptr) { + node_t *p = rightOf(wo, t); + while (leftOf(wo, p) != nullptr) { + p = leftOf(wo, p); + } + return p; + } else { + node_t *p = parentOf(wo, t); + node_t *ch = t; + while (p != nullptr && ch == rightOf(wo, p)) { + ch = p; + p = parentOf(wo, p); + } + return p; + } + } + + void fixAfterDeletion(WOSTM &wo, node_t *x) { + while (x != this->root.get(wo, &root_orec) && colorOf(wo, x) == BLACK) { + if (x == leftOf(wo, parentOf(wo, x))) { + node_t *sib = rightOf(wo, parentOf(wo, x)); + if (colorOf(wo, sib) == RED) { + setColor(wo, sib, BLACK); + setColor(wo, parentOf(wo, x), RED); + rotateLeft(wo, parentOf(wo, x)); + sib = rightOf(wo, parentOf(wo, x)); + } + if (colorOf(wo, leftOf(wo, sib)) == BLACK && + colorOf(wo, rightOf(wo, sib)) == BLACK) { + 
+          setColor(wo, sib, RED);
+          x = parentOf(wo, x);
+        } else {
+          if (colorOf(wo, rightOf(wo, sib)) == BLACK) {
+            setColor(wo, leftOf(wo, sib), BLACK);
+            setColor(wo, sib, RED);
+            rotateRight(wo, sib);
+            sib = rightOf(wo, parentOf(wo, x));
+          }
+          setColor(wo, sib, colorOf(wo, parentOf(wo, x)));
+          setColor(wo, parentOf(wo, x), BLACK);
+          setColor(wo, rightOf(wo, sib), BLACK);
+          rotateLeft(wo, parentOf(wo, x));
+
+          x = this->root.get(wo, &root_orec);
+        }
+      } else {
+        node_t *sib = leftOf(wo, parentOf(wo, x));
+        if (colorOf(wo, sib) == RED) {
+          setColor(wo, sib, BLACK);
+          setColor(wo, parentOf(wo, x), RED);
+          rotateRight(wo, parentOf(wo, x));
+          sib = leftOf(wo, parentOf(wo, x));
+        }
+        if (colorOf(wo, rightOf(wo, sib)) == BLACK &&
+            colorOf(wo, leftOf(wo, sib)) == BLACK) {
+          setColor(wo, sib, RED);
+          x = parentOf(wo, x);
+        } else {
+          if (colorOf(wo, leftOf(wo, sib)) == BLACK) {
+            setColor(wo, rightOf(wo, sib), BLACK);
+            setColor(wo, sib, RED);
+            rotateLeft(wo, sib);
+            sib = leftOf(wo, parentOf(wo, x));
+          }
+          setColor(wo, sib, colorOf(wo, parentOf(wo, x)));
+          setColor(wo, parentOf(wo, x), BLACK);
+          setColor(wo, leftOf(wo, sib), BLACK);
+          rotateRight(wo, parentOf(wo, x));
+
+          x = this->root.get(wo, &root_orec);
+        }
+      }
+    }
+
+    if (x != nullptr && colorOf(wo, x) != BLACK) {
+      setColor(wo, x, BLACK);
+    }
+  }
+
+  node_t *delete_node(WOSTM &wo, node_t *p) {
+    // If p has two children, copy the successor's pair into p, then delete
+    // the successor (which has at most one child) instead. The copies must
+    // go through the transactional fields, not raw assignment.
+    if (leftOf(wo, p) != nullptr && rightOf(wo, p) != nullptr) {
+      node_t *s = successor(wo, p);
+      p->key.set(wo, p, s->key.get(wo, s));
+      p->val.set(wo, p, s->val.get(wo, s));
+      p = s;
+    }
+
+    node_t *replacement =
+        ((leftOf(wo, p) != nullptr) ? leftOf(wo, p) : rightOf(wo, p));
+
+    if (replacement != nullptr) {
+      setParent(wo, replacement, parentOf(wo, p));
+      node_t *pp = parentOf(wo, p);
+      if (pp == nullptr) {
+        this->root.set(wo, &root_orec, replacement);
+      } else if (p == leftOf(wo, pp)) {
+        setLeft(wo, pp, replacement);
+      } else {
+        setRight(wo, pp, replacement);
+      }
+
+      setLeft(wo, p, nullptr);
+      setRight(wo, p, nullptr);
+      setParent(wo, p, nullptr);
+
+      if (colorOf(wo, p) == BLACK) {
+        fixAfterDeletion(wo, replacement);
+      }
+    } else if (parentOf(wo, p) == nullptr) {
+      this->root.set(wo, &root_orec, nullptr);
+    } else {
+      if (colorOf(wo, p) == BLACK) {
+        fixAfterDeletion(wo, p);
+      }
+      node_t *pp = parentOf(wo, p);
+      if (pp != nullptr) {
+        if (p == leftOf(wo, pp)) {
+          setLeft(wo, pp, nullptr);
+        } else if (p == rightOf(wo, pp)) {
+          setRight(wo, pp, nullptr);
+        }
+        setParent(wo, p, nullptr);
+      }
+    }
+    return p;
+  }
+
+  void releaseNode(WOSTM &wo, node_t *n) { wo.reclaim(n); }
+
+  /// Recursively reclaim the subtree rooted at n. The transaction context
+  /// must be threaded through every call.
+  void freeNode(WOSTM &wo, node_t *n) {
+    if (n) {
+      freeNode(wo, leftOf(wo, n));
+      freeNode(wo, rightOf(wo, n));
+      releaseNode(wo, n);
+    }
+  }
+
+public:
+  rbtree_tl2_omap(HANDSTM *me, auto *cfg) {
+    BEGIN_WO(me);
+    root.set(wo, &root_orec, nullptr);
+  }
+
+  /// Reclaim every node, then the tree itself. This needs a descriptor so
+  /// that freeNode() can run inside a writing transaction.
+  void rbtree_free(HANDSTM *me, rbtree_tl2_omap *r) {
+    {
+      BEGIN_WO(me);
+      r->freeNode(wo, r->root.get(wo, &r->root_orec));
+    }
+    free(r);
+  }
+
+  bool insert(HANDSTM *me, const K &key, V &val) {
+    BEGIN_WO(me);
+    node_t *node = new node_t(wo, key, val);
+    node_t *ex = insertOrGet(wo, key, val, node);
+    if (ex != nullptr) {
+      releaseNode(wo, node);
+    }
+    return ex == nullptr;
+  }
+
+  bool remove(HANDSTM *me, const K &key) {
+    BEGIN_WO(me);
+    node_t *node = lookup(wo, key);
+    if (node != nullptr) {
+      node = delete_node(wo, node);
+    }
+    if (node != nullptr) {
+      releaseNode(wo, node);
+    }
+    return node != nullptr;
+  }
+
+  /// Upsert: insert a new mapping, or overwrite the value if `key` exists
+  bool rbtree_update(HANDSTM *me, K key, V val) {
+    BEGIN_WO(me);
+    node_t *nn = new node_t(wo, key, val);
+    node_t *ex = insertOrGet(wo, key, val, nn);
+    if (ex != nullptr) {
+      ex->val.set(wo, ex, val);
+      releaseNode(wo, nn);
+      return true;
+    }
+    return false;
+  }
+
+  bool get(HANDSTM *me, const K &key, V &val) {
+    BEGIN_RO(me);
+    node_t *n = lookup(ro, key);
+    if (n != nullptr) {
+      val = n->val.get(ro, n);
+      return true;
+    }
+    return false;
+  }
+
+  bool rbtree_contains(HANDSTM *me, K key) {
+    BEGIN_RO(me);
+    return lookup(ro, key) != nullptr;
+  }
+};
diff --git a/artifact/ds/handSTM/skiplist_omap_bigtx.h b/artifact/ds/handSTM/skiplist_omap_bigtx.h
new file mode 100644
index 0000000..564ce18
--- /dev/null
+++ b/artifact/ds/handSTM/skiplist_omap_bigtx.h
@@ -0,0 +1,334 @@
+#pragma once
+
+#include
+#include
+#include
+
+/// NB: In this implementation we use the same orec for each node and its
+/// chimney nodes
+
+/// An ordered map, implemented as a doubly-linked skip list. This map supports
+/// get(), insert(), and remove() operations.
+///
+/// @param K The type of the keys stored in this map
+/// @param V The type of the values stored in this map
+/// @param HANDSTM A thread descriptor type, for safe memory reclamation
+/// @param dummy_key A fake key, to use in sentinel nodes
+/// @param dummy_val A fake value, to use in sentinel nodes
+template <typename K, typename V, typename HANDSTM, K dummy_key, V dummy_val>
+class skiplist_omap_bigtx {
+  using WOSTM = typename HANDSTM::WOSTM;
+  using ROSTM = typename HANDSTM::ROSTM;
+  using STM = typename HANDSTM::STM;
+  using ownable_t = typename HANDSTM::ownable_t;
+  template <typename T> using FIELD = typename HANDSTM::template xField<T>;
+
+  /// data_t is a node in the skip list. It has a key, a value, an owner, a
+  /// state, and a "tower" of predecessor and successor pointers
+  ///
+  /// NB: Height isn't always the size of the tower... it tracks how many
+  /// levels are fully and correctly stitched, so it changes during insertion
+  /// and removal.
+  struct data_t : ownable_t {
+    /// A pair of data pointers, for the successor and predecessor at a level
+    /// of the tower
+    struct level_t {
+      FIELD<data_t *> next; // Succ at this level
+      FIELD<data_t *> prev; // Pred at this level
+    };
+
+    const K key;          // The key stored in this node
+    FIELD<V> val;         // The value stored in this node
+    const uint8_t height; // # valid tower nodes
+    level_t tower[0];     // Tower of pointers to pred/succ
+
+  private:
+    /// Construct a data node. This is private to force the use of our make_*
+    /// methods, which handle allocating enough space for the tower.
+    ///
+    /// @param _key    The key that is stored in this node
+    /// @param _val    The value that is stored in this node
+    /// @param _height The number of valid tower levels
+    data_t(K _key, V _val, uint8_t _height)
+        : key(_key), val(_val), height(_height) {}
+
+  public:
+    /// Construct a sentinel (head or tail) node. Note that the sentinels can't
+    /// easily be of a node type that lacks key and value fields, or else the
+    /// variable-length array would preclude inheriting from it.
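// NB: A sketch (plain types, not the data_t above) of the
// over-allocate-plus-placement-new pattern that the make_* factories below
// use to size the flexible `tower` array. `sketch_node` is illustrative:
#include <cstdlib>
#include <new>
struct sketch_node {
  int height;
  void *levels[0]; // same zero-length-array idiom as data_t::tower
  explicit sketch_node(int h) : height(h) {}
};
inline sketch_node *make_sketch_node(int h) {
  // one malloc covers the header plus h+1 tower slots
  void *region = std::malloc(sizeof(sketch_node) + (h + 1) * sizeof(void *));
  return new (region) sketch_node(h); // construct in the raw region
}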
+ /// + /// @param iHeight The max number of index layers this node will have + static data_t *make_sentinel(uint8_t iHeight) { + int node_size = sizeof(data_t) + (iHeight + 1) * sizeof(level_t); + void *region = malloc(node_size); + return new (region) data_t(dummy_key, dummy_val, iHeight); + } + + /// Construct a data node + /// + /// @param iHeight The max number of index layers this node will have + /// @param key The key to store in this node + /// @param val The value to store in this node + static data_t *make_data(WOSTM &wo, uint64_t iHeight, K key, V val) { + int node_size = sizeof(data_t) + (iHeight + 1) * sizeof(level_t); + void *region = malloc(node_size); + return wo.LOG_NEW(new (region) data_t(key, val, iHeight)); + } + }; + + const int NUM_INDEX_LAYERS; // # of index layers. Doesn't count data layer + const int SNAPSHOT_FREQUENCY; // # of nodes between snapshots + data_t *const head; // The head sentinel + data_t *const tail; // The tail sentinel + +public: + /// Default construct a skip list by stitching a head sentinel to a tail + /// sentinel at each level + /// + /// @param _op The operation that is constructing the list + /// @param cfg A configuration object that has a `snapshot_freq` field + skiplist_omap_bigtx(HANDSTM *me, auto *cfg) + : NUM_INDEX_LAYERS(cfg->max_levels), + SNAPSHOT_FREQUENCY(cfg->snapshot_freq), + head(data_t::make_sentinel(NUM_INDEX_LAYERS)), + tail(data_t::make_sentinel(NUM_INDEX_LAYERS)) { + // NB: Even though the constructor is operating on private data, it needs a + // TM context in order to set the head and tail's towers to each other + BEGIN_WO(me); + for (auto i = 0; i <= NUM_INDEX_LAYERS; i++) { + head->tower[i].next.set(wo, head, tail); + tail->tower[i].prev.set(wo, tail, head); + } + } + + /// Search the data structure for a node with key `key`. If not found, return + /// false. If found, return true, and set `val` to the value associated with + /// `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key to search + /// @param val A ref parameter for returning key's value, if found + /// + /// @return True if the key is found, false otherwise. The reference + /// parameter `val` is only valid when the return value is true. + bool get(HANDSTM *me, const K &key, V &val) { + BEGIN_RO(me); + // Do a leq... if head, we fail. n will never be null or tail + auto n = get_leq(ro, key); + if (n == head || n->key != key) + return false; + + val = n->val.get(ro, n); + return true; + } + + /// Create a mapping from the provided `key` to the provided `val`, but only + /// if no such mapping already exists. This method does *not* have upsert + /// behavior for keys already present. + /// + /// @param me The calling thread's HANDSTM + /// @param key The key for the mapping to create + /// @param val The value for the mapping to create + /// + /// @return True if the value was inserted, false otherwise. + bool insert(HANDSTM *me, const K &key, V &val) { + + BEGIN_WO(me); + data_t *new_dn = nullptr; // The node that we insert, if any + int target_height = randomLevel(me); // The target index height of new_dn + // This transaction linearizes the insert by adding the node to the data + // layer + + // Get the insertion point, make sure `key` not already present + auto n = get_leq(wo, key); + + if (n != head && n->key == key) + return false; + + // Stitch new node in at lowest level. Get() can see it immediately. 
+ // Remove() has to wait + auto next = n->tower[0].next.get(wo, n); + new_dn = data_t::make_data(wo, target_height, key, val); + new_dn->tower[0].next.set(wo, new_dn, next); + new_dn->tower[0].prev.set(wo, new_dn, n); + n->tower[0].next.set(wo, n, new_dn); + next->tower[0].prev.set(wo, next, new_dn); + + // If this doesn't have any index nodes, we can unmark it and return + if (target_height == 0) + return true; + + // 'me' did an insert, still owns the node, and needs to stitch it into + // index levels. Do so from bottom to top. Release after the last level. + for (int level = 0; level < target_height; ++level) + index_stitch(wo, new_dn, level); + + return true; + } + + /// Clear the mapping involving the provided `key`. + /// + /// @param me The calling thread's HANDSTM + /// @param key The key for the mapping to eliminate + /// + /// @return True if the key was found and removed, false otherwise + bool remove(HANDSTM *me, const K &key) { + BEGIN_WO(me); + // Find the node. Fail if key not present + auto n = get_leq(wo, key); + if (n == head || n->key != key) + return false; + + // Unstitch the node, starting from its topmost level + // + // NB: When depth = 0, it's inefficient to unlock node and then re-lock. + // However, it's also inefficient to abort because we can't lock the + // predecessor or successor. Taking ownership then committing avoids + // re-traversing, so we'll go with it, especially since doing so won't + // block get() or nonconflicting insert(). + index_unstitch(wo, n, n->height); + return true; + } + +private: + /// get_leq uses the towers to skip from the head sentinel to the node + /// with the largest key <= the search key. It can return the head data + /// sentinel, but not the tail sentinel. + /// + /// get_leq can return an OWNED node. + /// + /// @param key The key for which we are doing a predecessor query. + /// + /// @return The data node that was found + data_t *get_leq(STM &tx, const K &key) { + // We always start at the head sentinel. Scan its tower to find the + // highest non-tail level + data_t *curr = head; + int current_level = 0; + for (int i = NUM_INDEX_LAYERS; i > 0; --i) { + if (head->tower[i].next.get(tx, head) != tail) { + current_level = i; + break; + } + } + + // Traverse over and down through the index layers + while (current_level > 0) { + curr = index_leq(tx, key, curr, current_level); + if (curr->key == key) + return curr; + --current_level; + } + + // Search in the data layer. Only return if result valid, not DELETED + return data_leq(tx, key, curr); + } + + /// Traverse forward from `start`, considering only tower level `level`, + /// stopping at the largest key <= `key` + /// + /// This can return nodes that are OWNED. The caller must check. + /// + /// @param key The key for which we are doing a predecessor query. + /// @param start The start position of this traversal. + /// @param level The tower level to consider + /// + /// @return The node that was found (possibly `start`). + data_t *index_leq(STM &tx, K key, data_t *start, uint64_t level) { + auto curr = start; + while (true) { + data_t *next = curr->tower[level].next.get(tx, curr); + if (next == tail) + return curr; + auto next_key = next->key; // not tail => next has a valid key + if (next_key == key) + return next; + if (next_key > key) + return curr; + curr = next; + } + } + + /// Traverse in the data layer to find the largest node with key <= `key`. + /// This can return an OWNED node. + /// + /// @param key The key for which we are doing a predecessor query. 
+ /// @param start The start position of this traversal. This may be the head, + /// or an intermediate point in the list + /// + /// @return The node that was found (possibly `start`). + data_t *data_leq(STM &tx, K key, data_t *start) { + auto curr = start; + auto next = curr->tower[0].next.get(tx, curr); + while (true) { + if (next == tail) + return curr; + auto nkey = next->key; + if (nkey > key) + return curr; + if (nkey == key) + return next; + curr = next; + next = next->tower[0].next.get(tx, next); + } + } + + /// Generate a random level for a new node + /// + /// NB: This code has been verified to produce a nice geometric distribution + /// in constant time per call + /// + /// @param me The caller's HANDSTM operation + /// + /// @return a random number between 0 and NUM_INDEX_LAYERS, inclusive + int randomLevel(HANDSTM *me) { + // Get a random int between 0 and 0xFFFFFFFF + int rr = me->rand(); + // Add 1 to it, then find the lowest nonzero bit. This way, we never return + // a zero for small integers, and the distribution is correct. + int res = __builtin_ffs(rr + 1); + // Now take one off of that, so that we return a zero-based integer + res -= 1; + // But if rr was 0xFFFFFFFF, we've got a problem, so coerce it back + // Also, drop it down to within NUM_INDEX_LAYERS + return (res < 0 || res > NUM_INDEX_LAYERS) ? NUM_INDEX_LAYERS : res; + } + + /// index_stitch is a small atomic operation that stitches a node in at a + /// given index level. + /// + /// @param node The node that was just inserted and stitched into `level` + /// @param level The level below where we're stitching + void index_stitch(WOSTM &wo, data_t *node, uint8_t level) { + // Go backwards, then up, to find a node at current `level` that is tall + // enough. Then get its successor + data_t *pred = node; + while (true) { + pred = pred->tower[level].prev.get(wo, pred); + if (pred->height > level) + break; + } + auto succ = pred->tower[level + 1].next.get(wo, pred); + + // Stitch `node` in between pred and succ, update node's height + node->tower[level + 1].next.set(wo, node, succ); + node->tower[level + 1].prev.set(wo, node, pred); + pred->tower[level + 1].next.set(wo, pred, node); + succ->tower[level + 1].prev.set(wo, succ, node); + } + + /// Unstitch `node`, starting at its topmost index layer. It is currently + /// OWNED. + /// + /// @param node The node that we are unstitching + /// @param height The highest index layer for `node` + void index_unstitch(WOSTM &wo, data_t *node, int height) { + // Work our way downward, unstitching at each level + for (int level = height; level >= 0; --level) { + auto pre = node->tower[level].prev.get(wo, node); + auto nxt = node->tower[level].next.get(wo, node); + pre->tower[level].next.set(wo, pre, nxt); + nxt->tower[level].prev.set(wo, nxt, pre); + } + wo.reclaim(node); + } +}; diff --git a/artifact/ds/handSTM/slist_omap.h b/artifact/ds/handSTM/slist_omap.h new file mode 100644 index 0000000..8b0cffa --- /dev/null +++ b/artifact/ds/handSTM/slist_omap.h @@ -0,0 +1,154 @@ +#pragma once + +/// An ordered map, implemented as a singly-linked list. This map supports +/// get(), insert(), and remove() operations. 
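// NB: A hypothetical usage sketch for the map defined below. It assumes the
// stripped template header reads template <typename K, typename V, typename
// HANDSTM>, matching the @param list, and that `me` is a valid descriptor:
template <typename HANDSTM>
void slist_example(HANDSTM *me, slist_omap<int, int, HANDSTM> &map) {
  int val = 7, out = 0;
  map.insert(me, 3, val);  // false if key 3 is already mapped
  if (map.get(me, 3, out)) // out == 7 on success
    map.remove(me, 3);     // unlinks and reclaims the node
}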
+/// +/// @param K The type of the keys stored in this map +/// @param V The type of the values stored in this map +/// @param HANDSTM A thread descriptor type, for safe memory reclamation +template class slist_omap { + using WOSTM = typename HANDSTM::WOSTM; + using ROSTM = typename HANDSTM::ROSTM; + using STM = typename HANDSTM::STM; + using ownable_t = typename HANDSTM::ownable_t; + template using FIELD = typename HANDSTM::template xField; + + /// A list node. It has a next pointer, but no key or value. It's useful for + /// sentinels, so that K and V don't have to be default constructable. + struct node_t : ownable_t { + FIELD next; // Pointer to successor + + /// Construct a node + node_t() : next(nullptr) {} + + /// Destructor is a no-op, but it needs to be virtual because of inheritance + virtual ~node_t() {} + }; + + /// A list node that also has a key and value. Note that keys are const. + struct data_t : public node_t { + const K key; // The key of this pair + FIELD val; // The value of this pair + + /// Construct a data_t + /// + /// @param _key The key that is stored in this node + /// @param _val The value that is stored in this node + data_t(const K &_key, const V &_val) : node_t(), key(_key), val(_val) {} + }; + + node_t *const head; // The list head pointer + node_t *const tail; // The list tail pointer + +public: + /// Default construct a list by constructing and connecting two sentinel nodes + /// + /// @param me The operation that is constructing the list + /// @param cfg A configuration object that has a `snapshot_freq` field + slist_omap(HANDSTM *me, auto *cfg) : head(new node_t()), tail(new node_t()) { + BEGIN_WO(me); + head->next.set(wo, head, tail); + } + +private: + /// get_leq is an inclusive predecessor query that returns the largest node + /// whose key is <= the provided key. It can return the head sentinel, but + /// not the tail sentinel. + /// + /// @param key The key for which we are doing a predecessor query. + /// @param lt_mode When `true`, this behaves as `get_lt`. When `false`, it + /// behaves as `get_leq`. + /// + /// @return The node that was found, and its orec value + node_t *get_leq(STM &tx, const K key, bool lt_mode = false) { + // Start at the head; read the next now, to avoid reading it in multiple + // iterations of the loop + node_t *curr = head; + + // Starting at `next`, search for key. + while (true) { + // Read the next node, fail if we can't do it consistently + auto next = curr->next.get(tx, curr); + + // Stop if next's key is too big or next is tail + if (next == tail) + return curr; + data_t *dn = static_cast(next); + K k = dn->key; + if (lt_mode ? k >= key : k > key) + return curr; + + // Stop if `next` is the match we were hoping for + if (k == key) + return next; + + // Keep traversing to `next` + curr = next; + } + } + +public: + /// Search the data structure for a node with key `key`. If not found, return + /// false. If found, return true, and set `val` to the value associated with + /// `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key to search + /// @param val A ref parameter for returning key's value, if found + /// + /// @return True if the key is found, false otherwise. The reference + /// parameter `val` is only valid when the return value is true. + bool get(HANDSTM *me, const K &key, V &val) { + BEGIN_RO(me); + // find the largest node with a key <= `key`. 
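// NB: get_leq() above is the orec-instrumented form of this plain-pointer
// predecessor search. A sketch with a hypothetical `pnode` type, showing the
// traversal shape only:
struct pnode {
  int key;
  pnode *next;
};
inline pnode *plain_get_leq(pnode *head, pnode *tail, int key) {
  pnode *curr = head;
  while (true) {
    pnode *next = curr->next;
    if (next == tail || next->key > key)
      return curr; // inclusive predecessor
    if (next->key == key)
      return next; // exact match
    curr = next;
  }
}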
+ auto n = get_leq(ro, key); + if (n == head || static_cast(n)->key != key) + return false; + data_t *dn = static_cast(n); + val = dn->val.get(ro, dn); + return true; + } + + /// Create a mapping from the provided `key` to the provided `val`, but only + /// if no such mapping already exists. This method does *not* have upsert + /// behavior for keys already present. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to create + /// @param val The value for the mapping to create + /// + /// @return True if the value was inserted, false otherwise. + bool insert(HANDSTM *me, const K &key, V &val) { + BEGIN_WO(me); + + auto n = get_leq(wo, key); + if (n != head && static_cast(n)->key == key) + return false; + + // stitch in a new node + data_t *new_dn = wo.LOG_NEW(new data_t(key, val)); + new_dn->next.set(wo, new_dn, n->next.get(wo, n)); + n->next.set(wo, n, new_dn); + return true; + } + + /// Clear the mapping involving the provided `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to eliminate + /// + /// @return True if the key was found and removed, false otherwise + bool remove(HANDSTM *me, const K &key) { + BEGIN_WO(me); + // NB: this will be a lt query, not a leq query + auto prev = get_leq(wo, key, true); + auto curr = prev->next.get(wo, prev); + // if curr doesn't have a matching key, fail + if (curr == tail || static_cast(curr)->key != key) + return false; + auto next = curr->next.get(wo, curr); + prev->next.set(wo, prev, next); + wo.reclaim(curr); + return true; + } +}; diff --git a/artifact/ds/hybrid/dlist_carumap.h b/artifact/ds/hybrid/dlist_carumap.h new file mode 100644 index 0000000..85667b6 --- /dev/null +++ b/artifact/ds/hybrid/dlist_carumap.h @@ -0,0 +1,696 @@ +#pragma once + +#include +#include +#include +/// An unordered map, implemented as a resizable array of lists (closed +/// addressing, resizable). This map supports get(), insert() and remove() +/// operations. +/// +/// This implementation is based loosely on Liu's nonblocking resizable hash +/// table from PODC 2014. At the current time, we do not support the heuristic +/// for contracting the list, but we do support expanding the list. +/// +/// @param K The type of the keys stored in this map +/// @param V The type of the values stored in this map +/// @param HYPOL The HYPOL implementation (PO or PS) +template class dlist_carumap { + using WOSTM = typename HYPOL::WOSTM; + using ROSTM = typename HYPOL::ROSTM; + using RSTEP = typename HYPOL::RSTEP; + using WSTEP = typename HYPOL::WSTEP; + using ownable_t = typename HYPOL::ownable_t; + template using FIELD = typename HYPOL::template sxField; + + /// A list node. It has prev and next pointers, but no key or value. It's + /// useful for sentinels, so that K and V don't have to be default + /// constructable. + /// + /// NB: we do not need a `valid` bit, because any operation that would clear + /// it would also acquire this node's orec, and thus any node that would + /// encounter a cleared valid bit would also detect an orec inconsistency. + struct node_t : ownable_t { + FIELD prev; // Pointer to predecessor + FIELD next; // Pointer to successor + + /// Construct a node + node_t() : ownable_t(), prev(nullptr), next(nullptr) {} + + /// Destructor is a no-op, but it needs to be virtual because of inheritance + virtual ~node_t() {} + }; + + /// We need to know if buckets have been rehashed to a new table. 
We do this + /// by making the head of each bucket a `sentinel_t`, and adding a `closed` + /// bool. Note that the tail of each bucket's list is just a node_t. + struct sentinel_t : node_t { + /// Track if this sentinel is for a bucket that has been rehashed + /// + /// NB: Could we use `prev` to indicated `closed` + FIELD closed; // Has it been rehashed? + + /// Construct a sentinel_t + sentinel_t() : node_t(), closed(false) {} + + /// Destructor is a no-op, but it needs to be virtual because of inheritance + virtual ~sentinel_t() {} + }; + + /// A list node that also has a key and value. Note that keys are const, and + /// values are only accessed while the node is locked, so neither is a + /// tm_field. + struct data_t : node_t { + const K key; // The key of this key/value pair + V val; // The value of this key/value pair + + /// Construct a data_t + /// + /// @param _key The key that is stored in this node + /// @param _val The value that is stored in this node + data_t(const K &_key, const V &_val) : node_t(), key(_key), val(_val) {} + + /// Destructor is a no-op, but it needs to be virtual because of inheritance + virtual ~data_t() {} + }; + + /// An array of lists, along with its size + /// + /// NB: to avoid indirection, the array is in-lined into the tbl_t. To make + /// this compatible with SMR, tbl_t must be ownable. + class tbl_t : public ownable_t { + using bucket_t = FIELD; + + /// Construct a table + /// + /// @param `_size` The desired size of the table + tbl_t(uint64_t _size) : size(_size) {} + + public: + const uint64_t size; // The size of the table + bucket_t tbl[]; // The buckets of the table + + /// Allocate a tbl_t of size `size` + /// + /// @param size The desired size + /// @param tx The calling operation's descriptor + /// + /// @return A table, all of whose buckets are set to null + static tbl_t *make(uint64_t size, WSTEP &tx) { + tbl_t *tbl = (tbl_t *)malloc(sizeof(tbl_t) + size * sizeof(bucket_t)); + auto ret = new (tbl) tbl_t(size); + for (size_t i = 0; i < size; ++i) + ret->tbl[i].sSet(nullptr, tx); + return ret; + } + }; + + ownable_t *tbl_orec; // An orec for protecting `active` and `frozen` + FIELD active; // The active table + FIELD frozen; // The frozen table + std::hash _pre_hash; // A weak hash function for converting keys to ints + const uint64_t RESIZE_THRESHOLD; // Max bucket size before resizing + + /// A pair consisting of a pointer and an orec version. + struct node_ver_t { + node_t *_obj = nullptr; // The start of a bucket + uint64_t _ver = 0; // NB: _ver may not be related to _obj + }; + + /// Result of trying to resize a bucket + enum resize_result_t { + CANNOT_ACQUIRE, // Couldn't get orec... retry + ALREADY_RESIZED, // Already resized by another thread + RESIZE_OK // Bucket successfully resized + }; + + /// Given a key, determine the bucket into which it should go. As in the Liu + /// hash, we do not change the hash function when we resize, we just change + /// the number of bits to use + /// + /// @param key The key to hash + /// @param size The size of the table into which this should be hashed + /// + /// @return An integer between 0 and size + uint64_t table_hash(HYPOL *me, const K &key, const uint64_t size) const { + return me->hash(_pre_hash(key)) % size; + } + +public: + /// Default construct a map as having a valid active table. + /// + /// NB: This constructor calls std::terminate if the provided size is not a + /// power of 2. 
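// NB: The resize logic below depends on a split property of doubling: when
// the table grows from n to 2n buckets, a key in frozen bucket i can only
// land in active bucket i or i + n, which is why rehash_expand_bucket()
// produces exactly two destination lists. A hypothetical self-check of the
// arithmetic:
#include <cassert>
#include <cstdint>
inline void check_split_property(uint64_t h, uint64_t n) {
  uint64_t old_bucket = h % n;       // bucket in the frozen (size-n) table
  uint64_t new_bucket = h % (2 * n); // bucket in the doubled active table
  assert(new_bucket == old_bucket || new_bucket == old_bucket + n);
}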
+ /// + /// @param me The operation that is creating this umap + /// @param cfg A config object with `buckets` and `resize_threshold` + dlist_carumap(HYPOL *me, auto *cfg) + : tbl_orec(new ownable_t()), RESIZE_THRESHOLD(cfg->resize_threshold) { + // Enforce power-of-2 initial size + if (std::popcount(cfg->buckets) != 1) + throw("cfg->buckets should be power of 2"); + + // Create an initial active table in which all of the buckets are + // initialized but empty (null <- head <-> tail -> null). + WSTEP tx(me); + active.sSet(tbl_t::make(cfg->buckets, tx), tx); + for (size_t i = 0; i < cfg->buckets; ++i) + active.sGet(tx)->tbl[i].sSet(create_list(tx), tx); + // NB: since all buckets are initialized, nobody will ever go to the + // frozen table, so we can leave it as null + frozen.sSet(nullptr, tx); + } + +private: + /// Create a dlist with head and tail sentinels + /// + /// @param tx A writing TM context. Even though this code can't fail, we need + /// the context in order to use tm_field correctly. + /// + /// @return A pointer to the head sentinel of the list + sentinel_t *create_list(WSTEP &tx) { + // NB: By default, a node's prev and next will be nullptr, which is what we + // want for head->prev and tail->next. + auto head = new sentinel_t(); + auto tail = new node_t(); + head->next.sSet(tail, tx); + tail->prev.sSet(head, tx); + return head; + } + + /// `resize()` is an internal method for changing the size of the active + /// table. Strictly speaking, it should be called `expand`, because for now we + /// only support expansion, not contraction. When `insert()` discovers that + /// it has made a bucket "too big", it will continue to do its insertion and + /// then, after linearizing, it will call `resize()`. `remove()` does not + /// currently call `resize()`. + /// + /// At a high level, `resize()` is supposed to be open-nested and not to incur + /// any blocking, except due to orec conflicts. We accomplish this through + /// laziness and stealing. resize() finishes the /last/ resize, moves the + /// `active` table to `frozen`, and installs a new `active` table. Subsequent + /// operations will do most of the migrating. + /// + /// @param me The calling thread's descriptor + /// @param a_ver The version of `active` when the resize was triggered + void resize(HYPOL *me, uint64_t a_ver) { + // Get the current active and frozen tables, and the frozen table size + tbl_t *ft = nullptr, *at = nullptr; + { + RSTEP tx(me); + ft = frozen.sGet(tx); + at = active.sGet(tx); + if (!tx.check_continuation(tbl_orec, a_ver)) + return; // someone else must be starting a resize, so we can quit + } + + // If ft is null, then there's no frozen table, so things will be easy + if (ft == nullptr) { + WSTEP tx(me); + + // Make and initialize the table *before* acquiring orecs, to minimize the + // critical section. The table is 2x as big. + auto new_tbl = tbl_t::make(at->size * 2, tx); + + // Lock the table, move it from `active` to `frozen`, then install the new + // table. + if (!tx.acquire_continuation(tbl_orec, a_ver)) { + // NB: new_tbl is private. We don't need SMR + delete new_tbl; + return; // Someone else is resizing, and that's good enough for `me` + } + frozen.sSet(at, tx); + active.sSet(new_tbl, tx); + return; + } + + // Migrate everything out of frozen, remove the frozen table, and retry + // + // NB: prepare_resize removes the frozen table. That will change a_ver, so + // we need to capture the new a_ver value so that our next attempt won't + // fail erroneously. 
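// NB: Stripped of orecs, retries, and SMR, the control flow of resize()
// reduces to this single-threaded skeleton; `seq_table` and `migrate_all`
// are illustrative stand-ins, not types from this file:
#include <cstddef>
struct seq_table {
  size_t size;
  /* buckets ... */
};
struct seq_map {
  seq_table *active_tbl;
  seq_table *frozen_tbl = nullptr;
  explicit seq_map(size_t n) : active_tbl(new seq_table{n}) {}
  void resize() {
    if (frozen_tbl == nullptr) { // no resize in flight: freeze the current
      frozen_tbl = active_tbl;   // table and install a double-size one
      active_tbl = new seq_table{2 * frozen_tbl->size};
      return;
    }
    migrate_all(frozen_tbl, active_tbl); // finish the in-flight resize,
    delete frozen_tbl;                   // retire the frozen table,
    frozen_tbl = nullptr;
    resize();                            // then start our own expansion
  }
  static void migrate_all(seq_table *, seq_table *) { /* rehash buckets */ }
};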
+ a_ver = prepare_resize(me, a_ver, ft, at); + if (a_ver == 0) + return; // Someone else finished resizing for `me` + + resize(me, a_ver); // Try again now that it's clean + } + + /// Finish one lazy resize, so that another may begin. + /// + /// This really just boils down to migrating everything from `frozen` to + /// `active` and then nulling `frozen` and reclaiming it. + /// + /// NB: This code takes the "frozen" and "active" tables as arguments. + /// Consequently, we don't care about arbitrary delays. If a thread calls + /// this, rehashes half the table, and then suspends, another thread can + /// rehash everything else and install a new active table. When the first + /// thread wakes, it'll find a bunch of empty buckets, and it'll be safe. + /// + /// @param me The calling thread's descriptor + /// @param a_ver The active table version when this was called + /// @param f_tbl The "frozen table", really the "source" table + /// @param a_tbl The "active table", really the "destination" table + /// + /// @return {0} if another thread stole the job of nulling `frozen`. + /// When this happens, there must be a concurrent resize, + /// and since both are trying to do the same thing (expand), + /// the one who receives {0} can just get out of the other's + /// way + /// {integer} the new orec version of `active` + uint64_t prepare_resize(HYPOL *me, uint64_t a_ver, tbl_t *f_tbl, + tbl_t *a_tbl) { + // NB: Right now, next_index == completed. If we randomized the start + // point, concurrent calls to prepare_resize() would contend less + uint64_t next_index = 0; // Next bucket to migrate + uint64_t completed = 0; // Number of buckets migrated + + // Migrate all data from `frozen` to `active` + while (completed != f_tbl->size) { + uint64_t bucket_orec = 0; + sentinel_t *bucket = nullptr; + { + RSTEP tx(me); + + // Try to rehash the next bucket + bucket = f_tbl->tbl[next_index].sGet(tx); + bucket_orec = tx.check_orec(bucket); + if (bucket_orec == HYPOL::END_OF_TIME) { + continue; + } + } + + auto res = rehash_expand_bucket(me, bucket, bucket_orec, next_index, + f_tbl->size, a_tbl); + { + RSTEP tx(me); + // If we can't acquire all nodes in this bucket, try again, because it + // might just mean someone else was doing an operation in the bucket. + if (res == CANNOT_ACQUIRE) { + continue; + } + // If this bucket is already rehashed by others, there is a chance that + // the current resize phase is finished, so check + if (res == ALREADY_RESIZED) { + // check if the active table version changed since resize() was + // called, if so, we know resize is finished, return + if (!tx.check_continuation(tbl_orec, a_ver)) { + return 0; + } + } + } + + // Move to the next bucket + ++next_index; + ++completed; + } + + // Uninstall the `frozen` table, since it has been emptied. Save the commit + // time, so we can validate tbl_orec later. + tbl_t *old; + { + BEGIN_WO(me); + if (wo.inheritOrec(tbl_orec, a_ver)) { + old = f_tbl; + frozen.xSet(wo, tbl_orec, nullptr); + } else + return 0; + } + auto last_commit_time = me->get_last_wo_end_time(); + + // Reclaim `old`'s buckets, then `old` itself + { + BEGIN_WO(me); + for (size_t i = 0; i < f_tbl->size; i++) { + // use singleton_reclaim to reclaim head and tail of each bucket + auto head = old->tbl[i].xGet(wo, tbl_orec); + auto tail = head->next.xGet(wo, head); + wo.reclaim(head); + wo.reclaim(tail); + } + wo.reclaim(old); + } + return last_commit_time; + } + + /// Get a pointer to the bucket in the active table that holds `key`. 
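// NB: The check_orec()/check_continuation() calls used throughout this file
// follow the seqlock-style "read, then validate the version" recipe. A
// minimal analogue with a plain counter (an illustration, not the artifact's
// orec implementation; it also ignores the formal data race on `field` that
// real orec-based systems account for):
#include <atomic>
#include <cstdint>
struct mini_orec {
  std::atomic<uint64_t> ver{0}; // odd while a writer holds the location
};
inline uint64_t read_validated(mini_orec &o, const int &field, int &out) {
  while (true) {
    uint64_t v1 = o.ver.load(std::memory_order_acquire);
    if (v1 & 1)
      continue;      // a writer is active; try again
    int tmp = field; // speculative read
    if (o.ver.load(std::memory_order_acquire) == v1) {
      out = tmp;
      return v1; // caller can re-check this version later (a continuation)
    }
  }
}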
This + /// may cause some rehashing to happen. + /// + /// NB: The pattern here is unconventional. get_bucket() is the first step in + /// WSTEP transactions. If it doesn't rehash, then the caller WSTEP + /// continues its operation. If it does rehash, then the caller WSTEP + /// commits and restarts, which is a poor-man's open-nested transaction. + /// If it encounters an inconsistency, the caller WSTEP should "abort" by + /// unwinding and restarting. In the third case, this returns *while + /// holding an orec* + /// + /// @param me The calling thread's descriptor + /// @param key The key whose bucket is sought + /// @param tx An active WSTEP transaction + /// + /// @return On success, a pointer to the head of a bucket, along with + /// `tbl_orec`'s value. {nullptr, 0} on any rehash or inconsistency + std::pair get_bucket(HYPOL *me, const K &key) { + // Get the head of the appropriate bucket in the active table + // + // NB: Validate or else a_tbl[a_idx] could be out of bounds + uint64_t f_bucket_orec = 0, f_idx = 0; + sentinel_t *f_bucket = nullptr; + tbl_t *f_tbl = nullptr, *a_tbl = nullptr; + + while (true) { + RSTEP tx(me); + a_tbl = active.sGet(tx); + uint64_t a_ver = tx.check_orec(tbl_orec); + if (a_ver == HYPOL::END_OF_TIME) { + continue; + } + + auto a_idx = table_hash(me, key, a_tbl->size); + auto a_bucket = a_tbl->tbl[a_idx].sGet(tx); // NB: caller will validate + + if (a_bucket) { + auto b_ver = tx.check_orec(a_bucket); + if (b_ver == HYPOL::END_OF_TIME) { + continue; + } + return {{a_bucket, b_ver}, a_ver}; // not null --> no resize needed + } + + // Find the bucket in the frozen table that needs rehashing + f_tbl = frozen.sGet(tx); + if (tx.check_orec(tbl_orec) == HYPOL::END_OF_TIME) { + continue; + } + + f_idx = table_hash(me, key, f_tbl->size); + f_bucket = f_tbl->tbl[f_idx].sGet(tx); + f_bucket_orec = tx.check_orec(f_bucket); + if (f_bucket_orec == HYPOL::END_OF_TIME) { + continue; + } + break; + } + + // while(true){ + // WSTEP tx(me); + // if(!tx.acquire_continuation(f_bucket, f_bucket_orec)) + // continue; + // } + + // Rehash it, tell caller to commit so the rehash appears to be open nested + // + // NB: if the rehash fails, it's due to someone else rehashing, which is OK + rehash_expand_bucket(me, f_bucket, f_bucket_orec, f_idx, f_tbl->size, + a_tbl); + return {{nullptr, 0}, 0}; + } + + /// Re-hash one list in the frozen table into two lists in the active table + /// + /// @param me The calling thread's descriptor + /// @param f_list A pointer to an (acquired!) 
list head in the frozen table + /// @param f_idx The index of flist in the frozen table + /// @param f_size The size of the frozen table + /// @param a_tbl A reference to the active table + /// @param tx An active WSTEP transaction + /// + /// @return RESIZE_OK - The frozen bucket was rehashed into `a_tbl` + /// ALREADY_RESIZED - The frozen bucket was empty + /// CANNOT_ACQUIRE - The operation could not acquire all orecs + resize_result_t rehash_expand_bucket(HYPOL *me, sentinel_t *f_list, + uint64_t f_list_orec, uint64_t f_idx, + uint64_t f_size, tbl_t *a_tbl) { + WSTEP tx(me); + if (!tx.acquire_continuation(f_list, f_list_orec)) + return CANNOT_ACQUIRE; + // Stop if this bucket is already rehashed + if (f_list->closed.sGet(tx)) // true is effectively const, skip validation + return ALREADY_RESIZED; + // Fail if we cannot acquire all nodes in f_list + if (!list_acquire_all(f_list, tx)) + return CANNOT_ACQUIRE; + + // Shuffle nodes from f_list into two new lists that will go into `a_tbl` + auto l1 = create_list(tx), l2 = create_list(tx); + auto curr = f_list->next.sGet(tx); + while (curr->next.sGet(tx) != nullptr) { + auto next = curr->next.sGet(tx); + auto data = static_cast(curr); + auto dest = table_hash(me, data->key, a_tbl->size) == f_idx ? l1 : l2; + auto succ = dest->next.sGet(tx); + dest->next.sSet(data, tx); + data->next.sSet(succ, tx); + data->prev.sSet(dest, tx); + succ->prev.sSet(data, tx); + curr = next; + } + // curr is tail, set head->tail + f_list->next.sSet(curr, tx); + // put the lists into the active table, close the frozen bucket + a_tbl->tbl[f_idx].sSet(l1, tx); + a_tbl->tbl[f_idx + f_size].sSet(l2, tx); + f_list->closed.sSet(true, tx); + return RESIZE_OK; + } + + /// Acquire all of the nodes in the list starting at `head`, including the + /// head and tail sentinels + /// + /// @param head The head of the list whose nodes should be acquired + /// @param tail The calling WSTEP transaction + /// + /// @return true if all nodes are acquired, false otherwise + bool list_acquire_all(node_t *head, WSTEP &tx) { + node_t *curr = head; + while (curr) { + if (!tx.acquire_consistent(curr)) + return false; + curr = curr->next.sGet(tx); + } + return true; + } + + /// Given the head sentinel of a list, search through the list to find the + /// node with key `key`, if such a node exists in the list. If it doesn't, + /// then return the head pointer, along with a count of non-sentinel nodes in + /// the list + /// + /// @param key The key for which we are searching + /// @param head The start of the list to search + /// @param tx An active WSTEP transaction + /// + /// @return {nullptr, 0} if the transaction discovered an inconsistency + /// {head, count} if the key was not found + /// {node, 0} if the key was found at `node` + std::pair list_get_or_head(HYPOL *me, const K &key, + sentinel_t *head, + uint64_t head_orec) { + RSTEP tx(me); + // Get the head's successor; on any inconsistency, return. 
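// NB: A plain-pointer sketch of the node shuffle in rehash_expand_bucket()
// above: every node rehashes either to its old index or to old index plus
// old size. `qnode` is illustrative, and hashing is simplified to a modulus:
#include <cstdint>
struct qnode {
  uint64_t key;
  qnode *next;
};
inline void split_bucket(qnode *src, qnode *&l1, qnode *&l2, uint64_t f_idx,
                         uint64_t new_size) {
  while (src != nullptr) {
    qnode *next = src->next;
    qnode *&dest = (src->key % new_size == f_idx) ? l1 : l2;
    src->next = dest; // push-front onto the destination list
    dest = src;
    src = next;
  }
}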
+    auto curr = head->next.sGet(tx);
+    if (!tx.check_continuation(head, head_orec)) {
+      return {{nullptr, 0}, 0};
+    }
+
+    uint64_t count = 0; // Number of nodes encountered during the loop
+
+    while (true) {
+      // if we reached the tail, return the head
+      if (curr->next.sGet(tx) == nullptr) {
+        if (!tx.check_continuation(head, head_orec)) {
+          return {{nullptr, 0}, 0};
+        }
+        return {{head, head_orec},
+                count}; // No validation: tail's next is effectively const
+      }
+
+      // return curr if it has a matching key
+      if (static_cast<data_t *>(curr)->key == key) {
+        auto c_orec = tx.check_orec(curr);
+        if (c_orec == HYPOL::END_OF_TIME)
+          return {{nullptr, 0}, 0};
+        return {{curr, c_orec}, 0};
+      }
+
+      // read `next` consistently
+      //
+      // NB: We could skip this, and just validate before `return {curr, 0}`
+      auto next = curr->next.sGet(tx);
+      if (tx.check_orec(curr) == HYPOL::END_OF_TIME)
+        return {{nullptr, 0}, 0};
+      curr = next;
+      ++count;
+    }
+  }
+
+public:
+  /// Search the data structure for a node with key `key`. If not found, return
+  /// false. If found, return true, and set `val` to the value associated with
+  /// `key`.
+  ///
+  /// @param me  The calling thread's descriptor
+  /// @param key The key to search
+  /// @param val A ref parameter for returning key's value, if found
+  ///
+  /// @return True if the key is found, false otherwise. The reference
+  ///         parameter `val` is only valid when the return value is true.
+  bool get(HYPOL *me, const K &key, V &val) {
+    while (true) {
+
+      // Get the bucket in `active` where `key` should be. "Abort" and retry on
+      // any inconsistency; commit and retry if `get_bucket` resized
+      auto [bucket_pair, _] = get_bucket(me, key);
+      auto bucket = bucket_pair._obj;
+      auto bucket_orec = bucket_pair._ver;
+      if (!bucket)
+        continue;
+
+      // Find the node in `bucket` that matches `key`. If it can't be found,
+      // we'll get the head node.
+      auto [node_pair, __] = list_get_or_head(
+          me, key, static_cast<sentinel_t *>(bucket), bucket_orec);
+      auto node = node_pair._obj;
+      // If we got back null, there was an inconsistency, so retry
+      if (!node) {
+        continue;
+      }
+
+      // If we got back the head, return false
+      if (node == bucket) {
+        return false;
+      }
+
+      if (std::is_scalar<V>::value) {
+        RSTEP tx(me);
+        data_t *dn = static_cast<data_t *>(node);
+        V val_copy = reinterpret_cast<std::atomic<V> *>(&dn->val)->load(
+            std::memory_order_acquire);
+        if (tx.check_orec(node) == HYPOL::END_OF_TIME) {
+          continue;
+        }
+        val = val_copy;
+        return true;
+      } else {
+        WSTEP tx(me);
+        // Acquire, read, unwind (because no writes!)
+        if (!tx.acquire_continuation(node, node_pair._ver)) {
+          continue;
+        }
+        val = static_cast<data_t *>(node)->val;
+        tx.unwind();
+        return true;
+      }
+    }
+  }
+
+  /// Create a mapping from the provided `key` to the provided `val`, but only
+  /// if no such mapping already exists. This method does *not* have upsert
+  /// behavior for keys already present.
+  ///
+  /// @param me  The calling thread's descriptor
+  /// @param key The key for the mapping to create
+  /// @param val The value for the mapping to create
+  ///
+  /// @return True if the value was inserted, false otherwise.
+  bool insert(HYPOL *me, const K &key, V &val) {
+    // If we discover that a bucket becomes too full, we'll insert, linearize,
+    // and then resize in a new transaction before returning. Tracking
+    // `active`'s version prevents double-resizing under concurrency.
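+    //
+    // Sketch of the flow below:
+    //   1. get_bucket()       -- may rehash; retry until we get a bucket
+    //   2. list_get_or_head() -- find `key`, or the head plus a node count
+    //   3. WOSTM              -- stitch in the new node
+    //   4. if the count hit RESIZE_THRESHOLD, resize() in a fresh transaction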
+    uint64_t a_ver = 0;
+    while (true) {
+
+      auto [bucket_pair, a_version] = get_bucket(me, key);
+      auto bucket = bucket_pair._obj;
+      if (!bucket)
+        continue;
+      a_ver = a_version;
+
+      // Find the node in `bucket` that matches `key`. If it can't be found,
+      // we'll get the head node.
+      auto [node_pair, count] = list_get_or_head(
+          me, key, static_cast<sentinel_t *>(bucket_pair._obj),
+          bucket_pair._ver);
+      auto node = node_pair._obj;
+      // If we got back null, there was an inconsistency, so retry
+      if (!node) {
+        continue;
+      }
+
+      // If we didn't get the head, the key already exists, so return false
+      if (node != bucket_pair._obj) {
+        return false;
+      }
+      BEGIN_WO(me);
+      // Make this WOSTM a continuation of the RSTEP that found `node`
+      if (!wo.inheritOrec(node, node_pair._ver)) {
+        continue;
+      }
+      auto next = node->next.xGet(wo, node);
+
+      // Stitch in a new node
+      data_t *new_dn = new data_t(key, val);
+      new_dn->next.xSet(wo, new_dn, next);
+      new_dn->prev.xSet(wo, new_dn, node);
+      node->next.xSet(wo, node, new_dn);
+      next->prev.xSet(wo, next, new_dn);
+      if (count >= RESIZE_THRESHOLD)
+        break; // need to resize!
+      return true;
+    }
+
+    resize(me, a_ver);
+    return true;
+  }
+
+  /// Clear the mapping involving the provided `key`.
+  ///
+  /// @param me  The calling thread's descriptor
+  /// @param key The key for the mapping to eliminate
+  ///
+  /// @return True if the key was found and removed, false otherwise
+  bool remove(HYPOL *me, const K &key) {
+    while (true) {
+      // Get the bucket in `active` where `key` should be. Abort and retry on
+      // any inconsistency; commit and retry if `get_bucket` resized
+      auto [bucket_pair, _] = get_bucket(me, key);
+      auto bucket = bucket_pair._obj;
+      if (!bucket)
+        continue;
+
+      // Find the node in `bucket` that matches `key`. If it can't be found,
+      // we'll get the head node.
+      //
+      // NB: While `bucket` has not been reclaimed, `active.tbl` may have
+      //     changed. Fortunately, list_get_or_head will validate it.
+      auto [node_pair, __] = list_get_or_head(
+          me, key, static_cast<sentinel_t *>(bucket), bucket_pair._ver);
+      auto node = node_pair._obj;
+      WSTEP tx(me);
+      // If we got back the head, return false
+      if (node == bucket) {
+        tx.unwind(); // because we didn't update shared memory
+        return false;
+      }
+
+      // If `node` is null, list_get_or_head failed, so retry. Otherwise it is
+      // unowned and the keys match, so lock `node` and its neighbors; on any
+      // failure, unwind and retry
+      if (!node || !tx.acquire_continuation(node, node_pair._ver) ||
+          !tx.acquire_aggressive(node->prev.sGet(tx)) ||
+          !tx.acquire_aggressive(node->next.sGet(tx))) {
+        tx.unwind();
+        continue;
+      }
+
+      // unstitch it
+      auto pred = node->prev.sGet(tx), succ = node->next.sGet(tx);
+      pred->next.sSet(succ, tx);
+      succ->prev.sSet(pred, tx);
+      tx.reclaim(node);
+      return true;
+    }
+  }
+};
diff --git a/artifact/ds/hybrid/rbtree_omap_drop.h b/artifact/ds/hybrid/rbtree_omap_drop.h
new file mode 100644
index 0000000..4b1aca5
--- /dev/null
+++ b/artifact/ds/hybrid/rbtree_omap_drop.h
@@ -0,0 +1,599 @@
+#pragma once
+
+/// An ordered map, implemented as a balanced, internal binary search tree. This
+/// map supports get(), insert(), and remove() operations.
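+///
+/// NB: Operations mix RSTEP read-only steps (with a per-thread checkpoint
+///     stack for restarting traversals) and WOSTM writer transactions that
+///     inherit orecs from the step that preceded them.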
+/// +/// @param K The type of the keys stored in this map +/// @param V The type of the values stored in this map +/// @param HYPOL A thread descriptor type, for safe memory reclamation +/// @param dummy_key A default key to use +/// @param dummy_val A default value to use +template +class rbtree_omap_drop { + using WOSTM = typename HYPOL::WOSTM; + using ROSTM = typename HYPOL::ROSTM; + using RSTEP = typename HYPOL::RSTEP; + using WSTEP = typename HYPOL::WSTEP; + using ownable_t = typename HYPOL::ownable_t; + template using FIELD = typename HYPOL::template sxField; + + /// An easy-to-remember way of indicating the left and right children + enum DIRS { LEFT = 0, RIGHT = 1 }; + + static const int RED = 0; // Enum for red + static const int BLACK = 1; // Enum for black + + /// nodes in a red/black tree + struct node_t : ownable_t { + FIELD key; // Key stored at this node + FIELD val; // Value stored at this node + FIELD color; // color (RED or BLACK) + FIELD parent; // pointer to parent + FIELD ID; // 0/1 for left/right child + FIELD child[2]; // L/R children + + /// basic constructor + node_t(WOSTM &wo, int color, K key, V val, node_t *parent, long ID, + node_t *child0, node_t *child1) + : key(key), val(val), color(color), parent(parent), ID(ID) { + child[0].xSet(wo, this, child0); + child[1].xSet(wo, this, child1); + } + }; + + node_t *sentinel; // The (sentinel) root node of the tree + + /// The pair returned by get_leq; equivalent to the type in snapshots + struct leq_t { + node_t *_obj = nullptr; // The object + uint64_t _ver = 0; // The observed version of the object + }; + + /// A pair holding a child node and its parent, with orec validation info + struct ret_pair_t { + leq_t child; // The child + leq_t parent; // The parent of that child + }; + + /// Search for a `key` in the tree, and return the node holding it. If the + /// key is not found, return the node that ought to be parent of the (not + /// found) `key`. + /// + /// NB: The caller is responsible for clearing the checkpoint stack before + /// calling get_node(). + /// + /// @param me The calling thread's descriptor + /// @param key The key to search for + /// + /// @return {found, orec} if `key` is in the tree; + /// {parent, orec} if `key` is not in the tree + ret_pair_t get_node(HYPOL *me, const K &key) const { + // This loop delineates the search transaction. It commences from the end + // of the longest consistent prefix in the checkpoint stack + while (true) { + // Open a RO transaction to traverse downward to the target node: + leq_t parent = {nullptr, 0}, child = {nullptr, 0}; + RSTEP tx(me); + + // Validate the checkpoints to find a starting point. When this is done, + // there must be at least one entry in the checkpoints (the sentinel), and + // it must be valid. + // + // NB: When this step is done, the curr->child relationship is validated, + // but we haven't read any of child's fields, or checked child's orec. + // Every checkpointed node must be valid at the time of checkpointing. 
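+      //
+      // (Illustrative) after traversing sentinel -> a -> b, the stack holds
+      //   [{sentinel, v0}, {a, v1}, {b, v2}]
+      // If b's orec no longer matches v2, we trim back to {a, v1} and resume
+      // the downward search from `a` instead of from the root.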
+ + // If stack is empty or only holds sentinel, start from {sentinel, root} + if (me->snapshots.size() <= 1) { + parent._obj = sentinel; + child._obj = parent._obj->child[LEFT].sGet(tx); + parent._ver = tx.check_orec(parent._obj); + if (parent._ver == HYPOL::END_OF_TIME) + continue; // retry + me->snapshots.clear(); + me->snapshots.push_back({parent._obj, parent._ver}); + } + // If the stack is larger, we can find the longest valid prefix + else { + // Trim the stack to a set of consistent checkpoints + for (auto cp = me->snapshots.begin(); cp != me->snapshots.end(); ++cp) { + if (!tx.check_continuation(cp->_obj, cp->_ver)) { + me->snapshots.reset(cp - me->snapshots.begin()); + break; // the rest of the checkpoints aren't valid + } + } + // If we don't have more than a sentinel, restart + if (me->snapshots.size() <= 1) + continue; + // Use the key to choose a child of the last good checkpoint + // + // NB: top.key != key, because we never put a matching key into + // snapshots, and if a remove caused a key to change, we'll fail to + // validate that node. + auto top = me->snapshots.top(); + parent = {static_cast(top._obj), top._ver}; + auto parent_key = parent._obj->key.sGet(tx); + child._obj = parent._obj->child[(key < parent_key) ? 0 : 1].sGet(tx); + // Validate that the reads of parent were valid + if (!tx.check_continuation(parent._obj, parent._ver)) + continue; + } + + // Traverse downward from the parent until we find null child or `key` + while (true) { + // nullptr == not found, so stop. Parent was valid, so return it + if (!child._obj) + return {{nullptr, 0}, parent}; + + // It's time to move downward. Read fields of child, then validate it. + // + // NB: we may not use grandchild, but it's better to read it here + auto child_key = child._obj->key.sGet(tx); + auto grandchild = + child._obj->child[(key < child_key) ? LEFT : RIGHT].sGet(tx); + child._ver = tx.check_orec(child._obj); + if (child._ver == HYPOL::END_OF_TIME) + break; // retry + + // If the child key matches, return {child, parent}. We know both are + // valid (parent came from stack; we just checked child) + // + // NB: the snapshot code requires that no node with matching key goes + // into `snapshots` + if (child_key == key) + return {child, parent}; + + // Otherwise add the child to the checkpoint stack and traverse downward + me->snapshots.push_back({child._obj, child._ver}); + parent = child; + child = {grandchild, 0}; + } + } + } + + /// Given a node and its orec value, find the tree node that holds the key + /// that logically succeeds it (i.e., the leftmost descendent of the right + /// child) + /// + /// NB: The caller must ensure that `node` has a valid right child before + /// calling this method + /// + /// @param me The calling thread's descriptor + /// @param node An object and orec value to use as the starting point + /// + /// @return {{found, orec}, {parent, orec}} if no inconsistency occurs + /// {{nullptr, 0}, {nullptr, 0}} on any consistency violation + ret_pair_t get_succ_pair(HYPOL *me, leq_t &node) { + // NB: We expect the successor to be relatively close to the node, so we + // don't bother with checkpoints. However, we are willing to retry, + // since it's unlikely that `node` itself will change. 
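+    //
+    // (Illustrative) in the subtree below, the successor of 5 is 7, i.e., the
+    // leftmost node under 5's right child:
+    //
+    //      5
+    //       \
+    //        9
+    //       /
+    //      7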
+ while (true) { + RSTEP tx(me); + // Ensure `node` is not deleted before reading its fields + if (!tx.check_continuation(node._obj, node._ver)) + return {{nullptr, 0}, {nullptr, 0}}; + + // Read the right child, ensure consistency + leq_t parent = node, child = {node._obj->child[RIGHT].sGet(tx), 0}; + if (!tx.check_continuation(node._obj, node._ver)) + return {{nullptr, 0}, {nullptr, 0}}; + + // Find the leftmost non-null node in the tree rooted at child + while (true) { + auto next = child._obj->child[LEFT].sGet(tx); + child._ver = tx.check_orec(child._obj); + if (child._ver == HYPOL::END_OF_TIME) + break; // retry + // If next is null, `child` is the successor. Otherwise keep traversing + if (!next) + return {child, parent}; + parent = child; + child = {next, 0}; + } + } + } + +public: + /// Construct a tree by creating a sentinel node at the top + rbtree_omap_drop(HYPOL *me, auto *) { + BEGIN_WO(me); + sentinel = new node_t(wo, BLACK, dummy_key, dummy_val, nullptr, 0, nullptr, + nullptr); + } + + /// Search the data structure for a node with key `key`. If not found, return + /// false. If found, return true, and set `val` to the value associated with + /// `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key to search + /// @param val A ref parameter for returning key's value, if found + /// + /// @return True if the key is found, false otherwise. The reference + /// parameter `val` is only valid when the return value is true. + bool get(HYPOL *me, const K &key, V &val) const { + me->snapshots.clear(); + while (true) { + // Get the node that holds `key`, if it is present. If it isn't present, + // we'll get the parent of where it would be. Whatever we get is + // validated, so if it's the sentinel, we're done. + auto curr = get_node(me, key).child; + if (curr._obj == nullptr) + return false; + + // Use an optimistic read if V can be read atomically + if (std::is_scalar::value) { + RSTEP tx(me); + auto *dn = curr._obj; + auto dn_key = dn->key.sGet(tx); + // TODO: Use atomic_ref? + V val_copy = reinterpret_cast *>(&dn->val)->load( + std::memory_order_acquire); + if (!tx.check_continuation(curr._obj, curr._ver)) + continue; + if (dn_key != key) + return false; + val = val_copy; + return true; + } else { + // Using STM here is really easy, and a setjmp is more scalable than + // having to CAS... + // + // TODO: In HandSTM, do we have a hidden requirement that values are + // always scalar? If so, we don't even need this... + BEGIN_RO(me); + if (ro.inheritOrec(curr._obj, curr._ver)) { + val = curr._obj->val.xGet(ro, curr._obj); + return true; + } // else commit and repeat the while loop :) + } + } + } + + // insert a node with k/v as its pair if no such key exists in the tree + bool insert(HYPOL *me, const K &key, V &val) { + me->snapshots.clear(); + while (true) { + // Find insertion point using an STMCAS step. If child isn't null, `key` + // is already present, so we can finish without another STEP or STM + auto [child_, parent_] = get_node(me, key); + if (child_._obj != nullptr) + return false; + + BEGIN_WO(me); + + // Make this WOSTM a continuation of the preceding RSTEP + if (!wo.inheritOrec(parent_._obj, parent_._ver)) + continue; + + // The remaining code needs to know if we're inserting to the left or + // right of leq._obj. If we're at sentinel, it's left. Otherwise, use + // the key to decide. + node_t *curr = parent_._obj; + int cID = curr == sentinel ? 0 : (key < curr->key.xGet(wo, curr) ? 
0 : 1); + + node_t *child = + new node_t(wo, RED, key, val, curr, cID, nullptr, nullptr); + curr->child[cID].xSet(wo, curr, child); + + // balance the tree + while (true) { + // Get the parent, grandparent, and their relationship + node_t *parent = child->parent.xGet(wo, child); + int pID = parent->ID.xGet(wo, parent); + node_t *gparent = parent->parent.xGet(wo, parent); + + // Easy exit condition: no more propagation needed + if ((gparent == sentinel) || (BLACK == parent->color.xGet(wo, parent))) + return true; + + // If parent's sibling is also red, we push red up to grandparent + node_t *psib = gparent->child[1 - pID].xGet(wo, gparent); + if ((psib != nullptr) && (RED == psib->color.xGet(wo, psib))) { + parent->color.xSet(wo, parent, BLACK); + psib->color.xSet(wo, psib, BLACK); + gparent->color.xSet(wo, gparent, RED); + child = gparent; + continue; // restart loop at gparent level + } + + int cID = child->ID.xGet(wo, child); + if (cID != pID) { + // set child's child to parent's cID'th child + node_t *baby = child->child[1 - cID].xGet(wo, child); + parent->child[cID].xSet(wo, parent, baby); + if (baby != nullptr) { + baby->parent.xSet(wo, baby, parent); + baby->ID.xSet(wo, baby, cID); + } + // move parent into baby's position as a child of child + child->child[1 - cID].xSet(wo, child, parent); + parent->parent.xSet(wo, parent, child); + parent->ID.xSet(wo, parent, 1 - cID); + // move child into parent's spot as pID'th child of gparent + gparent->child[pID].xSet(wo, gparent, child); + child->parent.xSet(wo, child, gparent); + child->ID.xSet(wo, child, pID); + // now swap child with curr and fall through + node_t *temp = child; + child = parent; + parent = temp; + } + + parent->color.xSet(wo, parent, BLACK); + gparent->color.xSet(wo, gparent, RED); + // promote parent + node_t *ggparent = gparent->parent.xGet(wo, gparent); + int gID = gparent->ID.xGet(wo, gparent); + node_t *ochild = parent->child[1 - pID].xGet(wo, parent); + // make gparent's pIDth child ochild + gparent->child[pID].xSet(wo, gparent, ochild); + if (ochild != nullptr) { + ochild->parent.xSet(wo, ochild, gparent); + ochild->ID.xSet(wo, ochild, pID); + } + // make gparent the 1-pID'th child of parent + parent->child[1 - pID].xSet(wo, parent, gparent); + gparent->parent.xSet(wo, gparent, parent); + gparent->ID.xSet(wo, gparent, 1 - pID); + // make parent the gIDth child of ggparent + ggparent->child[gID].xSet(wo, ggparent, parent); + parent->parent.xSet(wo, parent, ggparent); + parent->ID.xSet(wo, parent, gID); + } + + // now just set the root to black + node_t *root = sentinel->child[0].xGet(wo, sentinel); + if (root->color.xGet(wo, root) != BLACK) + root->color.xSet(wo, root, BLACK); + return true; + } + } + + // remove the node with k as its key if it exists in the tree + bool remove(HYPOL *me, const K &key) { + me->snapshots.clear(); + while (true) { + // Find insertion point using an STMCAS step. If child is null, `key` is + // not present, so we can finish without another STEP or STM. + auto [child_, parent_] = get_node(me, key); + if (child_._obj == nullptr) + return false; + + // If the found node has two children, then we're going to need to swap it + // with its successor. That could mean a big traversal, so let's use an + // RSTEP instead of jumping right into a WOSTM that has to validate its + // read set. 
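+
+      // Sketch of the three phases below:
+      //   RSTEP 1: does the target have two children?
+      //   RSTEP 2: if so, find the successor and its parent
+      //   WOSTM:   inherit the orecs observed above, then unstitch and
+      //            rebalance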
+
+      // First, an RSTEP to see if it has two children
+      node_t *l = nullptr, *r = nullptr;
+      {
+        RSTEP tx(me);
+        r = child_._obj->child[RIGHT].sGet(tx);
+        l = child_._obj->child[LEFT].sGet(tx);
+        if (!tx.check_continuation(child_._obj, child_._ver))
+          continue;
+      }
+
+      // If so, then an RSTEP to get the successor and successor parent
+      leq_t successor = {nullptr, 0}, successor_parent = {nullptr, 0};
+      if (r != nullptr && l != nullptr) {
+        auto [succ, s_parent] = get_succ_pair(me, child_);
+        if (!succ._obj)
+          continue;
+        successor = succ;
+        successor_parent = s_parent;
+      }
+
+      {
+        BEGIN_WO(me);
+
+        // Make this WOSTM a continuation of the preceding RSTEP
+        if (!wo.inheritOrec(child_._obj, child_._ver))
+          continue;
+
+        // NB: We get segfaults if we don't also inheritOrec on the parent. We
+        //     need to investigate this further.
+        if (!wo.inheritOrec(parent_._obj, parent_._ver))
+          continue;
+
+        // find key
+        node_t *curr = child_._obj;
+
+        // If `curr` has two children, we need to swap it with its successor
+        if (l != nullptr && r != nullptr) {
+          // First we have to make `wo` a continuation of the other RSTEP
+          if (!wo.inheritOrec(successor._obj, successor._ver))
+            continue;
+          if (!wo.inheritOrec(successor_parent._obj, successor_parent._ver))
+            continue;
+
+          curr->key.xSet(wo, curr,
+                         successor._obj->key.xGet(wo, successor._obj));
+          curr->val.xSet(wo, curr,
+                         successor._obj->val.xGet(wo, successor._obj));
+          curr = successor._obj;
+          parent_ = successor_parent;
+        }
+
+        // extract x from the tree and prep it for deletion
+        node_t *parent = parent_._obj;
+        node_t *child =
+            curr->child[(curr->child[0].xGet(wo, curr) != nullptr) ? 0 : 1]
+                .xGet(wo, curr);
+        int xID = curr->ID.xGet(wo, curr);
+        parent->child[xID].xSet(wo, parent, child);
+        if (child != nullptr) {
+          child->parent.xSet(wo, child, parent);
+          child->ID.xSet(wo, child, xID);
+        }
+
+        // fix black height violations
+        if ((BLACK == curr->color.xGet(wo, curr)) && (child != nullptr)) {
+          if (RED == child->color.xGet(wo, child)) {
+            curr->color.xSet(wo, curr, RED);
+            child->color.xSet(wo, child, BLACK);
+          }
+        }
+
+        // rebalance... be sure to save the deletion target!
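+        // (Invariant) `curr` carries the "extra black"; every iteration below
+        // ends either by climbing one level (`curr = parent`) or by resolving
+        // the violation with rotations and breaking out.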
+ node_t *to_delete = curr; + while (true) { + parent = curr->parent.xGet(wo, curr); + if ((parent == sentinel) || (RED == curr->color.xGet(wo, curr))) + break; + int cID = curr->ID.xGet(wo, curr); + node_t *sibling = parent->child[1 - cID].xGet(wo, parent); + + // we'd like y's sibling s to be black + // if it's not, promote it and recolor + if (RED == sibling->color.xGet(wo, sibling)) { + /* + Bp Bs + / \ / \ + By Rs => Rp B2 + / \ / \ + B1 B2 By B1 + */ + parent->color.xSet(wo, parent, RED); + sibling->color.xSet(wo, sibling, BLACK); + // promote sibling + node_t *gparent = parent->parent.xGet(wo, parent); + int pID = parent->ID.xGet(wo, parent); + node_t *nephew = sibling->child[cID].xGet(wo, sibling); + // set nephew as 1-cID child of parent + parent->child[1 - cID].xSet(wo, parent, nephew); + nephew->parent.xSet(wo, nephew, parent); + nephew->ID.xSet(wo, nephew, 1 - cID); + // make parent the cID child of the sibling + sibling->child[cID].xSet(wo, sibling, parent); + parent->parent.xSet(wo, parent, sibling); + parent->ID.xSet(wo, parent, cID); + // make sibling the pID child of gparent + gparent->child[pID].xSet(wo, gparent, sibling); + sibling->parent.xSet(wo, sibling, gparent); + sibling->ID.xSet(wo, sibling, pID); + // reset sibling + sibling = nephew; + } + + // Handle when the far nephew is red + node_t *n = sibling->child[1 - cID].xGet(wo, sibling); + if ((n != nullptr) && (RED == (n->color.xGet(wo, n)))) { + /* + ?p ?s + / \ / \ + By Bs => Bp Bn + / \ / \ + ?1 Rn By ?1 + */ + sibling->color.xSet(wo, sibling, parent->color.xGet(wo, parent)); + parent->color.xSet(wo, parent, BLACK); + n->color.xSet(wo, n, BLACK); + // promote sibling + node_t *gparent = parent->parent.xGet(wo, parent); + int pID = parent->ID.xGet(wo, parent); + node_t *nephew = sibling->child[cID].xGet(wo, sibling); + // make nephew the 1-cID child of parent + parent->child[1 - cID].xSet(wo, parent, nephew); + if (nephew != nullptr) { + nephew->parent.xSet(wo, nephew, parent); + nephew->ID.xSet(wo, nephew, 1 - cID); + } + // make parent the cID child of the sibling + sibling->child[cID].xSet(wo, sibling, parent); + parent->parent.xSet(wo, parent, sibling); + parent->ID.xSet(wo, parent, cID); + // make sibling the pID child of gparent + gparent->child[pID].xSet(wo, gparent, sibling); + sibling->parent.xSet(wo, sibling, gparent); + sibling->ID.xSet(wo, sibling, pID); + break; // problem solved + } + + n = sibling->child[cID].xGet(wo, sibling); + if ((n != nullptr) && (RED == (n->color.xGet(wo, n)))) { + /* + ?p ?p + / \ / \ + By Bs => By Bn + / \ \ + Rn B1 Rs + \ + B1 + */ + sibling->color.xSet(wo, sibling, RED); + n->color.xSet(wo, n, BLACK); + // promote n + node_t *gneph = n->child[1 - cID].xGet(wo, n); + // make gneph the cID child of sibling + sibling->child[cID].xSet(wo, sibling, gneph); + if (gneph != nullptr) { + gneph->parent.xSet(wo, gneph, sibling); + gneph->ID.xSet(wo, gneph, cID); + } + // make sibling the 1-cID child of n + n->child[1 - cID].xSet(wo, n, sibling); + sibling->parent.xSet(wo, sibling, n); + sibling->ID.xSet(wo, sibling, 1 - cID); + // make n the 1-cID child of parent + parent->child[1 - cID].xSet(wo, parent, n); + n->parent.xSet(wo, n, parent); + n->ID.xSet(wo, n, 1 - cID); + // swap sibling and `n` + node_t *temp = sibling; + sibling = n; + n = temp; + + // now the far nephew is red... 
copy of code from above + sibling->color.xSet(wo, sibling, parent->color.xGet(wo, parent)); + parent->color.xSet(wo, parent, BLACK); + n->color.xSet(wo, n, BLACK); + // promote sibling + node_t *gparent = parent->parent.xGet(wo, parent); + int pID = parent->ID.xGet(wo, parent); + node_t *nephew = sibling->child[cID].xGet(wo, sibling); + // make nephew the 1-cID child of parent + parent->child[1 - cID].xSet(wo, parent, nephew); + if (nephew != nullptr) { + nephew->parent.xSet(wo, nephew, parent); + nephew->ID.xSet(wo, nephew, 1 - cID); + } + // make parent the cID child of the sibling + sibling->child[cID].xSet(wo, sibling, parent); + parent->parent.xSet(wo, parent, sibling); + parent->ID.xSet(wo, parent, cID); + // make sibling the pID child of gparent + gparent->child[pID].xSet(wo, gparent, sibling); + sibling->parent.xSet(wo, sibling, gparent); + sibling->ID.xSet(wo, sibling, pID); + + break; // problem solved + } + + /* + ?p ?p + / \ / \ + Bx Bs => Bp Rs + / \ / \ + B1 B2 B1 B2 + */ + + sibling->color.xSet(wo, sibling, RED); // propagate upwards + + // advance to parent and balance again + curr = parent; + } + + // if curr was red, this fixes the balance + if (curr->color.xGet(wo, curr) == RED) + curr->color.xSet(wo, curr, BLACK); + + // free the node and return + wo.reclaim(to_delete); + + return true; + } + } + } +}; diff --git a/artifact/ds/include/ca_umap_list_adapter.h b/artifact/ds/include/ca_umap_list_adapter.h new file mode 100644 index 0000000..a71edd7 --- /dev/null +++ b/artifact/ds/include/ca_umap_list_adapter.h @@ -0,0 +1,86 @@ +#pragma once + +#include +#include +#include + +// STM Non-resizable Hash Table + +/// A straightforward non-resizable hashtable. This map supports +/// get(), insert(), remove(), and size() operations. +/// +/// @param K The type of the keys stored in this map +/// @param V The type of the values stored in this map +/// @param STMCAS The STMCAS implementation (PO or PS) +/// @param OMAP An ordered map type to use as each bucket +/// +/// NB: OMAP must be templated on +template +class ca_umap_list_adapter_t { + OMAP **buckets; // The OMAPs that act as the buckets in the table. + const uint64_t num_buckets; // The number of buckets in the table. + +public: + /// Create a non-resizable hash table with the specified number of buckets. + /// + /// @param me The operation that is constructing the table. + /// @param cfg A configuration object with a `buckets` field + ca_umap_list_adapter_t(STMCAS *me, auto *cfg) : num_buckets(cfg->buckets) { + buckets = (OMAP **)malloc(num_buckets * sizeof(OMAP *)); + + // Fill the "buckets" vector with singly-linked lists. + for (unsigned int i = 0; i < num_buckets; ++i) + buckets[i] = new OMAP(me, cfg); + } + +private: + std::hash pre_hash; + + /// Get the index of the bucket where the provided key belongs + /// + /// @param me The calling thread's descriptor + /// @param key The key to hash. + /// + /// @return The hashed value of the key, modded by the number of buckets + int hash(STMCAS *me, const K key) { + return me->hash(pre_hash(key)) % num_buckets; + } + +public: + /// Search the data structure for a node with key `key`. If not found, return + /// false. If found, return true, and set `val` to the value associated with + /// `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key to search + /// @param val A ref parameter for returning key's value, if found + /// + /// @return True if the key is found, false otherwise. 
The reference + /// parameter `val` is only valid when the return value is true. + bool get(STMCAS *me, const K &key, V &val) { + return buckets[hash(me, key)]->get(me, key, val); + } + + /// Create a mapping from the provided `key` to the provided `val`, but only + /// if no such mapping already exists. This method does *not* have upsert + /// behavior for keys already present. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to create + /// @param val The value for the mapping to create + /// + /// @return True if the value was inserted, false otherwise. + bool insert(STMCAS *me, const K &key, V &val) { + return buckets[hash(me, key)]->insert(me, key, val); + } + + /// Clear the mapping involving the provided `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to eliminate + /// + /// @return True if the key was found and removed, false otherwise + bool remove(STMCAS *me, const K &key) { + return buckets[hash(me, key)]->remove(me, key); + } +}; diff --git a/artifact/ds/xSTM/dlist_omap.h b/artifact/ds/xSTM/dlist_omap.h new file mode 100644 index 0000000..ad02fc2 --- /dev/null +++ b/artifact/ds/xSTM/dlist_omap.h @@ -0,0 +1,174 @@ +#pragma once + +#include "../../policies/xSTM/common/tm_api.h" + +// NB: We need an operator new(). It can just forward to malloc() +TX_RENAME(_Znwm) void *my_new(std::size_t size) { + void *ptr = malloc(size); + return ptr; +} + +/// An ordered map, implemented as a doubly-linked list. This map supports +/// get(), insert(), and remove() operations. +/// +/// @param K The type of the keys stored in this map +/// @param V The type of the values stored in this map +/// @param DESCRIPTOR A thread descriptor type, for safe memory reclamation +template class dlist_omap { + + /// A list node. It has prev and next pointers, but no key or value. It's + /// useful for sentinels, so that K and V don't have to be default + /// constructable. + struct node_t { + node_t *prev; // Pointer to predecessor + node_t *next; // Pointer to successor + + /// Construct a node + node_t() : prev(nullptr), next(nullptr) { TX_CTOR; } + + /// Destructor is a no-op, but it needs to be virtual because of inheritance + virtual ~node_t() {} + }; + + /// A list node that also has a key and value. Note that keys are const. + struct data_t : public node_t { + const K key; // The key of this key/value pair + V val; // The value of this key/value pair + + /// Construct a data_t + /// + /// @param _key The key that is stored in this node + /// @param _val The value that is stored in this node + data_t(const K &_key, const V &_val) : node_t(), key(_key), val(_val) { + TX_CTOR; + } + + /// Destructor is a no-op, but it needs to be virtual because of inheritance + virtual ~data_t() {} + }; + + node_t *const head; // The list head pointer + node_t *const tail; // The list tail pointer + +public: + /// Default construct a list by constructing and connecting two sentinel nodes + /// + /// @param me The operation that is constructing the list + /// @param cfg A configuration object + dlist_omap(DESCRIPTOR *, auto *cfg) : head(new node_t()), tail(new node_t()) { + head->next = tail; + tail->prev = head; + } + +private: + /// get_leq is an inclusive predecessor query that returns the largest node + /// whose key is <= the provided key. It can return the head sentinel, but + /// not the tail sentinel. + /// + /// @param key The key for which we are doing a predecessor query. 
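+  ///
+  /// Example: for a list holding keys {2, 4, 8}, get_leq(5) and get_leq(4)
+  /// both return the node holding 4, while get_leq(1) returns the head
+  /// sentinel.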
+ /// + /// @return The node that was found + node_t *get_leq(const K key) { + // Start at the head; read the next now, to avoid reading it in multiple + // iterations of the loop + node_t *curr = head; + auto *next = curr->next; + + // Starting at `next`, search for key. Breaking out of this will take us + // back to the top of the function. + while (true) { + // Case 1: `next` is tail --> stop the search at curr + if (next == tail) + return curr; + + // read next's `next` and `key` + auto next_next = next->next; + auto nkey = static_cast(next)->key; + + // Case 2: `next` is a data node: stop if next->key >= key + if (nkey > key) + return curr; + if (nkey == key) + return next; + + // Case 3: keep traversing to `next` + curr = next; + next = next_next; + } + } + +public: + /// Search the data structure for a node with key `key`. If not found, return + /// false. If found, return true, and set `val` to the value associated with + /// `key`. + /// + /// @param me Unused thread context + /// @param key The key to search + /// @param val A ref parameter for returning key's value, if found + /// + /// @return True if the key is found, false otherwise. The reference + /// parameter `val` is only valid when the return value is true. + bool get(DESCRIPTOR *, const K &key, V &val) { + TX_RAII; + // get_leq will use a read-only transaction to find the largest node with + // a key <= `key`. + auto n = get_leq(key); + + // Since we have EBR, we can read n.key without validating and fast-fail + // on key-not-found + if (n == head || static_cast(n)->key != key) + return false; + + // NB: given EBR, we don't need to worry about n._obj being deleted, so + // we don't need to validate before looking at the value + data_t *dn = static_cast(n); + val = dn->val; + return true; + } + + /// Create a mapping from the provided `key` to the provided `val`, but only + /// if no such mapping already exists. This method does *not* have upsert + /// behavior for keys already present. + /// + /// @param me Unused thread context + /// @param key The key for the mapping to create + /// @param val The value for the mapping to create + /// + /// @return True if the value was inserted, false otherwise. + bool insert(DESCRIPTOR *, const K &key, V &val) { + TX_RAII; + auto n = get_leq(key); + if (n != head && static_cast(n)->key == key) + return false; + + auto next = n->next; + + // stitch in a new node + data_t *new_dn = new data_t(key, val); + new_dn->next = next; + new_dn->prev = n; + n->next = new_dn; + next->prev = new_dn; + return true; + } + + /// Clear the mapping involving the provided `key`. + /// + /// @param me Unused thread context + /// @param key The key for the mapping to eliminate + /// + /// @return True if the key was found and removed, false otherwise + bool remove(DESCRIPTOR *, const K &key) { + TX_RAII; + auto n = get_leq(key); + if (n == head || static_cast(n)->key != key) + return false; + + // unstitch it + auto pred = n->prev, succ = n->next; + pred->next = succ; + succ->prev = pred; + delete (n); + return true; + } +}; diff --git a/artifact/ds/xSTM/ibst_omap.h b/artifact/ds/xSTM/ibst_omap.h new file mode 100644 index 0000000..cd234fa --- /dev/null +++ b/artifact/ds/xSTM/ibst_omap.h @@ -0,0 +1,260 @@ +#pragma once + +#include "../../policies/xSTM/common/tm_api.h" + +// NB: We need an operator new(). 
It can just forward to malloc() +TX_RENAME(_Znwm) void *my_new(std::size_t size) { + void *ptr = malloc(size); + return ptr; +} + +/// An ordered map, implemented as an unbalanced, internal binary search tree. +/// This map supports get(), insert(), and remove() operations. +/// +/// @param K The type of the keys stored in this map +/// @param V The type of the values stored in this map +/// @param DESCRIPTOR A thread descriptor type, for safe memory reclamation +template class ibst_omap { + + /// An easy-to-remember way of indicating the left and right children + enum DIRS { LEFT = 0, RIGHT = 1 }; + + /// node_t is the base type for all tree nodes. It doesn't have key/value + /// fields. + struct node_t { + /// The node's children. Be sure to use LEFT and RIGHT to index it + node_t *children[2]; + + /// Construct a node_t. This should only be called from a writer + /// transaction + /// + /// @param _left The left child of this node + /// @param _right The right child of this node + node_t(node_t *_left = nullptr, node_t *_right = nullptr) { + TX_CTOR; + children[LEFT] = _left; + children[RIGHT] = _right; + } + }; + + /// A pair holding a child node and its parent + struct ret_pair_t { + node_t *child; // The child + node_t *parent; // The parent of that child + }; + + /// Our tree uses a sentinel root node, so that we always have a valid node + /// for which to compute an orec. The sentinel's *LEFT* child is the true + /// root of the tree. That is, logically sentinel has the value "TOP". + node_t *sentinel; + + /// data_t is the type for all internal and leaf nodes in the data structure. + /// It extends the base type with a key and value. + /// + /// NB: keys are *not* const, because we want to overwrite nodes instead of + /// swapping them + struct data_t : public node_t { + K key; // The key stored in this node + V val; // The value stored in this node + + /// Construct a node + /// + /// @param _left left child of the node + /// @param _right right child of the node + /// @param _key the key of the node + /// @param _val the value of the node + data_t(node_t *_left, node_t *_right, const K &_key, V &_val) + : node_t(_left, _right), key(_key), val(_val) { + TX_CTOR; + } + }; + +public: + /// Default construct an empty tree + /// + /// @param me The operation that is constructing the list + /// @param cfg A configuration object + ibst_omap(DESCRIPTOR *, auto *cfg) { + // NB: Even though the constructor is operating on private data, it needs a + // TM context in order to use tm_fields + sentinel = new node_t(); + } + +private: + /// Search for a `key` in the tree, and return the node holding it, as well + /// as the node's parent. If the key is not found, return null, and the + /// node that ought to be parent of the (not found) `key`. + /// + /// NB: The caller is responsible for clearing the checkpoint stack before + /// calling get_node(). + /// + /// @param key The key to search for + /// + /// @return {found, parent} if `key` is in the tree + /// {nullptr, parent} if `key` is not in the tree + ret_pair_t get_node(const K &key) { + // Traverse downward to the target node: + node_t *parent = sentinel; + node_t *child = parent->children[LEFT]; + + // Traverse downward from the parent until we find null child or `key` + while (true) { + // nullptr == not found, so stop. We know parent was valid, so we can + // just return it + if (!child) + return {nullptr, parent}; + + // It's time to move downward. 
Read fields of child and grandchild + // + // NB: we may not use grandchild, but it's better to read it here + auto child_key = static_cast(child)->key; + auto grandchild = child->children[(key < child_key) ? LEFT : RIGHT]; + + // If the child key matches, return {child, parent}. We know both are + // valid (parent came from stack; we just checked child) + // + // NB: the snapshotting code requires that no node with matching key + // goes into `snapshots` + if (child_key == key) + return {child, parent}; + + // Otherwise traverse downward + parent = child; + child = grandchild; + } + } + + /// Given a node and its orec value, find the tree node that holds the key + /// that logically succeeds it (i.e., the leftmost descendent of the right + /// child) + /// + /// NB: The caller must ensure that `node` has a valid right child before + /// calling this method + /// + /// @param node An object and orec value to use as the starting point + /// + /// @return {{found, orec}, {parent, orec}} if no inconsistency occurs + /// {{nullptr, 0}, {nullptr, 0}} on any consistency violation + ret_pair_t get_succ_pair(node_t *node) { + // Read the right child + node_t *parent = node, *child = node->children[RIGHT]; + + // Find the leftmost non-null node in the tree rooted at child + while (true) { + auto next = child->children[LEFT]; + // If next is null, `child` is the successor. Otherwise keep traversing + if (!next) + return {child, parent}; + parent = child; + child = next; + } + } + +public: + /// Search the data structure for a node with key `key`. If not found, return + /// false. If found, return true, and set `val` to the value associated with + /// `key`. + /// + /// @param me The calling thread's descriptor + /// @param key The key to search + /// @param val A ref parameter for returning key's value, if found + /// + /// @return True if the key is found, false otherwise. The reference + /// parameter `val` is only valid when the return value is true. + bool get(DESCRIPTOR *, const K &key, V &val) { + TX_RAII; + // Get the node that holds `key`, if it is present, and also its parent. + // If it isn't present, we'll get a null pointer. That corresponds to a + // consistent read of the parent, which means we already linearized and + // we're done + auto [curr, _] = get_node(key); + if (curr == nullptr) + return false; + + // read the value + auto dn = static_cast(curr); + val = dn->val; + return true; + } + + /// Create a mapping from the provided `key` to the provided `val`, but only + /// if no such mapping already exists. This method does *not* have upsert + /// behavior for keys already present. + /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to create + /// @param val The value for the mapping to create + /// + /// @return True if the value was inserted, false otherwise. + bool insert(DESCRIPTOR *, const K &key, V &val) { + TX_RAII; + auto [child, parent] = get_node(key); + if (child) + return false; + // We must have a null child and a valid parent. If it's sentinel, we + // must insert as LEFT. Otherwise, compute which child to set. + auto cID = (parent == sentinel ? LEFT : RIGHT) & + (key > static_cast(parent)->key); + auto new_child = new data_t(nullptr, nullptr, key, val); + parent->children[cID] = new_child; + return true; + } + + /// Clear the mapping involving the provided `key`. 
+ /// + /// @param me The calling thread's descriptor + /// @param key The key for the mapping to eliminate + /// + /// @return True if the key was found and removed, false otherwise + bool remove(DESCRIPTOR *, const K &key) { + TX_RAII; + auto [target, parent] = get_node(key); + if (target == nullptr) + return false; + + // Read the target node's children + data_t *t_child[2]; + t_child[RIGHT] = static_cast(target->children[RIGHT]); + t_child[LEFT] = static_cast(target->children[LEFT]); + + // If either child is null, and if the parent is still valid, then we can + // unstitch the target, link the parent to a grandchild and we're done. + if (!t_child[LEFT] || !t_child[RIGHT]) { + // Acquire the (possibly null) grandchild to link to the parent + auto gID = t_child[LEFT] ? LEFT : RIGHT; + + // Which child of the parent is target? + auto cID = parent->children[LEFT] == target ? LEFT : RIGHT; + + // Unstitch and reclaim + parent->children[cID] = t_child[gID]; + delete (target); + return true; + } + + // `target` has two children. WLOG, the leftmost descendent of the right + // child is `target`'s successor, and must have at most one child. We + // want to put that node's key and value into `target`, and then remove + // that node by setting its parent's LEFT to its RIGHT (which might be + // null). + auto [succ, s_parent] = get_succ_pair(target); + + // If target's successor is target's right child, then target._ver must + // equal s_parent._ver. As long as we lock target before we try + // to lock s_parent, we'll get the check for free. + + // Copy `succ`'s key/value into `target` + static_cast(target)->key = static_cast(succ)->key; + static_cast(target)->val = static_cast(succ)->val; + + // Unstitch `succ` by setting its parent's left to its right + // Case 1: there are intermediate nodes between target and successor + if (s_parent != target) + s_parent->children[LEFT] = succ->children[RIGHT]; + // Case 2: target is successor's parent + else + s_parent->children[RIGHT] = succ->children[RIGHT]; + delete (succ); + return true; + } +}; diff --git a/artifact/ds/xSTM/rbtree_omap.h b/artifact/ds/xSTM/rbtree_omap.h new file mode 100644 index 0000000..e964f50 --- /dev/null +++ b/artifact/ds/xSTM/rbtree_omap.h @@ -0,0 +1,370 @@ +#pragma once + +#include "../../policies/xSTM/common/tm_api.h" + +// NB: We need an operator new(). It can just forward to malloc() +TX_RENAME(_Znwm) void *my_new(std::size_t size) { + void *ptr = malloc(size); + return ptr; +} + +/// An ordered map, implemented as a balanced, internal binary search tree. This +/// map supports get(), insert(), and remove() operations. 
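+///
+/// NB: Unlike the exoTM-based trees, each operation here runs as one flat xSTM
+///     transaction (TX_RAII), so the plugin instruments every shared load and
+///     store.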
+/// +/// @param K The type of the keys stored in this map +/// @param V The type of the values stored in this map +/// @param DESCRIPTOR A thread descriptor type, for safe memory reclamation +/// @param dummy_key A default key to use +/// @param dummy_val A default value to use +template +class rbtree_omap { + /// An enum for node colors + enum Color { RED, BLACK }; + + /// nodes in a red/black tree + struct node_t { + K key; // Key stored at this node + V val; // Value stored at this node + Color color; // color (RED or BLACK) + node_t *parent; // pointer to parent + int ID; // 0 or 1 to indicate if left or right child + node_t *child[2]; // pointers to children + + /// basic constructor + node_t(Color color, K key, V val, node_t *parent, long ID, node_t *child0, + node_t *child1) + : key(key), val(val), color(color), parent(parent), ID(ID) { + TX_CTOR; + child[0] = child0; + child[1] = child1; + } + }; + + node_t *sentinel; // The (sentinel) root node of the tree + +public: + /// Construct a tree by creating a sentinel node at the head + rbtree_omap(DESCRIPTOR *, auto *) + : sentinel(new node_t(BLACK, dummy_key, dummy_val, nullptr, 0, nullptr, + nullptr)) {} + + // binary search for the node that has v as its value + bool get(DESCRIPTOR *, const K &key, V &val) const { + void *dummy; + bool res = false; + TX_PRIVATE_STACK_REGION(&dummy); + { + TX_RAII; + const node_t *curr = sentinel->child[0]; + while (curr != nullptr && curr->key != key) + curr = curr->child[(key < curr->key) ? 0 : 1]; + res = (curr != nullptr) && (curr->key == key); + if (res) + val = curr->val; + } + return res; + } + + // insert a node with k/v as its pair if no such key exists in the tree + bool insert(DESCRIPTOR *, const K &key, V &val) { + void *dummy; + bool res = false; + TX_PRIVATE_STACK_REGION(&dummy); + { + TX_RAII; + // find insertion point + node_t *curr = sentinel; + int cID = 0; + node_t *child = curr->child[cID]; + while (child != nullptr) { + long ckey = child->key; + if (ckey == key) + return false; + cID = key < ckey ? 
0 : 1; + curr = child; + child = curr->child[cID]; + } + + // make a red node and connect it to `curr` + res = true; + child = new node_t(RED, key, val, curr, cID, nullptr, nullptr); + curr->child[cID] = child; + + // balance the tree + while (true) { + // Get the parent, grandparent, and their relationship + node_t *parent = child->parent; + int pID = parent->ID; + node_t *gparent = parent->parent; + + // Easy exit condition: no more propagation needed + if ((gparent == sentinel) || (BLACK == parent->color)) + break; + + // If parent's sibling is also red, we push red up to grandparent + node_t *psib = gparent->child[1 - pID]; + if ((psib != nullptr) && (RED == psib->color)) { + parent->color = BLACK; + psib->color = BLACK; + gparent->color = RED; + child = gparent; + continue; // restart loop at gparent level + } + + int cID = child->ID; + if (cID != pID) { + // set child's child to parent's cID'th child + node_t *baby = child->child[1 - cID]; + parent->child[cID] = baby; + if (baby != nullptr) { + baby->parent = parent; + baby->ID = cID; + } + // move parent into baby's position as a child of child + child->child[1 - cID] = parent; + parent->parent = child; + parent->ID = 1 - cID; + // move child into parent's spot as pID'th child of gparent + gparent->child[pID] = child; + child->parent = gparent; + child->ID = pID; + // now swap child with curr and fall through + node_t *temp = child; + child = parent; + parent = temp; + } + + parent->color = BLACK; + gparent->color = RED; + // promote parent + node_t *ggparent = gparent->parent; + int gID = gparent->ID; + node_t *ochild = parent->child[1 - pID]; + // make gparent's pIDth child ochild + gparent->child[pID] = ochild; + if (ochild != nullptr) { + ochild->parent = gparent; + ochild->ID = pID; + } + // make gparent the 1-pID'th child of parent + parent->child[1 - pID] = gparent; + gparent->parent = parent; + gparent->ID = 1 - pID; + // make parent the gIDth child of ggparent + ggparent->child[gID] = parent; + parent->parent = ggparent; + parent->ID = gID; + } + + // now just set the root to black + node_t *root = sentinel->child[0]; + if (root->color != BLACK) + root->color = BLACK; + } + + return res; + } + + // remove the node with k as its key if it exists in the tree + bool remove(DESCRIPTOR *, const K &key) { + TX_RAII; + // find key + node_t *curr = sentinel->child[0]; + + while (curr != nullptr) { + int ckey = curr->key; + if (ckey == key) + break; + curr = curr->child[key < ckey ? 0 : 1]; + } + + // if we didn't find v, we're done + if (curr == nullptr) + return false; + + // If `curr` has two children, we need to swap it with its successor + if ((curr->child[1] != nullptr) && ((curr->child[0]) != nullptr)) { + node_t *leftmost = curr->child[1]; + while (leftmost->child[0] != nullptr) + leftmost = leftmost->child[0]; + curr->key = leftmost->key; + curr->val = leftmost->val; + curr = leftmost; + } + + // extract x from the tree and prep it for deletion + node_t *parent = curr->parent; + node_t *child = curr->child[(curr->child[0] != nullptr) ? 0 : 1]; + int xID = curr->ID; + parent->child[xID] = child; + if (child != nullptr) { + child->parent = parent; + child->ID = xID; + } + + // fix black height violations + if ((BLACK == curr->color) && (child != nullptr)) { + if (RED == child->color) { + curr->color = RED; + child->color = BLACK; + } + } + + // rebalance... be sure to save the deletion target! 
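+    // The ASCII diagrams in the cases below show each rebalancing step; the
+    // doubly-black node moves up via `curr = parent` until a rotation fixes
+    // the violation.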
+ node_t *to_delete = curr; + while (true) { + parent = curr->parent; + if ((parent == sentinel) || (RED == curr->color)) + break; + int cID = curr->ID; + node_t *sibling = parent->child[1 - cID]; + + // we'd like y's sibling s to be black + // if it's not, promote it and recolor + if (RED == sibling->color) { + /* + Bp Bs + / \ / \ + By Rs => Rp B2 + / \ / \ + B1 B2 By B1 + */ + parent->color = RED; + sibling->color = BLACK; + // promote sibling + node_t *gparent = parent->parent; + int pID = parent->ID; + node_t *nephew = sibling->child[cID]; + // set nephew as 1-cID child of parent + parent->child[1 - cID] = nephew; + nephew->parent = parent; + nephew->ID = (1 - cID); + // make parent the cID child of the sibling + sibling->child[cID] = parent; + parent->parent = sibling; + parent->ID = cID; + // make sibling the pID child of gparent + gparent->child[pID] = sibling; + sibling->parent = gparent; + sibling->ID = pID; + // reset sibling + sibling = nephew; + } + + // Handle when the far nephew is red + node_t *n = sibling->child[1 - cID]; + if ((n != nullptr) && (RED == (n->color))) { + /* + ?p ?s + / \ / \ + By Bs => Bp Bn + / \ / \ + ?1 Rn By ?1 + */ + sibling->color = parent->color; + parent->color = BLACK; + n->color = BLACK; + // promote sibling + node_t *gparent = parent->parent; + int pID = parent->ID; + node_t *nephew = sibling->child[cID]; + // make nephew the 1-cID child of parent + parent->child[1 - cID] = nephew; + if (nephew != nullptr) { + nephew->parent = parent; + nephew->ID = 1 - cID; + } + // make parent the cID child of the sibling + sibling->child[cID] = parent; + parent->parent = sibling; + parent->ID = cID; + // make sibling the pID child of gparent + gparent->child[pID] = sibling; + sibling->parent = gparent; + sibling->ID = pID; + break; // problem solved + } + + n = sibling->child[cID]; + if ((n != nullptr) && (RED == (n->color))) { + /* + ?p ?p + / \ / \ + By Bs => By Bn + / \ \ + Rn B1 Rs + \ + B1 + */ + sibling->color = RED; + n->color = BLACK; + // promote n + node_t *gneph = n->child[1 - cID]; + // make gneph the cID child of sibling + sibling->child[cID] = gneph; + if (gneph != nullptr) { + gneph->parent = sibling; + gneph->ID = cID; + } + // make sibling the 1-cID child of n + n->child[1 - cID] = sibling; + sibling->parent = n; + sibling->ID = 1 - cID; + // make n the 1-cID child of parent + parent->child[1 - cID] = n; + n->parent = parent; + n->ID = 1 - cID; + // swap sibling and `n` + node_t *temp = sibling; + sibling = n; + n = temp; + + // now the far nephew is red... 
copy of code from above + sibling->color = (parent->color); + parent->color = BLACK; + n->color = BLACK; + // promote sibling + node_t *gparent = parent->parent; + int pID = parent->ID; + node_t *nephew = sibling->child[cID]; + // make nephew the 1-cID child of parent + parent->child[1 - cID] = nephew; + if (nephew != nullptr) { + nephew->parent = parent; + nephew->ID = 1 - cID; + } + // make parent the cID child of the sibling + sibling->child[cID] = parent; + parent->parent = sibling; + parent->ID = cID; + // make sibling the pID child of gparent + gparent->child[pID] = sibling; + sibling->parent = gparent; + sibling->ID = pID; + + break; // problem solved + } + + /* + ?p ?p + / \ / \ + Bx Bs => Bp Rs + / \ / \ + B1 B2 B1 B2 + */ + + sibling->color = RED; // propagate upwards + + // advance to parent and balance again + curr = parent; + } + + // if curr was red, this fixes the balance + curr->color = BLACK; + + // free the node and return + free(to_delete); + + return true; + } +}; diff --git a/artifact/ds/xSTM/rbtree_tl2_omap.h b/artifact/ds/xSTM/rbtree_tl2_omap.h new file mode 100644 index 0000000..e1142ae --- /dev/null +++ b/artifact/ds/xSTM/rbtree_tl2_omap.h @@ -0,0 +1,428 @@ +/* ============================================================================= + * + * rbtree.h + * -- Red-black balanced binary search tree + * + * ============================================================================= + * + * Copyright (C) Sun Microsystems Inc., 2006. All Rights Reserved. + * Authors: Dave Dice, Nir Shavit, Ori Shalev. + * + * STM: Transactional Locking for Disjoint Access Parallelism + * + * Transactional Locking II, + * Dave Dice, Ori Shalev, Nir Shavit + * DISC 2006, Sept 2006, Stockholm, Sweden. + */ + +#pragma once + +#include "../../policies/xSTM/common/tm_api.h" + +template +class rbtree_tl2_omap { + + static const int RED = 0; + static const int BLACK = 1; + + struct node_t { + K key; + V val; + node_t *p; + node_t *l; + node_t *r; + long c; + char dummy[64]; + }; + + node_t *root; + char dummy[64]; + + node_t *lookup(K k) { + node_t *p = this->root; + while (p != nullptr) { + if (k == p->key) { + return p; + } + p = (k < p->key) ? p->l : p->r; + } + return nullptr; + } + + void rotateLeft(node_t *x) { + node_t *r = x->r; + node_t *rl = r->l; + x->r = rl; + if (rl != nullptr) { + rl->p = (x); + } + + node_t *xp = (((((x))->p))); + ((r)->p) = (xp); + if (xp == nullptr) { + ((this)->root) = (r); + } else if ((((((xp))->l))) == x) { + ((xp)->l) = (r); + } else { + ((xp)->r) = (r); + } + ((r)->l) = (x); + ((x)->p) = (r); + } + + void rotateRight(node_t *x) { + node_t *l = (((((x))->l))); + node_t *lr = (((((l))->r))); + ((x)->l) = (lr); + if (lr != nullptr) { + ((lr)->p) = (x); + } + node_t *xp = (((((x))->p))); + ((l)->p) = (xp); + if (xp == nullptr) { + ((this)->root) = (l); + } else if ((((((xp))->r))) == x) { + ((xp)->r) = (l); + } else { + ((xp)->l) = (l); + } + ((l)->r) = (x); + ((x)->p) = (l); + } + + node_t *parentOf(node_t *n) { return (n ? (((((n))->p))) : nullptr); } + + node_t *leftOf(node_t *n) { return (n ? (((((n))->l))) : nullptr); } + + node_t *rightOf(node_t *n) { return (n ? (((((n))->r))) : nullptr); } + + long colorOf(node_t *n) { return (n ? 
(long)(((((n))->c))) : BLACK); } + + void setColor(node_t *n, long c) { + if (n != nullptr) { + ((n)->c) = (c); + } + } + + void fixAfterInsertion(node_t *x) { + ((x)->c) = (RED); + while (x != nullptr && x != (((((this))->root)))) { + node_t *xp = (((((x))->p))); + if (((xp)->c) != RED) { + break; + } + + if (parentOf(x) == leftOf(parentOf(parentOf(x)))) { + node_t *y = rightOf(parentOf(parentOf(x))); + if (colorOf(y) == RED) { + setColor(parentOf(x), BLACK); + setColor(y, BLACK); + setColor(parentOf(parentOf(x)), RED); + x = parentOf(parentOf(x)); + } else { + if (x == rightOf(parentOf(x))) { + x = parentOf(x); + rotateLeft(x); + } + setColor(parentOf(x), BLACK); + setColor(parentOf(parentOf(x)), RED); + if (parentOf(parentOf(x)) != nullptr) { + rotateRight(parentOf(parentOf(x))); + } + } + } else { + node_t *y = leftOf(parentOf(parentOf(x))); + if (colorOf(y) == RED) { + setColor(parentOf(x), BLACK); + setColor(y, BLACK); + setColor(parentOf(parentOf(x)), RED); + x = parentOf(parentOf(x)); + } else { + if (x == leftOf(parentOf(x))) { + x = parentOf(x); + rotateRight(x); + } + setColor(parentOf(x), BLACK); + setColor(parentOf(parentOf(x)), RED); + if (parentOf(parentOf(x)) != nullptr) { + rotateLeft(parentOf(parentOf(x))); + } + } + } + } + node_t *ro = (((((this))->root))); + if (((ro)->c) != BLACK) { + ((ro)->c) = (BLACK); + } + } + + node_t *insertIt(K k, V v, node_t *n) { + node_t *t = (((((this))->root))); + if (t == nullptr) { + if (n == nullptr) { + return nullptr; + } + + ((n)->l) = (nullptr); + ((n)->r) = (nullptr); + ((n)->p) = (nullptr); + ((n)->key) = (k); + ((n)->val) = (v); + ((n)->c) = (BLACK); + ((this)->root) = (n); + return nullptr; + } + + for (;;) { + if (k == t->key) { + return t; + } else if (k < t->key) { + node_t *tl = (((((t))->l))); + if (tl != nullptr) { + t = tl; + } else { + ((n)->l) = (nullptr); + ((n)->r) = (nullptr); + ((n)->key) = (k); + ((n)->val) = (v); + ((n)->p) = (t); + ((t)->l) = (n); + fixAfterInsertion(n); + return nullptr; + } + } else { + node_t *tr = (((((t))->r))); + if (tr != nullptr) { + t = tr; + } else { + ((n)->l) = (nullptr); + ((n)->r) = (nullptr); + ((n)->key) = (k); + ((n)->val) = (v); + ((n)->p) = (t); + ((t)->r) = (n); + fixAfterInsertion(n); + return nullptr; + } + } + } + } + + node_t *successor(node_t *t) { + if (t == nullptr) { + return nullptr; + } else if ((((((t))->r))) != nullptr) { + node_t *p = (((((t))->r))); + while ((((((p))->l))) != nullptr) { + p = (((((p))->l))); + } + return p; + } else { + node_t *p = (((((t))->p))); + node_t *ch = t; + while (p != nullptr && ch == (((((p))->r)))) { + ch = p; + p = (((((p))->p))); + } + return p; + } + } + + void fixAfterDeletion(node_t *x) { + while (x != (((((this))->root))) && colorOf(x) == BLACK) { + if (x == leftOf(parentOf(x))) { + node_t *sib = rightOf(parentOf(x)); + if (colorOf(sib) == RED) { + setColor(sib, BLACK); + setColor(parentOf(x), RED); + rotateLeft(parentOf(x)); + sib = rightOf(parentOf(x)); + } + if (colorOf(leftOf(sib)) == BLACK && colorOf(rightOf(sib)) == BLACK) { + setColor(sib, RED); + x = parentOf(x); + } else { + if (colorOf(rightOf(sib)) == BLACK) { + setColor(leftOf(sib), BLACK); + setColor(sib, RED); + rotateRight(sib); + sib = rightOf(parentOf(x)); + } + setColor(sib, colorOf(parentOf(x))); + setColor(parentOf(x), BLACK); + setColor(rightOf(sib), BLACK); + rotateLeft(parentOf(x)); + + x = (((((this))->root))); + } + } else { + node_t *sib = leftOf(parentOf(x)); + if (colorOf(sib) == RED) { + setColor(sib, BLACK); + setColor(parentOf(x), RED); + 
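+          // Mirror image of the left-child case above: rotate right around
+          // the parent so the sibling becomes black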
+  void fixAfterDeletion(node_t *x) {
+    while (x != root && colorOf(x) == BLACK) {
+      if (x == leftOf(parentOf(x))) {
+        node_t *sib = rightOf(parentOf(x));
+        if (colorOf(sib) == RED) {
+          setColor(sib, BLACK);
+          setColor(parentOf(x), RED);
+          rotateLeft(parentOf(x));
+          sib = rightOf(parentOf(x));
+        }
+        if (colorOf(leftOf(sib)) == BLACK && colorOf(rightOf(sib)) == BLACK) {
+          setColor(sib, RED);
+          x = parentOf(x);
+        } else {
+          if (colorOf(rightOf(sib)) == BLACK) {
+            setColor(leftOf(sib), BLACK);
+            setColor(sib, RED);
+            rotateRight(sib);
+            sib = rightOf(parentOf(x));
+          }
+          setColor(sib, colorOf(parentOf(x)));
+          setColor(parentOf(x), BLACK);
+          setColor(rightOf(sib), BLACK);
+          rotateLeft(parentOf(x));
+
+          x = root;
+        }
+      } else {
+        // mirror image of the case above
+        node_t *sib = leftOf(parentOf(x));
+        if (colorOf(sib) == RED) {
+          setColor(sib, BLACK);
+          setColor(parentOf(x), RED);
+          rotateRight(parentOf(x));
+          sib = leftOf(parentOf(x));
+        }
+        if (colorOf(rightOf(sib)) == BLACK && colorOf(leftOf(sib)) == BLACK) {
+          setColor(sib, RED);
+          x = parentOf(x);
+        } else {
+          if (colorOf(leftOf(sib)) == BLACK) {
+            setColor(rightOf(sib), BLACK);
+            setColor(sib, RED);
+            rotateLeft(sib);
+            sib = leftOf(parentOf(x));
+          }
+          setColor(sib, colorOf(parentOf(x)));
+          setColor(parentOf(x), BLACK);
+          setColor(leftOf(sib), BLACK);
+          rotateRight(parentOf(x));
+
+          x = root;
+        }
+      }
+    }
+
+    if (x != nullptr && x->c != BLACK) {
+      x->c = BLACK;
+    }
+  }
+
+  node_t *delete_node(node_t *p) {
+    // if p has two children, copy the successor's key/val into p, then
+    // unlink the successor instead
+    if (p->l != nullptr && p->r != nullptr) {
+      node_t *s = successor(p);
+      p->key = s->key;
+      p->val = s->val;
+      p = s;
+    }
+
+    // p now has at most one child
+    node_t *replacement = (p->l != nullptr) ? p->l : p->r;
+
+    if (replacement != nullptr) {
+      // splice p out of the tree
+      replacement->p = p->p;
+      node_t *pp = p->p;
+      if (pp == nullptr) {
+        root = replacement;
+      } else if (p == pp->l) {
+        pp->l = replacement;
+      } else {
+        pp->r = replacement;
+      }
+
+      p->l = nullptr;
+      p->r = nullptr;
+      p->p = nullptr;
+
+      if (p->c == BLACK) {
+        fixAfterDeletion(replacement);
+      }
+    } else if (p->p == nullptr) {
+      root = nullptr; // p was the only node
+    } else {
+      if (p->c == BLACK) {
+        fixAfterDeletion(p);
+      }
+      node_t *pp = p->p;
+      if (pp != nullptr) {
+        if (p == pp->l) {
+          pp->l = nullptr;
+        } else if (p == pp->r) {
+          pp->r = nullptr;
+        }
+        p->p = nullptr;
+      }
+    }
+    return p;
+  }
+
+  long compareKeysDefault(const void *a, const void *b) {
+    return ((long)a - (long)b);
+  }
+
+  void releaseNode(node_t *n) { free(n); }
+
+  void freeNode(node_t *n) {
+    if (n) {
+      freeNode(n->l);
+      freeNode(n->r);
+      releaseNode(n);
+    }
+  }
+
+  node_t *getNode() {
+    node_t *n = (node_t *)malloc(sizeof(*n));
+    return n;
+  }
+
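+  // Each public method below runs its body as a single transaction:
+  // TX_PRIVATE_STACK_REGION tells the TM library the top of the stack region
+  // that is private to the transaction, and TX_RAII opens a transaction that
+  // commits when the enclosing scope exits.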
+public:
+  rbtree_tl2_omap(DESCRIPTOR *me, auto *cfg) : root(nullptr) {}
+
+  void rbtree_free(rbtree_tl2_omap *r) {
+    freeNode(r->root);
+    free(r);
+  }
+
+  bool insert(DESCRIPTOR *me, const K &key, V &val) {
+    void *dummy;
+    TX_PRIVATE_STACK_REGION(&dummy);
+    {
+      TX_RAII;
+      node_t *node = getNode();
+      node_t *ex = insertIt(key, val, node);
+      if (ex != nullptr) {
+        releaseNode(node); // key was already present
+      }
+      return ex == nullptr;
+    }
+  }
+
+  bool remove(DESCRIPTOR *me, const K &key) {
+    void *dummy;
+    TX_PRIVATE_STACK_REGION(&dummy);
+    {
+      TX_RAII;
+      node_t *node = lookup(key);
+      if (node != nullptr) {
+        node = delete_node(node);
+      }
+      if (node != nullptr) {
+        releaseNode(node);
+      }
+      return node != nullptr;
+    }
+  }
+
+  bool rbtree_update(K key, V val) {
+    node_t *nn = getNode();
+    node_t *ex = insertIt(key, val, nn); // NB: insertIt, not the public insert
+    if (ex != nullptr) {
+      ex->val = val;
+      releaseNode(nn);
+      return true;
+    }
+    return false;
+  }
+
+  bool get(DESCRIPTOR *me, const K &key, V &val) {
+    void *dummy;
+    TX_PRIVATE_STACK_REGION(&dummy);
+    {
+      TX_RAII;
+      node_t *n = lookup(key);
+      if (n != nullptr) {
+        val = n->val;
+        return true;
+      }
+      return false;
+    }
+  }
+
+  long rbtree_contains(K key) {
+    node_t *n = lookup(key);
+    return (n != nullptr);
+  }
+};
diff --git a/artifact/policies/README.md b/artifact/policies/README.md
new file mode 100644
index 0000000..cf941a2
--- /dev/null
+++ b/artifact/policies/README.md
@@ -0,0 +1,78 @@
+# Synchronization Policies
+
+This folder stores the source code for the synchronization policies that are
+discussed in the paper.
+
+## baseline Policy
+
+This is not really a synchronization policy. It holds the per-thread
+functionality that is shared among the baseline lock-free and lock-based
+algorithms that we use in our comparison. This includes things like a
+per-thread pseudorandom number generator, a hash function, and safe memory
+reclamation. Note that these features are also present in each of our
+synchronization policies.
+
+## exoTM Mechanism
+
+This folder contains the implementation of the exoTM synchronization mechanism.
+
+## handSTM Policy
+
+This folder contains the handSTM policy. It uses the exoTM mechanism for its
+synchronization. There are five categories of policy:
+
+- eager_c1: encounter-time locking, undo logging, check-once orecs
+- eager_c2: encounter-time locking, undo logging, check-twice orecs
+- wb_c1: encounter-time locking, redo logging, check-once orecs
+- wb_c2: encounter-time locking, redo logging, check-twice orecs
+- lazy: commit-time locking, redo logging, check-once orecs
+
+Note that each can be instantiated with per-object (PO) or per-stripe (PS)
+orecs.
+
+## hybrid Policy
+
+This folder contains the hybrid policy (handSTM + STMCAS). It uses the exoTM
+mechanism for its synchronization. There are three categories of policy:
+
+- wb_c1: encounter-time locking, redo logging, check-once orecs
+- wb_c2: encounter-time locking, redo logging, check-twice orecs
+- lazy: commit-time locking, redo logging, check-once orecs
+
+Note that each can be instantiated with per-object (PO) or per-stripe (PS)
+orecs.
+
+## STMCAS Policy
+
+This folder contains the STMCAS policy. It uses the exoTM mechanism for its
+synchronization. It can be instantiated with per-object (PO) or per-stripe (PS)
+orecs.
+
+## xSTM Policy
+
+This folder holds a modified version of the
+[llvm-transmem](https://github.com/mfs409/llvm-transmem) plugin, which provides
+support for TM in C++. It consists of two sub-components: a plugin (`plugin/`)
+for llvm-15, which instruments transactions, and a set of libraries (`libs/`)
+that implement various TM algorithms.
+
+The folder is modified in the following ways:
+
+- Some names have been updated to "xSTM".
+- The plugin has been ported to llvm-15.
+- The plugin has support for an RAII interface, which is faster than the
+  original lambda interface (see the sketch below).
+- Additional libraries have been added, which use the exoTM mechanism instead
+  of other implementations of orecs and clocks.
+- We removed some TM algorithms (PTM, HTM).
+- We moved common libraries that are shared by our exoTM-based policies out of
+  this folder.
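+
+As a minimal sketch of the RAII interface, the following mirrors the pattern
+used by the data structures in `artifact/ds/xSTM/` (the `counter` type is
+hypothetical and only for illustration; `TX_RAII` and
+`TX_PRIVATE_STACK_REGION` come from `common/tm_api.h`):
+
+```cpp
+#include "common/tm_api.h"
+
+// Hypothetical shared counter, used only to show the transaction boundaries.
+struct counter {
+  long value = 0;
+
+  long increment() {
+    void *dummy;
+    TX_PRIVATE_STACK_REGION(&dummy); // stack below this point is private
+    {
+      TX_RAII; // transaction begins here, commits when this scope exits
+      return ++value;
+    }
+  }
+};
+```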
+
+Details about this plugin can be found in the following paper:
+
+"Simplifying Transactional Memory Support in C++", by PanteA Zardoshti, Tingzhe
+Zhou, Pavithra Balaji, Michael L. Scott, and Michael Spear. ACM Transactions on
+Architecture and Code Optimization (TACO), 2019.
+
+Note that these are all per-stripe (PS) policies, because that is the only
+appropriate way to map orecs to program data in a language-level STM for C++.
\ No newline at end of file
diff --git a/artifact/policies/STMCAS/include/base.h b/artifact/policies/STMCAS/include/base.h
new file mode 100644
index 0000000..c7bc561
--- /dev/null
+++ b/artifact/policies/STMCAS/include/base.h
@@ -0,0 +1,98 @@
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+#include <cstdlib>
+
+#include "../../exoTM/exotm.h"
+#include "../../include/minivector.h"
+
+#include "../../include/hash.h"
+#include "../../include/orec_policies.h"
+#include "../../include/rdtsc_rand.h"
+#include "../../include/timestamp_smr.h"
+
+/// base_t holds common fields and methods for STMCAS policies.
+///
+/// base_t is a re-usable descriptor. This means that it can have some
+/// dynamic memory allocation internally.
+///
+/// @tparam OP The orec policy to use.
+template <class OP>