diff --git a/.github/scripts/bootstrap-nvhpc.sh b/.github/scripts/bootstrap-nvhpc.sh
new file mode 100755
index 00000000..c099df98
--- /dev/null
+++ b/.github/scripts/bootstrap-nvhpc.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -euo pipefail
+set -x
+
+nvhpc_version=21.9
+
+# Fetch Atlas' nvhpc installation script; fixed output name so an existing copy is overwritten
+wget --output-document=install-nvhpc.sh https://raw.githubusercontent.com/ecmwf/atlas/develop/tools/install-nvhpc.sh
+chmod +x install-nvhpc.sh
+
+# Install nvhpc below the workspace, passing RUNNER_TEMP as the installer's temporary directory
+./install-nvhpc.sh --version "${nvhpc_version}" --prefix "${GITHUB_WORKSPACE}/nvhpc-install" --tmpdir "${RUNNER_TEMP}"
+
+exit 0
diff --git a/.github/scripts/install-hdf5.sh b/.github/scripts/install-hdf5.sh
new file mode 100755
index 00000000..5d714f76
--- /dev/null
+++ b/.github/scripts/install-hdf5.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+set -euo pipefail
+set -x
+
+hdf5_version=1.10.8
+
+# Choose hdf5
+version_parts=($(echo ${hdf5_version} | tr "." "\n"))
+major_version=${version_parts[0]}.${version_parts[1]}
+temporary_files="${RUNNER_TEMP}/hdf5"
+url=https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-${major_version}/hdf5-${hdf5_version}/src/hdf5-${hdf5_version}.tar.gz
+
+# Download hdf5
+mkdir -p "${temporary_files}"
+curl --location "$url" | tar zx -C "${temporary_files}"
+
+# Build hdf5
+cd "${temporary_files}/hdf5-${hdf5_version}"
+prefix="${GITHUB_WORKSPACE}/hdf5-install"
+mkdir -p "${prefix}"
+./configure --prefix="${prefix}" --enable-shared --enable-fortran --enable-hl
+make -j
+make install
+
+exit 0
+
diff --git a/.github/scripts/run-targets.sh b/.github/scripts/run-targets.sh
index 8f965c00..d722a884 100755
--- a/.github/scripts/run-targets.sh
+++ b/.github/scripts/run-targets.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-set -euo pipefail
+set -eu
 set -x
 
 # These targets don't have an MPI-parallel driver routine
@@ -8,11 +8,23 @@ non_mpi_targets=(dwarf-P-cloudMicrophysics-IFSScheme dwarf-cloudsc-c)
 # These targets currently cause issues and are therefore not tested
 skipped_targets=(dwarf-cloudsc-gpu-claw)
 
+if [[ "${arch:-}" == *"nvhpc"* ]]  # arch may be unset outside CI; treat that as "not nvhpc"
+then
+  # Skip GPU targets if built with nvhpc (don't have GPU in test runner)
+  skipped_targets+=(dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist dwarf-cloudsc-gpu-omp-scc-hoist)
+
+  # Skip GPU targets from Loki if built with nvhpc (don't have GPU in test runner)
+  skipped_targets+=(dwarf-cloudsc-loki-claw-gpu dwarf-cloudsc-loki-scc dwarf-cloudsc-loki-scc-hoist)
+
+  # Skip C target if built with nvhpc, segfaults for unknown reasons
+  skipped_targets+=(dwarf-cloudsc-c)
+fi
+
 exit_code=0
 cd build
 
 #
-# Run each of the binaries with default NPROMA and validate exit codes
+# Run each of the binaries with a safe NPROMA value and validate exit codes
 #
 
 for target in $(ls bin)
@@ -25,11 +37,12 @@ do
 
   if [[ "$mpi_flag" == "--with-mpi" && ! " ${non_mpi_targets[*]} " =~ " $target " ]]
   then
-    # Two ranks with one thread each, default NPROMA
-    mpirun -np 2 bin/$target 1 100
+    # Two ranks with one thread each, safe NPROMA
+    # NB: Use oversubscribe to run, even if we end up on a single core agent
+    mpirun --oversubscribe -np 2 bin/$target 1 100 64
   else
-    # Single thread, default NPROMA
-    bin/$target 1 100
+    # Single thread, safe NPROMA
+    bin/$target 1 100 64
   fi
   exit_code=$((exit_code + $?))
 done
diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh
index aced76b9..18402bf8 100755
--- a/.github/scripts/verify-targets.sh
+++ b/.github/scripts/verify-targets.sh
@@ -10,9 +10,32 @@ exit_code=0
 
 targets=(dwarf-P-cloudMicrophysics-IFSScheme dwarf-cloudsc-fortran)
 
+if [[ "$io_library_flag" == "--with-serialbox" ]]
+then
+  targets+=(dwarf-cloudsc-c)
+fi
+
 if [[ "$gpu_flag" == "--with-gpu" ]]
 then
-  targets+=(dwarf-cloudsc-gpu-claw dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist)
+  targets+=(dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist dwarf-cloudsc-gpu-omp-scc-hoist)
+  if [[ "$claw_flag" == "--with-claw" ]]
+  then
+    targets+=(dwarf-cloudsc-gpu-claw)
+  fi
+fi
+
+if [[ "$loki_flag" == "--with-loki" ]]
+then
+  targets+=(dwarf-cloudsc-loki-idem dwarf-cloudsc-loki-sca)
+  targets+=(dwarf-cloudsc-loki-scc dwarf-cloudsc-loki-scc-hoist)
+  if [[ "$prec_flag" != "--single-precision" ]]
+  then
+    targets+=(dwarf-cloudsc-loki-c)
+  fi
+  if [[ "$claw_flag" == "--with-claw" ]]
+  then
+    targets+=(dwarf-cloudsc-loki-claw-cpu dwarf-cloudsc-loki-claw-gpu)
+  fi
 fi
 
 #
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 9d421b44..62ef2e91 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -4,7 +4,7 @@ name: build
 on:
   # Triggers the workflow on push events
   push:
-    branches: [ '**' ]
+    branches: [ 'main', 'develop' ]
     tags-ignore: [ '**' ]
 
   # Triggers the workflow on pull request events
@@ -17,55 +17,98 @@ on:
 jobs:
   # This workflow contains a single job called "build"
   build:
-    name: Test on ${{ matrix.arch }} ${{ matrix.io_library_flag }} ${{ matrix.mpi_flag }} ${{ matrix.prec_flag }} ${{ matrix.gpu_flag }}
-    
+    name: Test on ${{ matrix.arch }} ${{ matrix.io_library_flag }} ${{ matrix.mpi_flag }} ${{ matrix.prec_flag }} ${{ matrix.gpu_flag }} ${{ matrix.loki_flag }} ${{ matrix.claw_flag }}
+
     # The type of runner that the job will run on
-    runs-on: ubuntu-latest
-  
+    runs-on: ubuntu-20.04
+
     strategy:
       fail-fast: false  # false: try to complete all jobs
-      
+
       matrix:
-      
+
         arch:
-          - github/ubuntu/gnu/9.3.0
-          
-        io_library_flag: ['']  # Switch between Serialbox and HDF5
-        # FIXME: serialbox builds are currently disabled until a compatible serialbox version is available to Github actions
-        
+          - github/ubuntu/gnu/9.4.0
+
+        io_library_flag: ['', '--with-serialbox']  # Switch between Serialbox and HDF5
+
         mpi_flag: ['', '--with-mpi']  # Enable MPI-parallel build
 
         prec_flag: ['', '--single-precision']  # Switch single/double precision
 
         gpu_flag: ['', '--with-gpu']  # GPU-variants enabled
 
+        loki_flag: ['', '--with-loki']  # Loki source-to-source translation enabled
+
+        claw_flag: ['']  # Flag to enable CLAW-generated variants
+
+        include:
+          # Add nvhpc build configurations with serialbox and HDF5
+          - arch: github/ubuntu/nvhpc/21.9
+            io_library_flag: ''
+            mpi_flag: ''
+            prec_flag: ''
+            gpu_flag: '--with-gpu'
+          - arch: github/ubuntu/nvhpc/21.9
+            io_library_flag: '--with-serialbox'
+            mpi_flag: ''
+            prec_flag: ''
+            gpu_flag: '--with-gpu'
+
     # Steps represent a sequence of tasks that will be executed as part of the job
     steps:
       # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
 
-      # Sets-up environment and installs required packages
-      - name: Environment setup
+      # Installs required packages
+      - name: Package installation
         run: |
-          [[ "${{ matrix.mpi_flag }}" == "--with-mpi" ]] && sudo apt install libopenmpi-dev || true
-          [[ "${{ matrix.io_library_flag }}" != "--with-serialbox" ]] && sudo apt install libhdf5-dev || true
-      
+          sudo apt-get install libc-dev-bin gfortran-9 gcc-9 g++-9
+
+      # Install MPI
+      - name: Install MPI via Apt
+        if: contains( matrix.mpi_flag, 'with-mpi' )
+        run: sudo apt-get install libopenmpi-dev
+
+      # Install Compiler
+      - name: Install nvhpc
+        if: contains( matrix.arch, 'nvhpc' )
+        run: .github/scripts/bootstrap-nvhpc.sh
+
+      # Install HDF5
+      - name: Install HDF5 via Apt
+        if: ${{ ! contains( matrix.arch, 'nvhpc' ) && ! contains( matrix.io_library_flag, 'with-serialbox' ) }}
+        run: sudo apt-get install libhdf5-dev
+
+      - name: Install HDF5 from source
+        if: contains( matrix.arch, 'nvhpc' ) && ! contains( matrix.io_library_flag, 'with-serialbox' )
+        run: source arch/${{ matrix.arch }}/env.sh && .github/scripts/install-hdf5.sh
+
+      # Install Boost (filesystem and system components), only for Serialbox builds
+      - name: Install Boost libraries
+        if: contains( matrix.io_library_flag, 'with-serialbox' )
+        run: sudo apt-get install libboost-filesystem-dev libboost-system-dev
+
       # Check-out dependencies as part of the bundle creation
       - name: Bundle create
         run: ./cloudsc-bundle create
-        
+
       # Build the targets
       - name: Bundle build
         run: |
           ./cloudsc-bundle build --retry-verbose \
           --arch=arch/${{ matrix.arch }} ${{ matrix.prec_flag }} \
-          ${{ matrix.mpi_flag }} ${{ matrix.io_library_flag }} ${{ matrix.gpu_flag }}
+          ${{ matrix.mpi_flag }} ${{ matrix.io_library_flag }} ${{ matrix.gpu_flag }} \
+          ${{ matrix.claw_flag}} ${{ matrix.loki_flag }}
 
       # Verify targets exist
       - name: Verify targets
         env:
           io_library_flag: ${{ matrix.io_library_flag }}
+          prec_flag: ${{ matrix.prec_flag }}
           gpu_flag: ${{ matrix.gpu_flag }}
+          loki_flag: ${{ matrix.loki_flag }}
+          claw_flag: ${{ matrix.claw_flag }}
         run: .github/scripts/verify-targets.sh
 
       # Run double-precision targets
@@ -73,5 +116,6 @@ jobs:
       - name: Run targets
         env:
           mpi_flag: ${{ matrix.mpi_flag }}
+          arch: ${{ matrix.arch }}
         if: ${{ matrix.prec_flag == '' }}
         run: .github/scripts/run-targets.sh
diff --git a/.gitignore b/.gitignore
index fa571f8d..d3ed9a6b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,19 @@
 *~
+.vscode
 ecbundle
 ecbundle/*
 build/*
 source/*
 serialbox2hdf5/venv
 serialbox2hdf5/serialbox
+benchmark/venv
+benchmark/rundir_*
+benchmark/jube-debug.log
+venv/
+__pycache__/
+.DS_Store
+.dace.conf
+.gt_cache/
+.idea/
+.python-version
+*.egg-info/
diff --git a/AUTHORS.md b/AUTHORS.md
index c66027c6..5a12fb9b 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -9,13 +9,16 @@
 - M. Hamrud (ECMWF)
 - M. Koehler (ECMWF)
 - M. Lange (ECMWF)
+- L. Lucido (Atos)
 - O. Marsden (ECMWF)
 - G. Mengaldo (ECMWF)
 - G. Mozdzynski (ECMWF)
+- Z. Piotrowski (ECMWF)
 - B. Reuter (ECMWF)
 - D. Salmond (ECMWF)
 - M. Tiedtke (ECMWF)
 - A. Tompkins (ECMWF)
+- S. Ubbiali (ETH Zuerich)
 - F. Vana (ECMWF)
 
 If you have contributed to this project,
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d9820fbc..9eafd77e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,7 +7,10 @@
 # nor does it submit to any jurisdiction.
 
 # define minimum version of cmake required
-cmake_minimum_required( VERSION 3.12 FATAL_ERROR )
+cmake_minimum_required( VERSION 3.17 FATAL_ERROR )
+
+# Disable warnings about setting `ENABLE_ACC` variable for ecbuild_add_option
+cmake_policy( SET CMP0077 NEW )
 
 find_package( ecbuild REQUIRED )
 
@@ -15,69 +18,76 @@ find_package( ecbuild REQUIRED )
 project( dwarf-p-cloudsc LANGUAGES C Fortran )
 
 include( cmake/compat.cmake )
+if( CMAKE_Fortran_COMPILER_ID MATCHES "GNU")  # GNU Fortran specific settings
+  ecbuild_add_fortran_flags("-ffree-line-length-none")  # lift the free-form source line length limit
+  if( CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER_EQUAL "10.0")
+    ecbuild_add_fortran_flags("-fallow-argument-mismatch")  # GNU 10+ rejects argument mismatches (presumably in MPI calls - TODO confirm)
+  endif()
+  if( CMAKE_Fortran_COMPILER_VERSION VERSION_LESS "11.0")
+    set( ENABLE_ACC OFF )  # force the ACC feature off for GNU older than 11 (reason not stated here)
+  endif()
+endif()
+
+include( cloudsc_compile_options )
 
 ### OpenACC
 if( NOT DEFINED ENABLE_ACC OR ENABLE_ACC )
-    find_package( OpenACC COMPONENTS Fortran C )
-    # Ensure targets are available
-    if( OpenACC_Fortran_FOUND AND NOT TARGET OpenACC::OpenACC_Fortran )
-        ecbuild_critical( "OpenACC::OpenACC_Fortran should have been defined as a CMake target" )
-    endif()
-    if( OpenACC_C_FOUND AND NOT TARGET OpenACC::OpenACC_C )
-        ecbuild_critical( "OpenACC::OpenACC_C should have been defined as a CMake target" )
+    # Incredibly inconvenient: FindOpenACC does _not_ set OpenACC_FOUND, only
+    # the language-specific components OpenACC_Fortran_FOUND and OpenACC_C_FOUND.
+    # This means, even internally CMake considers OpenACC as not found.
+    # (See eg get_property(... GLOBAL PROPERTY PACKAGES_NOT_FOUND))
+    # Therefore, we search for OpenACC, set OpenACC_FOUND ourselves according to
+    # the result, and then, trigger a second find_package via ecbuild_add_option.
+    # This then conveniently takes the previously set OpenACC_FOUND into account
+    # and rectifies CMake's internal bookkeeping in the process.
+    find_package( OpenACC )
+    if( OpenACC_Fortran_FOUND AND OpenACC_C_FOUND )
+        set( OpenACC_FOUND ON )
     endif()
 endif()
 ecbuild_add_option( FEATURE ACC
     DESCRIPTION "OpenACC" DEFAULT ON
-    CONDITION OpenACC_C_FOUND OR OpenACC_Fortran_FOUND )
+    REQUIRED_PACKAGES "OpenACC" )
+
+include(CheckLanguage)
+check_language( CUDA )  # probe for a CUDA compiler without enabling the language yet
+ecbuild_add_option( FEATURE CUDA
+    DESCRIPTION "CUDA" DEFAULT OFF
+    CONDITION CMAKE_CUDA_COMPILER )  # feature is only available if the probe found a compiler
+if( HAVE_CUDA )
+    enable_language( CUDA )  # enable the language only when the CUDA feature is on
+endif()
 
 ### OpenMP
-if( NOT DEFINED ENABLE_OMP OR ENABLE_OMP )
-    find_package( OpenMP COMPONENTS Fortran C )
-    # Ensure targets are available
-    if( OpenMP_Fortran_FOUND AND NOT TARGET OpenMP::OpenMP_Fortran )
-        ecbuild_critical( "OpenMP::OpenMP_Fortran should have been defined as a CMake target" )
-    endif()
-    if( OpenMP_C_FOUND AND NOT TARGET OpenMP::OpenMP_C )
-        ecbuild_critical( "OpenMP::OpenMP_C should have been defined as a CMake target" )
-    endif()
-endif()
 ecbuild_add_option( FEATURE OMP
     DESCRIPTION "OpenMP" DEFAULT ON
-    CONDITION OpenMP_C_FOUND AND OpenMP_Fortran_FOUND )
+    REQUIRED_PACKAGES "OpenMP COMPONENTS Fortran C" )
+include(features/OMP)
 
 ### MPI
-if( NOT DEFINED ENABLE_MPI OR ENABLE_MPI )
-    ecbuild_find_mpi( COMPONENTS Fortran )
-endif()
-if( MPI_Fortran_FOUND AND ENABLE_MPI )
-    # Ensure targets are available
-    if( MPI_Fortran_FOUND AND NOT TARGET MPI::MPI_Fortran )
-        ecbuild_critical( "MPI::MPI_Fortran should have been defined as a CMake target" )
-    endif()
-    ecbuild_add_option( FEATURE MPI
-        DESCRIPTION "MPI" DEFAULT OFF
-        CONDITION MPI_Fortran_FOUND )
-    list(APPEND CLOUDSC_DEFINITIONS HAVE_MPI ${MPI_DEFINITIONS})
+ecbuild_add_option( FEATURE MPI
+    DESCRIPTION "MPI" DEFAULT OFF
+    REQUIRED_PACKAGES "MPI COMPONENTS Fortran" )
+if( HAVE_MPI )
+    list(APPEND CLOUDSC_DEFINITIONS HAVE_MPI )
 endif()
 
 ### HDF5
 ecbuild_add_option( FEATURE HDF5
     DESCRIPTION "Use HDF5 to read input and reference data"
     REQUIRED_PACKAGES "HDF5 COMPONENTS Fortran"
-    CONDITION NOT DEFINED ENABLE_HDF5 OR ENABLE_HDF5
     DEFAULT ON )
-if( HDF5_FOUND )
-    list(APPEND CLOUDSC_DEFINITIONS HAVE_HDF5 ${HDF5_Fortran_DEFINITIONS})
+if( HAVE_HDF5 )
+    list(APPEND CLOUDSC_DEFINITIONS HAVE_HDF5 )
 endif()
 
 # Add Serialbox utility package for platform-agnostic file I/O
 ecbuild_add_option( FEATURE SERIALBOX
     DESCRIPTION "Use Serialbox to read input and reference data"
     REQUIRED_PACKAGES "Serialbox"
-    CONDITION (NOT DEFINED ENABLE_SERIALBOX OR ENABLE_SERIALBOX) AND NOT HDF5_FOUND
+    CONDITION NOT HAVE_HDF5
     DEFAULT OFF )
-if( SERIALBOX_FOUND )
+if( HAVE_SERIALBOX )
     list(APPEND CLOUDSC_DEFINITIONS HAVE_SERIALBOX)
 endif()
 
@@ -95,11 +105,10 @@ endif()
 add_subdirectory(src)
 
 # documentation
-find_package( Latex )
 ecbuild_add_option(FEATURE DOCS
                    DESCRIPTION "Documentation"
-                   DEFAULT OFF
-                   CONDITION Latex_FOUND)
+                   REQUIRED_PACKAGES "Latex"
+                   DEFAULT OFF)
 
 
 if(HAVE_DOCS)
diff --git a/README.md b/README.md
index f062de29..5f2cf11c 100644
--- a/README.md
+++ b/README.md
@@ -34,10 +34,12 @@ Balthasar Reuter (balthasar.reuter@ecmwf.int)
 - **dwarf-cloudsc-gpu-kernels**: GPU-enabled version of the CLOUDSC dwarf
   that uses OpenACC and relies on the `!$acc kernels` directive to offload
   the computational kernel.
-- **dwarf-cloudsc-gpu-claw**: GPU-enabled and optimized version of CLOUDSC
-  that is based on an auto-generated version of CLOUDSC based on the CLAW
-  tool. The kernel in this demonstrator has been further optimized with
-  gang-level loop blocking to demonstrate potential performance gains.
+- **dwarf-cloudsc-gpu-claw** (deprecated!): GPU-enabled and optimized version of
+  CLOUDSC that is based on an auto-generated version of CLOUDSC based on the CLAW
+  tool. The kernel in this demonstrator has been further optimized with gang-level
+  loop blocking to demonstrate potential performance gains. This variant is defunct
+  on current Nvidia GPUs and therefore deactivated by default, requiring explicit
+  `--with-claw` flag to build.
 - **dwarf-cloudsc-gpu-scc**: GPU-enabled and optimized version of
   CLOUDSC that utilises the native blocked IFS memory layout via a
   "single-column coalesced" (SCC) loop layout. Here the outer NPROMA
@@ -52,15 +54,21 @@ Balthasar Reuter (balthasar.reuter@ecmwf.int)
   The block array arguments are fully dimensioned though, and
   multi-dimensional temporaries have been declared explicitly at the
   driver level.
+- **dwarf-cloudsc-gpu-scc-cuf**: GPU-enabled and optimized version of
+  CLOUDSC that uses the SCC loop layout in combination with CUDA-Fortran
+  (CUF) to explicitly allocate temporary arrays in device memory and
+  move parameter structures to constant memory. To enable this variant,
+  a suitable CUDA installation is required and the `--with-cuda` flag
+  needs to be passed at the build stage.
 
 ## Download and Installation
 
 The code is written in Fortran 2003 and it has been tested using the various compilers, including:
 
-    GCC 7.3, 9.3
+    GCC 7.3, 9.3, 11.2
     Cray 8.7.7
-    NVHPC 20.9
-    Intel
+    NVHPC 20.9, 22.1
+    Intel (classic)
 
 This application does not need MPI nor BLAS libraries for performance. Just a compiler that understands
 OpenMP directives. Fortran must be at least level F2003.
@@ -140,7 +148,7 @@ The default build configuration relies on HDF5 input and reference data for
 dwarf-cloudsc-fortran as well as GPU and Loki versions. The original
 dwarf-P-cloudMicrophysics-IFSScheme always uses raw Fortran binary format.
 
-**Please note:** The HDF55 installation needs to have the f03 interfaces installed.
+**Please note:** The HDF5 installation needs to have the f03 interfaces installed (default with HDF5 1.10+).
 
 As an alternative to HDF5, the [Serialbox](https://github.com/GridTools/serialbox)
 library can be used to load input and reference data. This, however, requires
@@ -161,6 +169,21 @@ input or reference data have to be done via manual conversion. A small
 Python script for this with usage instructions can be found in the
 [serialbox2hdf5](serialbox2hdf5/README.md) directory.
 
+### Building on ECMWF's Atos BullSequana XH2000
+
+To build on ECMWF's Atos BullSequana XH2000 supercomputer, run the following commands:
+
+```sh
+./cloudsc-bundle create
+./cloudsc-bundle build --arch arch/ecmwf/hpc2020/compiler/version [--single-precision] [--with-mpi]
+```
+
+Currently available `compiler/version` selections are:
+
+* `gnu/9.3.0` and `gnu/11.2.0`
+* `intel/2021.4.0`
+* `nvhpc/22.1` (use with `--with-gpu` on AC's GPU partition)
+
 ### A64FX version of CLOUDSC
 
 Preliminary results for CLOUDSC have been generated for A64FX CPUs on
@@ -186,6 +209,8 @@ cd build
 ./bin/dwarf-cloudsc-c 4 16384 32   # The standalone C version
 ```
 
+### Running on ECMWF's Atos BullSequana XH2000
+
 On the Atos system, a high-watermark run on a single socket can be performed as follows:
 
 ```sh
@@ -193,33 +218,52 @@ export OMP_NUM_THREADS=64
 OMP_PLACES="{$(seq -s '},{' 0 $(($OMP_NUM_THREADS-1)) )}" srun -q np --ntasks=1 --hint=nomultithread --cpus-per-task=$OMP_NUM_THREADS ./bin/dwarf-cloudsc-fortran $OMP_NUM_THREADS 163840 32
 ```
 
-For a build with the Intel 2021.1.1 compiler, performance of ~74 GF is achieved.
+For a double-precision build with the GNU 11.2.0 compiler, performance of
+~73 GF/s is achieved.
 
-## Loki transformations for CLOUDSC
+To run the GPU variant on AC, which includes some GPU nodes, allocate
+an interactive session on a GPU node and run the binary as usual:
+
+```sh
+srun -N1 -q ng -p gpu --gres=gpu:4 --mem 200G --pty /bin/bash
+bin/dwarf-cloudsc-gpu-scc-hoist 1 262144 128
+```
 
-Loki is an in-house developed source-to-source translation tool that
-allows us to create bespoke transformations for the IFS to target and
-experiment with emerging HPC architectures and programming models. We
-use the CLOUDSC dwarf as a demonstrator for targeted transformation
-capabilities of physics and grid point computations kernels, including
-conversion to C and GPU via downstream tools like CLAW.
+For a double-precision build with NVHPC 22.1, performance of ~340 GF/s
+on a single GPU is achieved.
 
-To use the Loki demonstrators, Loki and CLAW need to be installed as
-described in the
-[Loki install instructions](https://git.ecmwf.int/projects/RDX/repos/loki/browse/INSTALL.md).
-*Please note that the in-house "volta" machine needs some manual workarounds for this atm.*
+A multi-GPU run requires MPI (build with `--with-mpi`) with a dedicated MPI
+task for each GPU and (at the moment) manually assigning CUDA devices to each
+rank, as Slurm is not yet fully configured for the GPU partition.
 
-Once Loki and CLAW are installed and activated via `source loki-activate`,
-the following build flags enable the demonstrator build targets:
+To use four GPUs on one node, allocate the relevant resources
+```sh
+salloc -N 1 --tasks-per-node 4 -q ng -p gpu --gres=gpu:4 --mem 200G
+```
+
+and then run the binary like this:
 
 ```sh
-# For general use on workstations with GNU
-# Please note that OpenACC needs to be disable with GNU,
-# since CLAW-generated code currently does not comply with GNU.
-./cloudsc-bundle build --clean --with-loki --loki-frontend=fp --arch=./arch/ecmwf/leap42/gnu/7.3.0
+srun bash -c "CUDA_VISIBLE_DEVICES=\$SLURM_LOCALID bin/dwarf-cloudsc-gpu-scc-hoist 1 \$((\$SLURM_NPROCS*262144)) 128"
+```
+
+In principle, the same should work for multi-node execution (`-N 2`, `-N 4` etc.) once interconnect issues are resolved.
+
+## Loki transformations for CLOUDSC
 
-# For GPU exploration on volta
-./cloudsc-bundle build --clean [--with-gpu]--with-loki --loki-frontend=fp --arch=./arch/ecmwf/volta/nvhpc/20.9
+[Loki](https://github.com/ecmwf-ifs/loki) is an in-house developed
+source-to-source translation tool that allows us to create bespoke
+transformations for the IFS to target and experiment with emerging HPC
+architectures and programming models. We use the CLOUDSC dwarf as a demonstrator
+for targeted transformation capabilities of physics and grid point computations
+kernels, including conversion to C and GPU, directly or via downstream tools
+like CLAW.
+
+The following build flags enable the demonstrator build targets on the
+ECMWF Atos HPC facility's GPU partition:
+
+```sh
+./cloudsc-bundle build --clean [--with-gpu] --with-loki --loki-frontend=fp --arch=./arch/ecmwf/hpc2020/nvhpc/22.1
 ```
 
 The following Loki modes are included in the dwarf, each with a bespoke demonstrator build:
@@ -231,18 +275,21 @@ The following Loki modes are included in the dwarf, each with a bespoke demonstr
 - **cloudsc-loki-sca**: Pure single-column mode that strips all horizontal
   vector loops from the kernel and introduces an outer "column-loop"
   at the driver level.
-- **cloudsc-loki-claw-cpu**: Same as SCA, but also adds the necessary CLAW
-  annotations. The resulting cloudsc.claw.F90 file is then processed
-  by CLAW to re-insert vector loops for optimal CPU execution.
-- **cloudsc-loki-claw-gpu**: Creates the same CLAW-ready kernel file, but
-  triggers the GPU-specific optimizations in the CLAW compiler to
-  insert OpenACC-offload instructions in the driver and an OpenACC
-  parallel loop inside the kernel for each block. This needs to be run
-  with large block sizes (eg. NPROMA=1024-8192).
+- **cloudsc-loki-claw-cpu** (deprecated): Same as SCA, but also adds the
+  necessary CLAW annotations. The resulting cloudsc.claw.F90 file is then
+  processed by CLAW to re-insert vector loops for optimal CPU execution.
+- **cloudsc-loki-claw-gpu** (deprecated): Creates the same CLAW-ready kernel
+  file, but triggers the GPU-specific optimizations in the CLAW compiler to insert
+  OpenACC-offload instructions in the driver and an OpenACC parallel loop inside
+  the kernel for each block. This needs to be run with large block sizes (eg.
+  NPROMA=1024-8192).
 - **cloudsc-loki-c**: A prototype C transpilation pipeline that converts
   the kernel to C and calls it via iso_c_bindings interfaces from the
   driver.
 
+To enable the deprecated and, on GPU, defunct CLAW variants, the build-flag
+`--with-claw` needs to be specified explicitly.
+
 ### A note on frontends
 
 Loki currently supports three frontends to parse the Fortran source code:
@@ -260,3 +307,10 @@ means we require the `.xmod` module description files for utility
 routines in `src/common` for processing the CLOUDSC source files with
 the OMNI frontend. These are stored in the source under
 `src/cloudsc_loki/xmod`.
+
+## Benchmarking
+
+To automate parameter space sweeps and ease testing across various platforms, a
+[JUBE](https://www.fz-juelich.de/jsc/jube) benchmark definition is included in
+the directory `benchmark`. See the included [README](benchmark/README.md) for
+further details and usage instructions.
diff --git a/VERSION b/VERSION
index 26aaba0e..f0bb29e7 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.2.0
+1.3.0
diff --git a/arch/ecmwf/hpc2020/gnu/11.2.0/env.sh b/arch/ecmwf/hpc2020/gnu/11.2.0/env.sh
new file mode 100644
index 00000000..67caadee
--- /dev/null
+++ b/arch/ecmwf/hpc2020/gnu/11.2.0/env.sh
@@ -0,0 +1,44 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# Store tracing and disable (module is *way* too verbose)
+{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null
+
+module_load() {
+  echo "+ module load $1"  # manual trace line, since xtrace is switched off in this file
+  module load "$1"
+}
+module_unload() {
+  echo "+ module unload $1"  # manual trace line, see module_load
+  module unload "$1"
+}
+
+# Unload all modules to be certain
+module_unload gcc
+module_unload openmpi
+module_unload hpcx-openmpi
+module_unload boost
+module_unload hdf5
+module_unload cmake
+module_unload python3
+module_unload java
+
+# Load modules
+module_load prgenv/gnu
+module_load gcc/11.2.0
+module_load hpcx-openmpi/2.10.0
+module_load boost/1.71.0
+module_load hdf5/1.10.6
+module_load cmake/3.20.2
+module_load python3/3.8.8-01
+module_load java/11.0.6
+
+# Restore tracing to stored setting
+{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/ecmwf/hpc2020/gnu/11.2.0/toolchain.cmake b/arch/ecmwf/hpc2020/gnu/11.2.0/toolchain.cmake
new file mode 100644
index 00000000..10709fc3
--- /dev/null
+++ b/arch/ecmwf/hpc2020/gnu/11.2.0/toolchain.cmake
@@ -0,0 +1,41 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+# set( OpenMP_Fortran_FLAGS       "-fopenmp -foffload=nvptx-none" )
+
+####################################################################
+# OpenAcc FLAGS
+####################################################################
+
+# set( OpenACC_Fortran_FLAGS "-fopenacc -foffload=nvptx-none" )
+
+####################################################################
+# Compiler FLAGS
+####################################################################
+
+# General flags, appended one by one to whatever ECBUILD_Fortran_FLAGS already holds
+string(APPEND ECBUILD_Fortran_FLAGS " -ffpe-trap=invalid,zero,overflow")
+string(APPEND ECBUILD_Fortran_FLAGS " -fstack-arrays")
+string(APPEND ECBUILD_Fortran_FLAGS " -fconvert=big-endian")
+string(APPEND ECBUILD_Fortran_FLAGS " -fbacktrace")
+string(APPEND ECBUILD_Fortran_FLAGS " -fno-second-underscore")
+string(APPEND ECBUILD_Fortran_FLAGS " -ffast-math")
+string(APPEND ECBUILD_Fortran_FLAGS " -fno-unsafe-math-optimizations")
+
+# Dangerous, but GNU 10+ otherwise complains about argument mismatch for MPI routines
+string(APPEND ECBUILD_Fortran_FLAGS " -fallow-argument-mismatch")
diff --git a/arch/ecmwf/hpc2020/gnu/9.3.0/env.sh b/arch/ecmwf/hpc2020/gnu/9.3.0/env.sh
new file mode 100644
index 00000000..788bfa1a
--- /dev/null
+++ b/arch/ecmwf/hpc2020/gnu/9.3.0/env.sh
@@ -0,0 +1,43 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# Store tracing and disable (module is *way* too verbose)
+{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null
+
+module_load() {
+  echo "+ module load $1"  # manual trace line, since xtrace is switched off in this file
+  module load "$1"
+}
+module_unload() {
+  echo "+ module unload $1"  # manual trace line, see module_load
+  module unload "$1"
+}
+
+# Unload all modules to be certain
+module_unload gcc
+module_unload openmpi
+module_unload boost
+module_unload hdf5
+module_unload cmake
+module_unload python3
+module_unload java
+
+# Load modules
+module_load prgenv/gnu
+module_load gcc/9.3.0
+module_load openmpi/4.1.1.1
+module_load boost/1.71.0
+module_load hdf5/1.10.6
+module_load cmake/3.20.2
+module_load python3/3.8.8-01
+module_load java/11.0.6
+
+# Restore tracing to stored setting
+{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/ecmwf/hpc2020/gnu/9.3.0/toolchain.cmake b/arch/ecmwf/hpc2020/gnu/9.3.0/toolchain.cmake
new file mode 100644
index 00000000..a2282abb
--- /dev/null
+++ b/arch/ecmwf/hpc2020/gnu/9.3.0/toolchain.cmake
@@ -0,0 +1,26 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+
+####################################################################
+# Compiler FLAGS
+####################################################################
+
+# General flags, appended one by one to whatever ECBUILD_Fortran_FLAGS already holds
+string(APPEND ECBUILD_Fortran_FLAGS " -ffpe-trap=invalid,zero,overflow")
+string(APPEND ECBUILD_Fortran_FLAGS " -fstack-arrays")
+string(APPEND ECBUILD_Fortran_FLAGS " -fconvert=big-endian")
+string(APPEND ECBUILD_Fortran_FLAGS " -fbacktrace")
+string(APPEND ECBUILD_Fortran_FLAGS " -fno-second-underscore")
+string(APPEND ECBUILD_Fortran_FLAGS " -ffast-math")
+string(APPEND ECBUILD_Fortran_FLAGS " -fno-unsafe-math-optimizations")
diff --git a/arch/ecmwf/atos-tems/intel/2021.1.1/env.sh b/arch/ecmwf/hpc2020/intel/2021.4.0/env.sh
similarity index 53%
rename from arch/ecmwf/atos-tems/intel/2021.1.1/env.sh
rename to arch/ecmwf/hpc2020/intel/2021.4.0/env.sh
index 91f2c1de..6da32d30 100644
--- a/arch/ecmwf/atos-tems/intel/2021.1.1/env.sh
+++ b/arch/ecmwf/hpc2020/intel/2021.4.0/env.sh
@@ -12,54 +12,36 @@
 { tracing_=${-//[^x]/}; set +x; } 2>/dev/null
 
 module_load() {
-  if [ "$2" == "ECBUILD_CONFIGURE_ONLY" ]; then
-    if [ -n "${ECBUILD_CONFIGURE}" ]; then
-      echo "+ module load $1"
-      module load $1
-    else
-      echo " WARNING: Module $1 not loaded (only during configuration)"
-    fi
-  else
-    echo "+ module load $1"
-    module load $1
-  fi
+  echo "+ module load $1"
+  module load $1
 }
 module_unload() {
   echo "+ module unload $1"
   module unload $1
 }
 
-# Unload to be certain
+# Unload all modules to be certain
+module_unload intel
+module_unload openmpi
+module_unload hpcx-openmpi
+module_unload boost
+module_unload hdf5
 module_unload cmake
-module_unload python
 module_unload python3
-module_unload boost
-module_unload ecbuild
-module_unload ifs-support
-module_unload cdt
-module_unload boost
-module_unload PrgEnv-cray
-module_unload PrgEnv-intel
-module_unload intel
-module_unload gcc
-
-export EC_CRAYPE_INTEGRATION=off
+module_unload java
 
-prgenvswitchto intel
 # Load modules
-module_unload intel
-module_load intel/2021.1.1
-module_load python3
-module_load boost
-module_load ninja
-module_load cmake
-module_load boost
+module_load prgenv/intel
+module_load intel/2021.4.0
+module_load hpcx-openmpi/2.10.0
+module_load boost/1.71.0
+module_load hdf5/1.10.6
+module_load cmake/3.20.2
+module_load python3/3.8.8-01
+module_load java/11.0.6
 
 set -x
 
-# This is used to download binary test data
-export http_proxy="http://slb-proxy-web.ecmwf.int:3333/"
-
 # Restore tracing to stored setting
 { if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null
 
diff --git a/arch/ecmwf/hpc2020/intel/2021.4.0/toolchain.cmake b/arch/ecmwf/hpc2020/intel/2021.4.0/toolchain.cmake
new file mode 120000
index 00000000..c4b5868e
--- /dev/null
+++ b/arch/ecmwf/hpc2020/intel/2021.4.0/toolchain.cmake
@@ -0,0 +1 @@
+../../../../toolchains/ecmwf-hpc2020-intel.cmake
\ No newline at end of file
diff --git a/arch/ecmwf/hpc2020/nvhpc/22.1/env.sh b/arch/ecmwf/hpc2020/nvhpc/22.1/env.sh
new file mode 100644
index 00000000..b8c9385b
--- /dev/null
+++ b/arch/ecmwf/hpc2020/nvhpc/22.1/env.sh
@@ -0,0 +1,52 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# Source me to get the correct configure/build/run environment
+
+# Store tracing and disable (module is *way* too verbose)
+{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null
+
+module_load() {  # announce, then load, the module named by $1
+  echo "+ module load $1"
+  module load "$1"  # quoted so the name reaches `module` as exactly one word
+}
+module_unload() {  # announce, then unload, the module named by $1
+  echo "+ module unload $1"
+  module unload "$1"  # quoted so the name reaches `module` as exactly one word
+}
+
+# Unload all modules to be certain
+module_unload nvidia
+module_unload intel-mpi
+module_unload openmpi
+module_unload hpcx-openmpi
+module_unload boost
+module_unload hdf5
+module_unload cmake
+module_unload python3
+module_unload java
+
+# Load modules
+module_load prgenv/nvidia
+module_load nvidia/22.1
+module_load hpcx-openmpi/2.10.0
+# module_load boost/1.71.0
+module_load hdf5/1.10.6
+module_load cmake/3.20.2
+module_load python3/3.8.8-01
+module_load java/11.0.6
+
+# Increase stack size to maximum
+ulimit -S -s unlimited
+
+set -x
+
+# Restore tracing to stored setting
+{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/ecmwf/hpc2020/nvhpc/22.1/toolchain.cmake b/arch/ecmwf/hpc2020/nvhpc/22.1/toolchain.cmake
new file mode 120000
index 00000000..7b14d221
--- /dev/null
+++ b/arch/ecmwf/hpc2020/nvhpc/22.1/toolchain.cmake
@@ -0,0 +1 @@
+../../../../toolchains/ecmwf-hpc2020-nvhpc.cmake
\ No newline at end of file
diff --git a/arch/ecmwf/volta/nvhpc/20.9/env.sh b/arch/ecmwf/volta/nvhpc/20.9/env.sh
index 34cca6a6..e8231d59 100644
--- a/arch/ecmwf/volta/nvhpc/20.9/env.sh
+++ b/arch/ecmwf/volta/nvhpc/20.9/env.sh
@@ -25,12 +25,14 @@ module_unload boost
 module_unload cmake
 module_unload intel
 module_unload pgi
+module_unload nvhpc
+module_unload nvhpc-nompi
 module_unload gnu
 
 # Load modules
 module use /opt/nvidia/hpc_sdk/modulefiles
 # module load nvhpc
-module load nvhpc-nompi/20.9
+module_load nvhpc-nompi/20.9
 module_load boost/1.61.0
 module_load cmake/3.19.5
 
diff --git a/arch/ecmwf/volta/nvhpc/22.3/env.sh b/arch/ecmwf/volta/nvhpc/22.3/env.sh
new file mode 100644
index 00000000..2663d507
--- /dev/null
+++ b/arch/ecmwf/volta/nvhpc/22.3/env.sh
@@ -0,0 +1,54 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# Source me to get the correct configure/build/run environment
+
+# Store tracing and disable (module is *way* too verbose)
+{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null
+
+module_load() {  # announce, then load, the module named by $1
+  echo "+ module load $1"
+  module load "$1"  # quoted so the name reaches `module` as exactly one word
+}
+module_unload() {  # announce, then unload, the module named by $1
+  echo "+ module unload $1"
+  module unload "$1"  # quoted so the name reaches `module` as exactly one word
+}
+
+# Unload to be certain
+module_unload boost
+module_unload cmake
+module_unload intel
+module_unload pgi
+module_unload nvhpc-nompi
+module_unload nvhpc
+module_unload gnu
+
+# Load modules
+module use /local/hdd/daom/hpc_sdk_22.3/modulefiles/
+module_load nvhpc/22.3
+#module_load nvhpc-nompi/22.3
+module_load boost/1.61.0
+module_load cmake/3.19.5
+
+set -x
+
+# Increase stack size to maximum
+ulimit -S -s unlimited
+
+# Fix boost header location
+export BOOST_INCLUDEDIR="/usr/local/apps/boost/1.61.0/PGI/17.1/include/"
+
+# Custom HDF5 library build with F03 interfaces
+export HDF5_ROOT="/local/hdd/nabr/hdf5/nvhpc/22.3"
+
+# Restore tracing to stored setting (the trace of the restore itself is discarded)
+{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
+export ANT_OPTS="-Dhttp.proxyHost=proxy.ecmwf.int -Dhttp.proxyPort=3333 -Dhttps.proxyHost=proxy.ecmwf.int -Dhttps.proxyPort=3333"
diff --git a/arch/ecmwf/volta/nvhpc/22.3/toolchain.cmake b/arch/ecmwf/volta/nvhpc/22.3/toolchain.cmake
new file mode 100644
index 00000000..96359878
--- /dev/null
+++ b/arch/ecmwf/volta/nvhpc/22.3/toolchain.cmake
@@ -0,0 +1,61 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module
+# unless it is stored as a cache variable
+set( OpenMP_Fortran_FLAGS   "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" )
+
+# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to
+# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone
+# doesn't have any effect here as the module uses FORCE to overwrite the
+# existing value)
+set( OpenMP_C_FLAGS         "-mp -mp=bind,allcores,numa" CACHE STRING "" )
+set( OpenMP_C_LIB_NAMES     "acchost" CACHE STRING "")
+
+####################################################################
+# OpenAcc FLAGS
+####################################################################
+
+# NB: We have to add `-mp` again to avoid undefined symbols during linking
+# (smells like an Nvidia bug)
+set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc70,lineinfo,fastmath" CACHE STRING "" )
+# Enable this to get more detailed compiler output
+# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" )
+
+####################################################################
+# COMMON FLAGS
+####################################################################
+
+set(ECBUILD_Fortran_FLAGS "-fpic")  # start from a fresh value; the options below are appended to it
+string(APPEND ECBUILD_Fortran_FLAGS " -Mframe")
+string(APPEND ECBUILD_Fortran_FLAGS " -Mbyteswapio")
+string(APPEND ECBUILD_Fortran_FLAGS " -Mstack_arrays")
+string(APPEND ECBUILD_Fortran_FLAGS " -Mrecursive")
+string(APPEND ECBUILD_Fortran_FLAGS " -Ktrap=fp")
+string(APPEND ECBUILD_Fortran_FLAGS " -Kieee")
+string(APPEND ECBUILD_Fortran_FLAGS " -Mdaz")
+
+set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" )
+
+set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" )
+
+set( ECBUILD_CXX_FLAGS "-O2 -gopt" )
+
+# Fix for C++ template headers needed for Serialbox
+set( GNU_HEADER_INCLUDE "-I/usr/local/apps/gcc/7.3.0/lib/gcc/x86_64-linux-gnu/7.3.0/include-fixed" )
+set( ECBUILD_CXX_FLAGS "${ECBUILD_CXX_FLAGS} ${GNU_HEADER_INCLUDE}" )
diff --git a/arch/eurohpc/lumi/amd-gpu/8.3.3/env.sh b/arch/eurohpc/lumi/amd-gpu/8.3.3/env.sh
new file mode 100644
index 00000000..67eee0b4
--- /dev/null
+++ b/arch/eurohpc/lumi/amd-gpu/8.3.3/env.sh
@@ -0,0 +1,44 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# Source me to get the correct configure/build/run environment
+
+# Store tracing and disable (module is *way* too verbose)
+{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null
+
+module_load() {  # announce, then load, the module named by $1
+  echo "+ module load $1"
+  module load "$1"  # quoted so the name reaches `module` as exactly one word
+}
+module_unload() {  # announce, then unload, the module named by $1
+  echo "+ module unload $1"
+  module unload "$1"  # quoted so the name reaches `module` as exactly one word
+}
+
+# Unload to be certain
+module reset
+
+# Load modules
+module_load LUMI/22.08
+module_load partition/G
+module_load PrgEnv-aocc/8.3.3
+module_load craype-accel-amd-gfx90a
+module_load buildtools/22.08
+module_load cray-hdf5/1.12.1.5
+module_load cray-python/3.9.12.1
+
+# Specify compilers
+export CC=amdclang CXX=amdclang++ FC=amdflang
+#export CC=cc CXX=CC FC=ftn
+
+set -x
+
+# Restore tracing to stored setting
+{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/eurohpc/lumi/amd-gpu/8.3.3/toolchain.cmake b/arch/eurohpc/lumi/amd-gpu/8.3.3/toolchain.cmake
new file mode 100644
index 00000000..557774af
--- /dev/null
+++ b/arch/eurohpc/lumi/amd-gpu/8.3.3/toolchain.cmake
@@ -0,0 +1,31 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+set( OpenMP_Fortran_FLAGS   "-fopenmp --offload-arch=gfx90a" CACHE STRING "" )
+
+####################################################################
+# OpenAcc FLAGS
+####################################################################
+
+set( ENABLE_ACC OFF CACHE STRING "" )
+
+####################################################################
+# COMMON FLAGS
+####################################################################
+
+string(APPEND ECBUILD_Fortran_FLAGS " -fpic -O3")
diff --git a/arch/eurohpc/lumi/amd-host/8.3.3/env.sh b/arch/eurohpc/lumi/amd-host/8.3.3/env.sh
new file mode 100644
index 00000000..6c3ba711
--- /dev/null
+++ b/arch/eurohpc/lumi/amd-host/8.3.3/env.sh
@@ -0,0 +1,43 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# Source me to get the correct configure/build/run environment
+
+# Store tracing and disable (module is *way* too verbose)
+{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null
+
+module_load() {  # announce, then load, the module named by $1
+  echo "+ module load $1"
+  module load "$1"  # quoted so the name reaches `module` as exactly one word
+}
+module_unload() {  # announce, then unload, the module named by $1
+  echo "+ module unload $1"
+  module unload "$1"  # quoted so the name reaches `module` as exactly one word
+}
+
+# Unload to be certain
+module reset
+
+# Load modules
+module_load LUMI/22.08
+module_load partition/C
+module_load PrgEnv-aocc/8.3.3
+module_load craype-accel-host
+module_load buildtools/22.08
+module_load cray-hdf5/1.12.1.5
+module_load cray-python/3.9.12.1
+
+# Specify compilers
+export CC=cc CXX=CC FC=ftn
+
+set -x
+
+# Restore tracing to stored setting
+{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/eurohpc/lumi/amd-host/8.3.3/toolchain.cmake b/arch/eurohpc/lumi/amd-host/8.3.3/toolchain.cmake
new file mode 100644
index 00000000..dac9ed49
--- /dev/null
+++ b/arch/eurohpc/lumi/amd-host/8.3.3/toolchain.cmake
@@ -0,0 +1,25 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+
+####################################################################
+# OpenAcc FLAGS
+####################################################################
+
+set( ENABLE_ACC OFF CACHE STRING "" )
+
+####################################################################
+# COMMON FLAGS
+####################################################################
+
+string(APPEND ECBUILD_Fortran_FLAGS " -fpic -O3")
diff --git a/arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh b/arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh
new file mode 100644
index 00000000..2cc0f9b2
--- /dev/null
+++ b/arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh
@@ -0,0 +1,49 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# Source me to get the correct configure/build/run environment
+
+# Store tracing and disable (module is *way* too verbose)
+{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null
+
+module_load() {  # announce, then load, the module named by $1
+  echo "+ module load $1"
+  module load "$1"  # quoted so the name reaches `module` as exactly one word
+}
+module_unload() {  # announce, then unload, the module named by $1
+  echo "+ module unload $1"
+  module unload "$1"  # quoted so the name reaches `module` as exactly one word
+}
+
+# Unload to be certain
+module reset
+
+# Load modules
+module_load PrgEnv-cray/8.3.3
+module_load LUMI/22.08
+# module_load partition/G
+module_load rocm/5.0.2
+module_load cce/14.0.2
+module_load cray-libsci/22.08.1.1
+module_load cray-mpich/8.1.18
+module_load craype/2.7.17
+module_load craype-accel-amd-gfx90a
+module_load buildtools/22.08
+module_load cray-hdf5/1.12.1.5
+module_load cray-python/3.9.12.1
+
+module list
+
+set -x
+
+export CC=cc CXX=CC FC=ftn
+
+# Restore tracing to stored setting
+{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake
new file mode 100644
index 00000000..0774cf51
--- /dev/null
+++ b/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake
@@ -0,0 +1,42 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI OFF )
+set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" )
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+set( ENABLE_OMP ON CACHE STRING "" )
+set( OpenMP_C_FLAGS   "-homp" CACHE STRING "" )
+set( OpenMP_Fortran_FLAGS   "-homp" CACHE STRING "" )
+
+####################################################################
+# OpenACC FLAGS
+####################################################################
+
+set( ENABLE_ACC ON CACHE STRING "" )
+set( OpenACC_C_FLAGS "-hacc" )
+set( OpenACC_CXX_FLAGS "-hacc" )
+set( OpenACC_Fortran_FLAGS "-hacc -h acc_model=deep_copy" )
+
+####################################################################
+# Compiler FLAGS
+####################################################################
+
+# General Flags (add to default); the option after "-Wl," must follow the comma without a blank
+set(ECBUILD_Fortran_FLAGS "-hcontiguous")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl,--as-needed")
+
+set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG")
diff --git a/arch/eurohpc/lumi/cray-host/14.0.2/env.sh b/arch/eurohpc/lumi/cray-host/14.0.2/env.sh
new file mode 100644
index 00000000..afe2ec1b
--- /dev/null
+++ b/arch/eurohpc/lumi/cray-host/14.0.2/env.sh
@@ -0,0 +1,42 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# Source me to get the correct configure/build/run environment
+
+# Store tracing and disable (module is *way* too verbose)
+{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null
+
+module_load() {  # announce, then load, the module named by $1
+  echo "+ module load $1"
+  module load "$1"  # quoted so the name reaches `module` as exactly one word
+}
+module_unload() {  # announce, then unload, the module named by $1
+  echo "+ module unload $1"
+  module unload "$1"  # quoted so the name reaches `module` as exactly one word
+}
+
+# Unload to be certain
+module reset
+
+# Load modules
+module_load PrgEnv-cray/8.3.3
+module_load LUMI/22.08
+# module_load craype-x86-milan
+module_load craype-accel-host
+module_load buildtools/22.08
+module_load cray-hdf5/1.12.1.5
+module_load cray-python/3.9.12.1
+
+module list
+
+set -x
+
+# Restore tracing to stored setting
+{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/eurohpc/lumi/cray-host/14.0.2/toolchain.cmake b/arch/eurohpc/lumi/cray-host/14.0.2/toolchain.cmake
new file mode 100644
index 00000000..638b81f3
--- /dev/null
+++ b/arch/eurohpc/lumi/cray-host/14.0.2/toolchain.cmake
@@ -0,0 +1,35 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" )
+
+####################################################################
+# OpenACC FLAGS
+####################################################################
+
+set( ENABLE_ACC OFF CACHE STRING "" )
+set( OpenACC_C_FLAGS "-hnoacc" )
+set( OpenACC_CXX_FLAGS "-hnoacc" )
+set( OpenACC_Fortran_FLAGS "-hnoacc" )
+
+####################################################################
+# Compiler FLAGS
+####################################################################
+
+# General Flags (add to default); the option after "-Wl," must follow the comma without a blank
+set(ECBUILD_Fortran_FLAGS "-hcontiguous")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl,--as-needed")
+
+set(ECBUILD_Fortran_FLAGS_BIT "-emf -N 1023 -O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG")
diff --git a/arch/eurohpc/meluxina/nvhpc/21.11/env.sh b/arch/eurohpc/meluxina/nvhpc/21.11/env.sh
new file mode 100644
index 00000000..d0253d8c
--- /dev/null
+++ b/arch/eurohpc/meluxina/nvhpc/21.11/env.sh
@@ -0,0 +1,58 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# Source me to get the correct configure/build/run environment
+
+# Store tracing and disable (module is *way* too verbose)
+{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null
+
+module_load() {  # announce, then load, the module named by $1
+  echo "+ module load $1"
+  module load "$1"  # quoted so the name reaches `module` as exactly one word
+}
+module_unload() {  # announce, then unload, the module named by $1
+  echo "+ module unload $1"
+  module unload "$1"  # quoted so the name reaches `module` as exactly one word
+}
+
+# Unload all modules to be certain
+module_unload ParaStationMPI
+module_unload NVHPC
+module_unload gompi
+module_unload HDF5
+module_unload CMake
+
+# Load modules
+module use /apps/USE/easybuild/staging/2022.1/modules/all
+
+module_load NVHPC/21.11
+module_load ParaStationMPI/5.4.11-1-GCC-10.3.0-CUDA-11.3.1
+module_load CMake/3.23.1
+module_load Boost/1.79.0-GCC-11.3.0
+module_load Python/3.10.4-GCCcore-11.3.0
+
+export CC=nvc
+export CXX=nvc++
+export F77=nvfortran
+export FC=nvfortran
+export F90=nvfortran
+
+export HDF5_ROOT=/mnt/tier2/project/p200061/nvhpc-install
+
+# Loki install workaround for new editable installs
+export SETUPTOOLS_ENABLE_FEATURES="legacy-editable"
+
+# Increase stack size to maximum
+ulimit -S -s unlimited
+
+set -x
+
+# Restore tracing to stored setting
+{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/eurohpc/meluxina/nvhpc/21.11/toolchain.cmake b/arch/eurohpc/meluxina/nvhpc/21.11/toolchain.cmake
new file mode 120000
index 00000000..a43ddee2
--- /dev/null
+++ b/arch/eurohpc/meluxina/nvhpc/21.11/toolchain.cmake
@@ -0,0 +1 @@
+../../../../toolchains/eurohpc-meluxina-nvhpc.cmake
\ No newline at end of file
diff --git a/arch/eurohpc/meluxina/nvhpc/22.3/env.sh b/arch/eurohpc/meluxina/nvhpc/22.3/env.sh
new file mode 100644
index 00000000..7a23acec
--- /dev/null
+++ b/arch/eurohpc/meluxina/nvhpc/22.3/env.sh
@@ -0,0 +1,56 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# Source me to get the correct configure/build/run environment
+
+# Store tracing and disable (module is *way* too verbose)
+{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null
+
+module_load() {  # announce, then load, the module named by $1
+  echo "+ module load $1"
+  module load "$1"  # quoted so the name reaches `module` as exactly one word
+}
+module_unload() {  # announce, then unload, the module named by $1
+  echo "+ module unload $1"
+  module unload "$1"  # quoted so the name reaches `module` as exactly one word
+}
+
+# Unload all modules to be certain
+module_unload ParaStationMPI
+module_unload NVHPC
+module_unload gompi
+module_unload HDF5
+module_unload CMake
+
+# Load modules
+module use /apps/USE/easybuild/staging/2021.5/modules/all
+
+module_load NVHPC/22.3
+module_load ParaStationMPI/5.4.11-1-GCC-10.3.0-CUDA-11.3.1
+module_load CMake/3.20.4
+module_load CUDA/11.3.1
+module_load Boost/1.76.0-GCC-10.3.0
+module_load Python/3.9.5-GCCcore-10.3.0
+
+export CC=nvc
+export CXX=nvc++
+export F77=nvfortran
+export FC=nvfortran
+export F90=nvfortran
+
+export HDF5_ROOT=/mnt/tier2/project/p200061/nvhpc-install
+
+# Increase stack size to maximum
+ulimit -S -s unlimited
+
+set -x
+
+# Restore tracing to stored setting
+{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/eurohpc/meluxina/nvhpc/22.3/toolchain.cmake b/arch/eurohpc/meluxina/nvhpc/22.3/toolchain.cmake
new file mode 120000
index 00000000..a43ddee2
--- /dev/null
+++ b/arch/eurohpc/meluxina/nvhpc/22.3/toolchain.cmake
@@ -0,0 +1 @@
+../../../../toolchains/eurohpc-meluxina-nvhpc.cmake
\ No newline at end of file
diff --git a/arch/github/ubuntu/gnu/9.3.0/env.sh b/arch/github/ubuntu/gnu/9.4.0/env.sh
similarity index 67%
rename from arch/github/ubuntu/gnu/9.3.0/env.sh
rename to arch/github/ubuntu/gnu/9.4.0/env.sh
index 0e164879..198e0466 100644
--- a/arch/github/ubuntu/gnu/9.3.0/env.sh
+++ b/arch/github/ubuntu/gnu/9.4.0/env.sh
@@ -1,7 +1,7 @@
 # Source me to get the correct configure/build/run environment
 
-export CC=gcc
-export CXX=g++
-export FC=gfortran
+export CC=gcc-9
+export CXX=g++-9
+export FC=gfortran-9
 
 export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/github/ubuntu/gnu/9.3.0/toolchain.cmake b/arch/github/ubuntu/gnu/9.4.0/toolchain.cmake
similarity index 100%
rename from arch/github/ubuntu/gnu/9.3.0/toolchain.cmake
rename to arch/github/ubuntu/gnu/9.4.0/toolchain.cmake
diff --git a/arch/github/ubuntu/nvhpc/21.9/env.sh b/arch/github/ubuntu/nvhpc/21.9/env.sh
new file mode 100644
index 00000000..948b45a2
--- /dev/null
+++ b/arch/github/ubuntu/nvhpc/21.9/env.sh
@@ -0,0 +1,35 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# Source me to get the correct configure/build/run environment
+
+### Variables
+export NVHPC_INSTALL_DIR=${GITHUB_WORKSPACE}/nvhpc-install
+export NVHPC_VERSION=21.9
+export NVHPC_DIR=${NVHPC_INSTALL_DIR}/Linux_x86_64/${NVHPC_VERSION}
+
+### Compilers (an LD_LIBRARY_PATH that is already set is kept behind the compiler libraries)
+export PATH=${NVHPC_DIR}/compilers/bin:${PATH}
+export NVHPC_LIBRARY_PATH=${NVHPC_DIR}/compilers/lib
+export LD_LIBRARY_PATH=${NVHPC_LIBRARY_PATH}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+
+### MPI
+export MPI_HOME=${NVHPC_DIR}/comm_libs/mpi
+export PATH=${MPI_HOME}/bin:${PATH}
+
+### HDF5
+export HDF5_DIR=${GITHUB_WORKSPACE}/hdf5-install
+export LD_LIBRARY_PATH=${HDF5_DIR}/lib:${LD_LIBRARY_PATH}
+export PATH=${HDF5_DIR}/bin:${PATH}
+
+### Compiler variables
+export CC=pgcc
+export CXX=pgc++
+export FC=pgf90
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/github/ubuntu/nvhpc/21.9/toolchain.cmake b/arch/github/ubuntu/nvhpc/21.9/toolchain.cmake
new file mode 120000
index 00000000..2fd38d62
--- /dev/null
+++ b/arch/github/ubuntu/nvhpc/21.9/toolchain.cmake
@@ -0,0 +1 @@
+../../../../toolchains/github-ubuntu-nvhpc.cmake
\ No newline at end of file
diff --git a/arch/isambard/a64fx/arm/21.0.0/env.sh b/arch/isambard/a64fx/arm/21.0.0/env.sh
index 1a7fc45d..3b6fa32f 100644
--- a/arch/isambard/a64fx/arm/21.0.0/env.sh
+++ b/arch/isambard/a64fx/arm/21.0.0/env.sh
@@ -21,7 +21,12 @@ export FC_INCLUDES=-I$HDF5_DIR/include
 export CC=armclang
 export CXX=armclang++
 export FC=armflang
- 
+
+# ARM PL math library
+module use /software/aarch64/tools/arm-compiler/21.0/modulefiles
+module load arm21
+module load armpl-AArch64-SVE/21.0.0
+
 module load openmpi/4.1.0/arm-21.0
 module load cmake
 
diff --git a/arch/isambard/a64fx/arm/21.0.0/toolchain.cmake b/arch/isambard/a64fx/arm/21.0.0/toolchain.cmake
index 3bb6cb5f..d94324a7 100644
--- a/arch/isambard/a64fx/arm/21.0.0/toolchain.cmake
+++ b/arch/isambard/a64fx/arm/21.0.0/toolchain.cmake
@@ -17,30 +17,24 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 # OpenMP FLAGS
 ####################################################################
 
-set( OpenMP_C_FLAGS             "-fopenmp" )
-set( OpenMP_CXX_FLAGS           "-fopenmp" )
-set( OpenMP_Fortran_FLAGS       "-fopenmp" )
+set( OpenMP_C_FLAGS   "-fopenmp" CACHE STRING "" )
+set( OpenMP_CXX_FLAGS   "-fopenmp" CACHE STRING "" )
+set( OpenMP_Fortran_FLAGS   "-fopenmp" CACHE STRING "" )
 
 ####################################################################
 # COMMON FLAGS
 ####################################################################
 
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fpic")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -flto")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ofast")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -mcpu=a64fx")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -march=armv8.2-a+sve")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -I$ENV{ARMPL_DIR}/include ")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -DNDEBUG")
-
-set(ECBUILD_C_FLAGS "${ECBUILD_C_FLAGS} -fpic")
-set(ECBUILD_CXX_FLAGS "${ECBUILD_CXX_FLAGS} -fpic")
-set(ECBUILD_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS} -fpic")
-set(ECBUILD_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS} -flto")
-set(ECBUILD_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS} -armpl")
+foreach(LANG C CXX Fortran LINKER)
+    string(APPEND ECBUILD_${LANG}_FLAGS " -fpic")
+    string(APPEND ECBUILD_${LANG}_FLAGS " -flto")
+endforeach()
 
+set(ECBUILD_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS} -armpl")
 set(CMAKE_EXE_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS}")
 set(CMAKE_SHARED_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS}")
 
+set(ECBUILD_Fortran_FLAGS_BIT "-Ofast -mcpu=a64fx -march=armv8.2-a+sve -I$ENV{ARMPL_INCLUDES} -DNDEBUG")
+
 # Compatibility with HDF5 1.12
 set(H5_USE_110_API ON)
diff --git a/arch/isambard/a64fx/cray/10.0.1/toolchain.cmake b/arch/isambard/a64fx/cray/10.0.1/toolchain.cmake
index c9f23e8b..f17dd2fc 100644
--- a/arch/isambard/a64fx/cray/10.0.1/toolchain.cmake
+++ b/arch/isambard/a64fx/cray/10.0.1/toolchain.cmake
@@ -14,15 +14,13 @@
 set( ECBUILD_FIND_MPI ON )
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -O3")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hfp1")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hscalar3")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hvector3")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -G2")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hcontiguous")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -haggress")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -DNDEBUG")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl,--as-needed")  # no blank after the comma, otherwise the option splits in two
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -target-accel=host")
+
+set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG")
 
 ####################################################################
 # LINK FLAGS
diff --git a/arch/isambard/a64fx/fujitsu/4.3.1/toolchain.cmake b/arch/isambard/a64fx/fujitsu/4.3.1/toolchain.cmake
index 2e145a4b..24535492 100644
--- a/arch/isambard/a64fx/fujitsu/4.3.1/toolchain.cmake
+++ b/arch/isambard/a64fx/fujitsu/4.3.1/toolchain.cmake
@@ -14,46 +14,41 @@ set( ECBUILD_FIND_MPI ON )
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
 if( ENABLE_MPI )
-set(CMAKE_Fortran_COMPILER "mpifrt")
 set(CMAKE_C_COMPILER "mpifcc")
 set(CMAKE_CXX_COMPILER "mpiFCC")
+set(CMAKE_Fortran_COMPILER "mpifrt")
+else()
+set(CMAKE_C_COMPILER "fcc")
+set(CMAKE_CXX_COMPILER "FCC")
+set(CMAKE_Fortran_COMPILER "frt")
 endif()
 
 ####################################################################
 # OpenMP FLAGS
 ####################################################################
 
-set( OpenMP_C_FLAGS             "-Kopenmp" )
-set( OpenMP_CXX_FLAGS           "-Kopenmp" )
-set( OpenMP_Fortran_FLAGS       "-Kopenmp" )
+set( OpenMP_C_FLAGS   "-Kopenmp -Nfjomplib" CACHE STRING "" )
+set( OpenMP_C_LIB_NAMES   "" CACHE STRING "" )
+set( OpenMP_CXX_FLAGS   "-Kopenmp -Nfjomplib" CACHE STRING "" )
+set( OpenMP_CXX_LIB_NAMES   "" CACHE STRING "" )
+set( OpenMP_Fortran_FLAGS   "-Kopenmp -Nfjomplib" CACHE STRING "" )
 
 ####################################################################
 # COMMON FLAGS
 ####################################################################
 
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fpic")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kfast")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kopenmp")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Nfjomplib")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -O3")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -KA64FX")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -KSVE")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -KARMV8_3_A")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ksimd=2")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Klto")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -SSL2")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kassume=notime_saving_compilation")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -DNDEBUG")
-
-set(ECBUILD_C_FLAGS "${ECBUILD_C_FLAGS} -fpic")
-set(ECBUILD_C_FLAGS "${ECBUILD_C_FLAGS} -Klto")
-set(ECBUILD_CXX_FLAGS "${ECBUILD_CXX_FLAGS} -fpic")
-set(ECBUILD_CXX_FLAGS "${ECBUILD_CXX_FLAGS} -Klto")
-
-set(ECBUILD_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS} -Kopenmp")
-set(ECBUILD_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS} -Nfjomplib")
-set(ECBUILD_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS} -Klto")
-set(ECBUILD_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS} -SSL2")
+foreach(LANG IN ITEMS C CXX Fortran LINKER)
+    set(ECBUILD_${LANG}_FLAGS "${ECBUILD_${LANG}_FLAGS} -fpic")
+    set(ECBUILD_${LANG}_FLAGS "${ECBUILD_${LANG}_FLAGS} -Klto")
+    set(ECBUILD_${LANG}_FLAGS "${ECBUILD_${LANG}_FLAGS} -SSL2")
+    set(ECBUILD_${LANG}_FLAGS "${ECBUILD_${LANG}_FLAGS} -Kopenmp")
+    set(ECBUILD_${LANG}_FLAGS "${ECBUILD_${LANG}_FLAGS} -Nfjomplib")
+endforeach()
+
+set(CMAKE_EXE_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS}")
+set(CMAKE_SHARED_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS}")
+
+set( ECBUILD_Fortran_FLAGS_BIT "-Kfast -O3 -KA64FX -KSVE -KARMV8_3_A -Ksimd=2 -Kassume=notime_saving_compilation -DNDEBUG" )
 
 # Compatibility with HDF5 1.12
 set(H5_USE_110_API ON)
diff --git a/arch/isambard/a64fx/gcc/11.0.0/toolchain.cmake b/arch/isambard/a64fx/gcc/11.0.0/toolchain.cmake
deleted file mode 100644
index c4d2d6cf..00000000
--- a/arch/isambard/a64fx/gcc/11.0.0/toolchain.cmake
+++ /dev/null
@@ -1,59 +0,0 @@
-# (C) Copyright 1988- ECMWF.
-#
-# This software is licensed under the terms of the Apache Licence Version 2.0
-# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
-# In applying this licence, ECMWF does not waive the privileges and immunities
-# granted to it by virtue of its status as an intergovernmental organisation
-# nor does it submit to any jurisdiction.
-
-####################################################################
-# COMPILER
-####################################################################
-
-set( ECBUILD_FIND_MPI ON )
-set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-
-####################################################################
-# OpenMP FLAGS
-####################################################################
-
-set( OpenMP_C_FLAGS             "-fopenmp" )
-set( OpenMP_CXX_FLAGS           "-fopenmp" )
-set( OpenMP_Fortran_FLAGS       "-fopenmp" )
-
-####################################################################
-# COMMON FLAGS
-####################################################################
-
-
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fpic")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -flto")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ofast")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -mtune=native")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -mcpu=a64fx")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -march=armv8.2-a+sve")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fstack-arrays")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fallow-argument-mismatch")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fconvert=big-endian")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fno-second-underscore")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -ffree-form")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -ffast-math")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -DNDEBUG")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -funroll-all-loops")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -finline-functions")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -I$ENV{ARMPL_INCLUDES}")
-
-set(ECBUILD_C_FLAGS "${ECBUILD_C_FLAGS} -fpic")
-set(ECBUILD_C_FLAGS "${ECBUILD_C_FLAGS} -flto")
-set(ECBUILD_CXX_FLAGS "${ECBUILD_CXX_FLAGS} -fpic")
-set(ECBUILD_CXX_FLAGS "${ECBUILD_CXX_FLAGS} -flto")
-
-set(ECBUILD_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS} -fpic")
-set(ECBUILD_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS} -flto")
-set(ECBUILD_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS} -L$ENV{ARMPL_LIBRARIES} -larmpl_mp -lamath -lm")
-
-set(CMAKE_EXE_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS}")
-set(CMAKE_SHARED_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS}")
-
-# Compatibility with HDF5 1.12
-set(H5_USE_110_API ON)
diff --git a/arch/isambard/a64fx/gcc/11.0.0/env.sh b/arch/isambard/a64fx/gnu/11.0.0/env.sh
similarity index 100%
rename from arch/isambard/a64fx/gcc/11.0.0/env.sh
rename to arch/isambard/a64fx/gnu/11.0.0/env.sh
diff --git a/arch/isambard/a64fx/gnu/11.0.0/toolchain.cmake b/arch/isambard/a64fx/gnu/11.0.0/toolchain.cmake
new file mode 100644
index 00000000..c789e411
--- /dev/null
+++ b/arch/isambard/a64fx/gnu/11.0.0/toolchain.cmake
@@ -0,0 +1,40 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+set( OpenMP_C_FLAGS   "-fopenmp" CACHE STRING "" )
+set( OpenMP_CXX_FLAGS   "-fopenmp" CACHE STRING "" )
+set( OpenMP_Fortran_FLAGS   "-fopenmp" CACHE STRING "" )
+
+####################################################################
+# COMMON FLAGS
+####################################################################
+
+foreach(LANG IN ITEMS C CXX Fortran LINKER)
+    set(ECBUILD_${LANG}_FLAGS "${ECBUILD_${LANG}_FLAGS} -fpic")
+# NOTE(review): "-flto" is not enabled here; it was set by the former gcc/11.0.0 toolchain — confirm this is intentional
+endforeach()
+
+set(ECBUILD_Fortran_FLAGS_BIT "-Ofast -mtune=native -mcpu=a64fx -march=armv8.2-a+sve -fstack-arrays -fallow-argument-mismatch -fconvert=big-endian -fno-second-underscore -ffast-math -DNDEBUG -funroll-all-loops -finline-functions -I$ENV{ARMPL_INCLUDES}")
+set(ECBUILD_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS} -L$ENV{ARMPL_LIBRARIES} -larmpl_mp -lamath -lm")
+
+set(CMAKE_EXE_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS}")
+set(CMAKE_SHARED_LINKER_FLAGS "${ECBUILD_LINKER_FLAGS}")
+
+# Compatibility with HDF5 1.12
+set(H5_USE_110_API ON)
diff --git a/arch/isambard/macs/cray-host/11.0.4/env.sh b/arch/isambard/macs/cray-host/11.0.4/env.sh
new file mode 100644
index 00000000..b1778e98
--- /dev/null
+++ b/arch/isambard/macs/cray-host/11.0.4/env.sh
@@ -0,0 +1,30 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+export CC=cc
+export CXX=CC
+export FC=ftn
+
+module use /lustre/projects/bristol/modules/modulefiles
+
+module load PrgEnv-cray/8.0.0
+module unload craype-broadwell
+module unload craype-network-infiniband
+module load craype-accel-host
+module load craype-x86-rome
+module load cmake/3.23.2
+module load intel/mpi/64/2020
+
+export HDF5_DIR=$HOME/dwarf-p-cloudsc/hdf5/cray/11.0.4
+export HDF5_ROOT=$HOME/dwarf-p-cloudsc/hdf5/cray/11.0.4
+
+module list
+
+ulimit -S -s unlimited
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/isambard/macs/cray-host/11.0.4/toolchain.cmake b/arch/isambard/macs/cray-host/11.0.4/toolchain.cmake
new file mode 100644
index 00000000..cecfa6af
--- /dev/null
+++ b/arch/isambard/macs/cray-host/11.0.4/toolchain.cmake
@@ -0,0 +1,53 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI OFF )
+set( ENABLE_ACC OFF )
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+set( OpenMP_C_FLAGS   "-homp -hnoacc" CACHE STRING "" )
+set( OpenMP_C_LIB_NAMES   "" CACHE STRING "" )
+set( OpenMP_Fortran_FLAGS   "-homp -hnoacc" CACHE STRING "" )
+
+
+####################################################################
+# Compiler FLAGS
+####################################################################
+
+# General Flags (add to default)
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hcontiguous")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl,--as-needed")  # linker option must follow "-Wl," with no space
+
+set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG")
+
+####################################################################
+# LINK FLAGS
+####################################################################
+
+if( EXISTS "$ENV{CC_X86_64}/lib/x86-64/libcray-c++-rts.so" )
+  set( LIBCRAY_CXX_RTS "$ENV{CC_X86_64}/lib/x86-64/libcray-c++-rts.so" )
+elseif( EXISTS "$ENV{CC_X86_64}/lib/libcray-c++-rts.so" )
+  set( LIBCRAY_CXX_RTS "$ENV{CC_X86_64}/lib/libcray-c++-rts.so" )
+endif()
+
+set( ECBUILD_SHARED_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp" )
+set( ECBUILD_MODULE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp -Wl,-Map,loadmap" )
+set( ECBUILD_EXE_LINKER_FLAGS    "-Wl,--eh-frame-hdr -Ktrap=fp -Wl,-Map,loadmap -Wl,--as-needed" )
+set( ECBUILD_CXX_IMPLICIT_LINK_LIBRARIES "${LIBCRAY_CXX_RTS}" CACHE STRING "" )
+
+# Compatibility with HDF5 1.12
+set(H5_USE_110_API ON)
diff --git a/arch/isambard/macs/intel/2021.4.0/env.sh b/arch/isambard/macs/intel/2021.4.0/env.sh
new file mode 100644
index 00000000..27310863
--- /dev/null
+++ b/arch/isambard/macs/intel/2021.4.0/env.sh
@@ -0,0 +1,44 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# Source me to get the correct configure/build/run environment
+
+# Store tracing and disable (module is *way* too verbose)
+{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null
+
+module_load() {  # echo the command, then load the module named by $1
+  echo "+ module load $1"
+  module load "$1"
+}
+module_unload() {  # echo the command, then unload the module named by $1
+  echo "+ module unload $1"
+  module unload "$1"
+}
+
+export CC=icc
+export CXX=icpc
+export FC=ifort
+
+module_unload PrgEnv-cray
+
+module use /lustre/projects/bristol/modules/modulefiles/
+
+module load gcc/10.3.0
+# module_load intel/oneapi/2021.1
+# module_load IntelOneApi/mpi/2021.4.0
+source /lustre/software/x86/tools/oneapi-2021.4.0/setvars.sh || true
+module_load cmake/3.23.2
+export HDF5_DIR=$HOME/dwarf-p-cloudsc/hdf5/intel/2021.4.0
+export HDF5_ROOT=$HOME/dwarf-p-cloudsc/hdf5/intel/2021.4.0
+
+set -x
+
+# Restore tracing to stored setting
+{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/isambard/macs/intel/2021.4.0/toolchain.cmake b/arch/isambard/macs/intel/2021.4.0/toolchain.cmake
new file mode 100644
index 00000000..e4d549ce
--- /dev/null
+++ b/arch/isambard/macs/intel/2021.4.0/toolchain.cmake
@@ -0,0 +1,32 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+set( ECBUILD_TRUST_FLAGS ON )
+
+####################################################################
+# Compiler FLAGS
+####################################################################
+
+# General Flags (add to default)
+
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -qopenmp-threadprivate compat")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -assume byterecl")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -convert big_endian")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -traceback")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -align array64byte")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -warn nounused,nouncalled")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -finline-functions")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -finline-limit=500")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Winline")
+
+set(ECBUILD_Fortran_FLAGS_BIT "-O3 -march=core-avx2")
diff --git a/arch/isambard/p3/cray-host/13.0.0/env.sh b/arch/isambard/p3/cray-host/13.0.0/env.sh
new file mode 100644
index 00000000..e9e753eb
--- /dev/null
+++ b/arch/isambard/p3/cray-host/13.0.0/env.sh
@@ -0,0 +1,28 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+export CC=cc
+export CXX=CC
+export FC=ftn
+
+module reset
+
+module use /lustre/projects/bristol/modules/modulefiles
+
+module load PrgEnv-cray/8.2.0
+module load craype-accel-host
+module load craype-x86-milan
+module load cray-pals
+module load cray-hdf5/1.12.0.7
+module load cmake/3.23.2
+
+module list
+
+ulimit -S -s unlimited
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/isambard/p3/cray-host/13.0.0/toolchain.cmake b/arch/isambard/p3/cray-host/13.0.0/toolchain.cmake
new file mode 100644
index 00000000..7b19b202
--- /dev/null
+++ b/arch/isambard/p3/cray-host/13.0.0/toolchain.cmake
@@ -0,0 +1,53 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI OFF )
+set( ENABLE_ACC OFF )
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+set( OpenMP_C_FLAGS   "-homp -hnoacc" CACHE STRING "" )
+set( OpenMP_C_LIB_NAMES   "sci_cray_mpi_mp;sci_cray_mp;craymp;atomic" CACHE STRING "" )
+set( OpenMP_Fortran_FLAGS   "-homp -hnoacc" CACHE STRING "" )
+
+
+####################################################################
+# Compiler FLAGS
+####################################################################
+
+# General Flags (add to default)
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hcontiguous")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl,--as-needed")  # linker option must follow "-Wl," with no space
+
+set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG")
+
+####################################################################
+# LINK FLAGS
+####################################################################
+
+if( EXISTS "$ENV{CC_X86_64}/lib/x86-64/libcray-c++-rts.so" )
+  set( LIBCRAY_CXX_RTS "$ENV{CC_X86_64}/lib/x86-64/libcray-c++-rts.so" )
+elseif( EXISTS "$ENV{CC_X86_64}/lib/libcray-c++-rts.so" )
+  set( LIBCRAY_CXX_RTS "$ENV{CC_X86_64}/lib/libcray-c++-rts.so" )
+endif()
+
+set( ECBUILD_SHARED_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp" )
+set( ECBUILD_MODULE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp -Wl,-Map,loadmap" )
+set( ECBUILD_EXE_LINKER_FLAGS    "-Wl,--eh-frame-hdr -Ktrap=fp -Wl,-Map,loadmap -Wl,--as-needed" )
+set( ECBUILD_CXX_IMPLICIT_LINK_LIBRARIES "${LIBCRAY_CXX_RTS}" CACHE STRING "" )
+
+# Compatibility with HDF5 1.12
+set(H5_USE_110_API ON)
diff --git a/arch/isambard/p3/intel/2021.4.0/env.sh b/arch/isambard/p3/intel/2021.4.0/env.sh
new file mode 100644
index 00000000..3153464c
--- /dev/null
+++ b/arch/isambard/p3/intel/2021.4.0/env.sh
@@ -0,0 +1,45 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# Source me to get the correct configure/build/run environment
+
+# Store tracing and disable (module is *way* too verbose)
+{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null
+
+module_load() {  # echo the command, then load the module named by $1
+  echo "+ module load $1"
+  module load "$1"
+}
+module_unload() {  # echo the command, then unload the module named by $1
+  echo "+ module unload $1"
+  module unload "$1"
+}
+
+export CC=icc
+export CXX=icpc
+export FC=ifort
+
+module reset
+module_unload PrgEnv-cray
+
+module use /lustre/projects/bristol/modules/modulefiles
+
+module load gcc/11.2.0
+module_load IntelOneApi/compiler/2021.4.0
+# module_load IntelOneApi/mpi/2021.4.0
+source /lustre/software/x86/tools/oneapi-2021.4.0/setvars.sh || true
+module_load cmake/3.23.2
+export HDF5_DIR=$HOME/dwarf-p-cloudsc/hdf5/intel/2021.4.0
+export HDF5_ROOT=$HOME/dwarf-p-cloudsc/hdf5/intel/2021.4.0
+
+set -x
+
+# Restore tracing to stored setting
+{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null
+
+export ECBUILD_TOOLCHAIN="./toolchain.cmake"
diff --git a/arch/isambard/p3/intel/2021.4.0/toolchain.cmake b/arch/isambard/p3/intel/2021.4.0/toolchain.cmake
new file mode 100644
index 00000000..e4d549ce
--- /dev/null
+++ b/arch/isambard/p3/intel/2021.4.0/toolchain.cmake
@@ -0,0 +1,32 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+set( ECBUILD_TRUST_FLAGS ON )
+
+####################################################################
+# Compiler FLAGS
+####################################################################
+
+# General Flags (add to default)
+
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -qopenmp-threadprivate compat")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -assume byterecl")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -convert big_endian")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -traceback")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -align array64byte")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -warn nounused,nouncalled")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -finline-functions")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -finline-limit=500")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Winline")
+
+set(ECBUILD_Fortran_FLAGS_BIT "-O3 -march=core-avx2")
diff --git a/arch/ecmwf/atos-tems/intel/2021.1.1/toolchain.cmake b/arch/toolchains/ecmwf-hpc2020-intel.cmake
similarity index 89%
rename from arch/ecmwf/atos-tems/intel/2021.1.1/toolchain.cmake
rename to arch/toolchains/ecmwf-hpc2020-intel.cmake
index 3864f4e0..eff21209 100644
--- a/arch/ecmwf/atos-tems/intel/2021.1.1/toolchain.cmake
+++ b/arch/toolchains/ecmwf-hpc2020-intel.cmake
@@ -112,7 +112,7 @@ set( ENABLE_RELATIVE_RPATHS OFF CACHE STRING "Disable relative rpaths" FORCE )
 # COMPILER
 ####################################################################
 
-set( ECBUILD_FIND_MPI OFF )
+set( ECBUILD_FIND_MPI ON )
 set( ECBUILD_TRUST_FLAGS ON )
 
 ####################################################################
@@ -128,10 +128,16 @@ set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -convert big_endian")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -traceback")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -align array64byte")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -warn nounused,nouncalled")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -xHost")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -march=core-avx2")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -finline-functions")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -finline-limit=500")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -finline-limit=1500")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Winline")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -no-fma")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -assume realloc_lhs")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fp-model precise")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -ftz")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fp-speculation=safe")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fast-transcendentals")
 
 ####################################################################
 # LINK FLAGS
diff --git a/arch/toolchains/ecmwf-hpc2020-nvhpc.cmake b/arch/toolchains/ecmwf-hpc2020-nvhpc.cmake
new file mode 100644
index 00000000..ce8de9da
--- /dev/null
+++ b/arch/toolchains/ecmwf-hpc2020-nvhpc.cmake
@@ -0,0 +1,57 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module
+# unless it's stored as a cache variable
+set( OpenMP_Fortran_FLAGS   "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" )
+
+# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to
+# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone
+# doesn't have any effect here as the module uses FORCE to overwrite the
+# existing value)
+set( OpenMP_C_FLAGS         "-mp -mp=bind,allcores,numa" CACHE STRING "" )
+set( OpenMP_C_LIB_NAMES     "acchost" CACHE STRING "")
+
+####################################################################
+# OpenACC FLAGS
+####################################################################
+
+# NB: We have to add `-mp` again to avoid undefined symbols during linking
+# (smells like an Nvidia bug)
+set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc80,lineinfo,fastmath" CACHE STRING "" )
+# Enable this to get more detailed compiler output
+# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" )
+
+####################################################################
+# COMMON FLAGS
+####################################################################
+
+set(ECBUILD_Fortran_FLAGS "-fpic")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz")
+
+set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" )
+
+set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" )
+
+set( ECBUILD_CXX_FLAGS "-O2 -gopt" )
diff --git a/arch/toolchains/ecmwf-volta-pgi-gpu.cmake b/arch/toolchains/ecmwf-volta-pgi-gpu.cmake
index e7b50d8b..96359878 100644
--- a/arch/toolchains/ecmwf-volta-pgi-gpu.cmake
+++ b/arch/toolchains/ecmwf-volta-pgi-gpu.cmake
@@ -16,15 +16,24 @@ set( ECBUILD_FIND_MPI ON )
 # OpenMP FLAGS
 ####################################################################
 
-set( OpenMP_C_FLAGS             "-mp -mp=bind,allcores,numa" )
-set( OpenMP_CXX_FLAGS           "-mp -mp=bind,allcores,numa" )
-set( OpenMP_Fortran_FLAGS       "-mp -mp=bind,allcores,numa" )
+# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module
+# unless it's stored as a cache variable
+set( OpenMP_Fortran_FLAGS   "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" )
+
+# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to
+# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone
+# doesn't have any effect here as the module uses FORCE to overwrite the
+# existing value)
+set( OpenMP_C_FLAGS         "-mp -mp=bind,allcores,numa" CACHE STRING "" )
+set( OpenMP_C_LIB_NAMES     "acchost" CACHE STRING "")
 
 ####################################################################
 # OpenAcc FLAGS
 ####################################################################
 
-set( OpenACC_Fortran_FLAGS "-acc -ta=tesla:lineinfo,deepcopy,fastmath" )
+# NB: We have to add `-mp` again to avoid undefined symbols during linking
+# (smells like an Nvidia bug)
+set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc70,lineinfo,fastmath" CACHE STRING "" )
 # Enable this to get more detailed compiler output
 # set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" )
 
diff --git a/arch/toolchains/eurohpc-meluxina-nvhpc.cmake b/arch/toolchains/eurohpc-meluxina-nvhpc.cmake
new file mode 100644
index 00000000..ce8de9da
--- /dev/null
+++ b/arch/toolchains/eurohpc-meluxina-nvhpc.cmake
@@ -0,0 +1,57 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module
+# unless it's stored as a cache variable
+set( OpenMP_Fortran_FLAGS   "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" )
+
+# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to
+# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone
+# doesn't have any effect here as the module uses FORCE to overwrite the
+# existing value)
+set( OpenMP_C_FLAGS         "-mp -mp=bind,allcores,numa" CACHE STRING "" )
+set( OpenMP_C_LIB_NAMES     "acchost" CACHE STRING "")
+
+####################################################################
+# OpenACC FLAGS
+####################################################################
+
+# NB: We have to add `-mp` again to avoid undefined symbols during linking
+# (smells like an Nvidia bug)
+set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc80,lineinfo,fastmath" CACHE STRING "" )
+# Enable this to get more detailed compiler output
+# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" )
+
+####################################################################
+# COMMON FLAGS
+####################################################################
+
+set(ECBUILD_Fortran_FLAGS "-fpic")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz")
+
+set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" )
+
+set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" )
+
+set( ECBUILD_CXX_FLAGS "-O2 -gopt" )
diff --git a/arch/toolchains/github-ubuntu-gnu.cmake b/arch/toolchains/github-ubuntu-gnu.cmake
index 80cbcc39..f7e59fa4 100644
--- a/arch/toolchains/github-ubuntu-gnu.cmake
+++ b/arch/toolchains/github-ubuntu-gnu.cmake
@@ -11,7 +11,7 @@
 ####################################################################
 
 set( ECBUILD_FIND_MPI ON )
-set( ENABLE_ACC OFF )
+set( ENABLE_ACC OFF CACHE STRING "" )
 
 ####################################################################
 # Compiler FLAGS
@@ -23,6 +23,6 @@ set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fstack-arrays")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fconvert=big-endian")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fbacktrace")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fno-second-underscore")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -ffree-form")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -ffast-math")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fno-unsafe-math-optimizations")
+set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -foffload=disable")
diff --git a/arch/toolchains/github-ubuntu-nvhpc.cmake b/arch/toolchains/github-ubuntu-nvhpc.cmake
new file mode 100644
index 00000000..be437031
--- /dev/null
+++ b/arch/toolchains/github-ubuntu-nvhpc.cmake
@@ -0,0 +1,57 @@
+# (C) Copyright 1988- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+####################################################################
+# COMPILER
+####################################################################
+
+set( ECBUILD_FIND_MPI ON )
+
+####################################################################
+# OpenMP FLAGS
+####################################################################
+
+# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module
+# unless it is stored as a cache variable
+set( OpenMP_Fortran_FLAGS   "-mp -mp=bind,allcores,numa" CACHE STRING "" )
+
+# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES _both_ have to be provided to
+# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone
+# doesn't have any effect here as the module uses FORCE to overwrite the
+# existing value)
+set( OpenMP_C_FLAGS         "-mp -mp=bind,allcores,numa" CACHE STRING "" )
+set( OpenMP_C_LIB_NAMES     "acchost" CACHE STRING "")
+
+####################################################################
+# OpenACC FLAGS
+####################################################################
+
+# NB: We have to add `-mp` again to avoid undefined symbols during linking
+# (presumably an Nvidia compiler bug)
+set( OpenACC_Fortran_FLAGS "-acc -mp" CACHE STRING "" )
+# Uncomment the following line to get more detailed compiler output
+# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" )
+
+####################################################################
+# COMMON FLAGS
+####################################################################
+
+# Base Fortran flags, accumulated one option per line
+set( ECBUILD_Fortran_FLAGS "-fpic" )
+string( APPEND ECBUILD_Fortran_FLAGS " -Mframe" )
+string( APPEND ECBUILD_Fortran_FLAGS " -Mbyteswapio" )
+string( APPEND ECBUILD_Fortran_FLAGS " -Mstack_arrays" )
+string( APPEND ECBUILD_Fortran_FLAGS " -Mrecursive" )
+string( APPEND ECBUILD_Fortran_FLAGS " -Ktrap=fp" )
+string( APPEND ECBUILD_Fortran_FLAGS " -Kieee" )
+string( APPEND ECBUILD_Fortran_FLAGS " -Mdaz" )
+
+# Optimisation and debug-info flags (Fortran _BIT set, C and C++)
+set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" )
+set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" )
+set( ECBUILD_CXX_FLAGS "-O2 -gopt" )
diff --git a/arch/toolchains/jsc-deep-gnu.cmake b/arch/toolchains/jsc-deep-gnu.cmake
index d47c2ade..7051f7da 100644
--- a/arch/toolchains/jsc-deep-gnu.cmake
+++ b/arch/toolchains/jsc-deep-gnu.cmake
@@ -14,6 +14,5 @@ set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fstack-arrays")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fconvert=big-endian")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fbacktrace")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fno-second-underscore")
-set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -ffree-form")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -ffast-math")
 set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fno-unsafe-math-optimizations")
diff --git a/benchmark/JUBE.md b/benchmark/JUBE.md
new file mode 100644
index 00000000..439a7656
--- /dev/null
+++ b/benchmark/JUBE.md
@@ -0,0 +1,60 @@
+# JUBE benchmarking environment
+
+JUBE is a benchmarking environment that provides a script-based framework to
+create benchmark sets, run them and evaluate the results. It is developed at
+Forschungszentrum Juelich, Germany.
+
+Further information: https://www.fz-juelich.de/jsc/jube
+
+Documentation: https://apps.fz-juelich.de/jsc/jube/jube2/docu/
+
+## Installation
+
+```bash
+# Python 3 module loaded
+python3 -m venv venv
+source venv/bin/activate
+pip install -r requirements.txt
+```
+
+## Running a benchmark
+
+```bash
+# Virtual environment loaded
+jube run <benchmark>.yml
+```
+
+## Analysing and displaying results
+
+```bash
+# Virtual environment loaded
+jube analyse <benchmark run directory> --id <benchmark id>
+jube result <benchmark run directory> --id <benchmark id> | less -S
+```
+
+or, both in one:
+
+```bash
+# Virtual environment loaded
+jube result -a <benchmark run directory> --id <benchmark id> | less -S
+```
+
+Omitting the benchmark id is equivalent to using the latest benchmark run.
+
+## Useful commands
+
+Update benchmark results after modifying patterns or result table, without
+re-running the benchmark:
+
+```bash
+# Virtual environment loaded
+jube result -a -u <benchmark>.yml <benchmark run directory> --id <benchmark id> | less -S
+```
+
+Run without actually executing benchmark steps (useful to validate the YAML
+files and see the parameter space expansion):
+
+```bash
+# Virtual environment loaded
+jube --debug -v run <benchmark>.yml
+```
diff --git a/benchmark/README.md b/benchmark/README.md
new file mode 100644
index 00000000..d552872c
--- /dev/null
+++ b/benchmark/README.md
@@ -0,0 +1,123 @@
+# CLOUDSC benchmark setup for JUBE
+
+This provides different benchmarks:
+
+- cpu
+- gpu
+
+They use the JUBE benchmarking environment to automate their execution for a
+range of different build and run configurations. See [JUBE.md](JUBE.md) for
+an introduction.
+
+The benchmarks are defined in the main configuration file
+[`cloudsc.yml`](cloudsc.yml) and use external include files that provide the
+bits and pieces to configure their execution, with bespoke overrides for each
+benchmark in the main configuration file. The most relevant include files for
+users are:
+
+- [`include_arch.yml`](include/include_arch.yml):
+  Hardware configuration and `arch` file to use
+- [`include_parameterset.yml`](include/include_parameterset.yml):
+  Defines the matrix of active build options
+- [`include_run.yml`](include/include_run.yml):
+  Defines the matrix of execution options for each build
+
+The following files implement the mechanics of the benchmark execution and are
+less likely to require user editing:
+
+- [`include_step.yml`](include/include_step.yml):
+  Implementation of the benchmark steps
+- [`include_fileset_substituteset.yml`](include/include_fileset_substituteset.yml):
+  Required script file templates and required substitutions for each of them
+- [`include_patternset.yml`](include/include_patternset.yml):
+  Regular expressions to parse output
+- [`include_analyser.yml`](include/include_analyser.yml):
+  Application of regex patterns to execution stdout to collect output data
+- [`include_result.yml`](include/include_result.yml):
+  Compilation of result tables from analyser output data
+
+## Usage
+
+If they do not exist yet, create include files specific to the target platform
+that override the relevant configuration values. Typically, this means at least
+a bespoke copy of `include_arch.yml` in something like
+`arch/<site>/<platform>/<toolchain>/<version>/include_arch.yml`, but certain
+scenarios may also require customization of the other include files.
+
+Note that the parametersets can be initialized with the default values from the
+`include` directory by providing the `init_with` option. This makes it possible
+to overwrite only the values that need to be changed, e.g.
+
+```yaml
+parameterset:
+  - name: arch_set
+    init_with: include/include_arch.yml
+    parameter:
+    - {name: arch, _: arch/ecmwf/hpc2020/gnu/9.3.0}
+```
+
+With platform-specific overrides in place, JUBE can be installed and the benchmark
+executed using the following steps:
+
+```bash
+# Create a virtual environment and install JUBE
+python3 -m venv venv
+venv/bin/pip install -r requirements.txt
+
+# Execute the benchmark with the correct architecture file
+venv/bin/jube run cloudsc.yml --include arch/<site>/<platform>/<toolchain>/<version> \
+  [--only-bench=<cpu|gpu>] [-t <tag> [-t <tag> ...]] [-m "<description>"]
+
+# Analyse output and create results table
+venv/bin/jube result -a rundir_<cpu|gpu> --id=<benchmark id> | less -S
+```
+
+Note the following options to the `run` command:
+
+- `--include`: This should point to the directory with the platform-specific
+  include files to override parameters. This takes precedence over (but does
+  not replace) the default include path. Multiple include paths can be specified.
+- `--only-bench`: By specifying `cpu` or `gpu`, only the relevant benchmark
+  variant is executed.
+- `-t`: This allows providing a "tag" to select certain readily available
+  variations of parameters. Multiple tags can be supplied, separated by
+  white space. Currently available:
+  - `dp`/`sp` to enable double (the default) and single precision builds
+  - `serialbox` to use Serialbox instead of HDF5 as input library
+  - `mpi` to build with MPI support
+  - `sweep_nproma` varies the `default_nproma` value specified for the benchmark
+    by running with 1/4, 1/2, 1, 2, 4 times that value to find the optimum
+- `-m`: This allows providing a description for the benchmark execution to
+  help identify a specific run later on
+
+To view information about the performed benchmark runs, use the `info` command:
+
+```bash
+venv/bin/jube info rundir_<cpu|gpu> [--id=<benchmark id>]
+```
+
+Without `--id`, this lists all past runs and includes the description provided
+via `-m`. When a benchmark id is specified, it gives a summary of that specific
+benchmark run.
+
+To postprocess the result tables, the output format can be changed to CSV by
+adding `-s csv` to the `result` command.
+
+## Typical benchmarking workflow
+
+A typical benchmarking workflow for a new CPU platform may look like this:
+
+1. Create a platform-specific `include_arch.yml` file
+2. Install JUBE
+3. Run an `NPROMA` sweep for single and double precision:
+   `venv/bin/jube run cloudsc.yml --only-bench=cpu --include <path/to/dir/with/include_arch.yml> -t sp dp sweep_nproma -m "<Compiler> NPROMA sweep for platform xyz"`
+4. View results to select optimum NPROMA value:
+   `venv/bin/jube result rundir_cpu -a | less -S`
+5. If required: Create platform-specific `include_run.yml` file that changes
+   `default_nproma` value to optimum. Repeat NPROMA sweep, if necessary.
+6. Run benchmark (optionally with MPI across NUMA domains), e.g. as follows:
+   `venv/bin/jube run cloudsc.yml --only-bench=cpu --include <path/to/dir/with/include_arch.yml> -t sp dp mpi -m "<Compiler> MPI platform xyz"`
+7. View results:
+   `venv/bin/jube result rundir_cpu -a | less -S`
+8. Optional: dump results to CSV:
+   `venv/bin/jube result rundir_cpu -s csv > results.csv`
diff --git a/benchmark/arch/hpc2020/gnu/9.3.0/include_arch.yml b/benchmark/arch/hpc2020/gnu/9.3.0/include_arch.yml
new file mode 100644
index 00000000..e47a71ba
--- /dev/null
+++ b/benchmark/arch/hpc2020/gnu/9.3.0/include_arch.yml
@@ -0,0 +1,20 @@
+parameterset:
+  # System architecture specification
+  - name: arch_set
+    init_with: include/include_arch.yml
+    parameter:
+      # Architecture definition to pass to bundle build command
+      - {name: arch, _: "arch/ecmwf/hpc2020/gnu/9.3.0"}  # Choose from arch
+
+      # Number of NUMA domains on a node (e.g., the number of sockets)
+      - {name: numa_domains, type: int, _: 2}
+
+      # Number of cores per NUMA domain (e.g., number of cores per CPU)
+      - {name: cores_per_numa_domain, type: int, _: 64}
+
+      # MPI launch command to use
+      - name: launch_cmd
+        mode: python
+        separator: ;
+        _: "'srun -n ${{ NPROC }} --cpus-per-task=${{ NUMOMP }} --hint nomultithread bash -c \"OMP_PLACES={\\$(seq -s \\'},{\\' \\$((\\$SLURM_LOCALID*${{ NUMOMP }})) \\$(((\\$SLURM_LOCALID+1)*${{ NUMOMP }}-1)) )}' if $mpi == 1 else 'OMP_PLACES={$(seq -s \\'},{\\' 0 $((${{ NUMOMP }}-1)) )}'"
+      - {name: launch_cmd_end, mode: python, _: "'\"' if $mpi == 1 else ''"}
diff --git a/benchmark/arch/hpc2020/intel/2021.4.0/include_arch.yml b/benchmark/arch/hpc2020/intel/2021.4.0/include_arch.yml
new file mode 100644
index 00000000..f94db0e2
--- /dev/null
+++ b/benchmark/arch/hpc2020/intel/2021.4.0/include_arch.yml
@@ -0,0 +1,20 @@
+parameterset:
+  # System architecture specification
+  - name: arch_set
+    init_with: include/include_arch.yml
+    parameter:
+      # Architecture definition to pass to bundle build command
+      - {name: arch, _: "arch/ecmwf/hpc2020/intel/2021.4.0"}  # Choose from arch
+
+      # Number of NUMA domains on a node (e.g., the number of sockets)
+      - {name: numa_domains, type: int, _: 2}
+
+      # Number of cores per NUMA domain (e.g., number of cores per CPU)
+      - {name: cores_per_numa_domain, type: int, _: 64}
+
+      # MPI launch command to use
+      - name: launch_cmd
+        mode: python
+        separator: ;
+        _: "'srun -n ${{ NPROC }} --cpus-per-task=${{ NUMOMP }} --hint nomultithread bash -c \"OMP_PLACES={\\$(seq -s \\'},{\\' \\$((\\$SLURM_LOCALID*${{ NUMOMP }})) \\$(((\\$SLURM_LOCALID+1)*${{ NUMOMP }}-1)) )}' if $mpi == 1 else 'OMP_PLACES={$(seq -s \\'},{\\' 0 $((${{ NUMOMP }}-1)) )}'"
+      - {name: launch_cmd_end, mode: python, _: "'\"' if $mpi == 1 else ''"}
diff --git a/benchmark/arch/hpc2020/nvhpc/22.1/include_arch.yml b/benchmark/arch/hpc2020/nvhpc/22.1/include_arch.yml
new file mode 100644
index 00000000..1a2ede50
--- /dev/null
+++ b/benchmark/arch/hpc2020/nvhpc/22.1/include_arch.yml
@@ -0,0 +1,27 @@
+parameterset:
+  # System architecture specification
+  - name: arch_set
+    init_with: include/include_arch.yml
+    parameter:
+      # Architecture definition to pass to bundle build command
+      - {name: arch, _: "arch/ecmwf/hpc2020/nvhpc/22.1"}  # Choose from arch
+
+      # Number of NUMA domains on a node (e.g., the number of sockets)
+      - {name: numa_domains, type: int, _: 2}
+
+      # Number of cores per NUMA domain (e.g., number of cores per CPU)
+      - {name: cores_per_numa_domain, type: int, _: 64}
+
+      # Number of GPUs available on a node
+      - {name: gpus, type: int, _: 4}
+
+      # Set CUDA runtime heap size on GPU for SCC variant
+      - {name: PGI_ACC_CUDA_HEAPSIZE, export: true, _: 12G}
+
+      # MPI launch command to use (inject CUDA_VISIBLE_DEVICES)
+      - name: launch_cmd
+        mode: python
+        _: "'srun -n ${{ NPROC }} --cpus-per-task=${{ NUMOMP }} bash -c \"CUDA_VISIBLE_DEVICES=\\${SLURM_LOCALID}' if $mpi == 1 else ''"
+      - name: launch_cmd_end
+        mode: python
+        _: "'\"' if $mpi == 1 else ''"
diff --git a/benchmark/arch/isambard/a64fx/arm/21.0.0/include_arch.yml b/benchmark/arch/isambard/a64fx/arm/21.0.0/include_arch.yml
new file mode 100644
index 00000000..52085ba0
--- /dev/null
+++ b/benchmark/arch/isambard/a64fx/arm/21.0.0/include_arch.yml
@@ -0,0 +1,21 @@
+parameterset:
+  # System architecture specification
+  - name: arch_set
+    init_with: include/include_arch.yml
+    parameter:
+      # Architecture definition to pass to bundle build command
+      - {name: arch, _: "arch/isambard/a64fx/arm/21.0.0"}  # Choose from arch
+
+      # Number of NUMA domains on a node (e.g., the number of sockets)
+      - {name: numa_domains, type: int, _: 4}
+
+      # Number of cores per NUMA domain (e.g., number of cores per CPU)
+      - {name: cores_per_numa_domain, type: int, _: 12}
+
+      - {name: OMP_PLACES, export: true, _: cores}
+
+      # MPI launch command to use
+      - name: launch_cmd
+        mode: python
+        _: "'OMP_NUM_THREADS=${{ NUMOMP }} ' + ('mpirun --map-by socket -np ${{ NPROC }}' if $mpi == 1 else '')"
+      - {name: launch_cmd_end, _: ""}
diff --git a/benchmark/arch/isambard/a64fx/arm/21.0.0/include_run.yml b/benchmark/arch/isambard/a64fx/arm/21.0.0/include_run.yml
new file mode 100644
index 00000000..f5b7c5a6
--- /dev/null
+++ b/benchmark/arch/isambard/a64fx/arm/21.0.0/include_run.yml
@@ -0,0 +1,18 @@
+parameterset:
+  # The runtime options
+  - name: run_set
+    init_with: include/include_run.yml
+    parameter:
+      # Increase the default NPROMA value per variant
+      - name: default_nproma
+        mode: python
+        type: int
+        _: >
+          {
+            'dwarf-cloudsc-fortran': {'sp': 256, 'dp': 256}['$precision'],
+            'dwarf-cloudsc-c': {'sp': 32, 'dp': 16}['$precision'],
+            'dwarf-cloudsc-gpu-claw': 8192,
+            'dwarf-cloudsc-gpu-scc': 256,
+            'dwarf-cloudsc-gpu-scc-hoist': 128,
+            'dwarf-cloudsc-gpu-omp-scc-hoist': 128,
+          }['$target']
diff --git a/benchmark/arch/isambard/a64fx/cray/10.0.1/include_arch.yml b/benchmark/arch/isambard/a64fx/cray/10.0.1/include_arch.yml
new file mode 100644
index 00000000..32c3ac97
--- /dev/null
+++ b/benchmark/arch/isambard/a64fx/cray/10.0.1/include_arch.yml
@@ -0,0 +1,21 @@
+parameterset:
+  # System architecture specification
+  - name: arch_set
+    init_with: include/include_arch.yml
+    parameter:
+      # Architecture definition to pass to bundle build command
+      - {name: arch, _: "arch/isambard/a64fx/cray/10.0.1"}  # Choose from arch
+
+      # Number of NUMA domains on a node (e.g., the number of sockets)
+      - {name: numa_domains, type: int, _: 4}
+
+      # Number of cores per NUMA domain (e.g., number of cores per CPU)
+      - {name: cores_per_numa_domain, type: int, _: 12}
+
+      - {name: OMP_PLACES, export: true, _: cores}
+
+      # MPI launch command to use
+      - name: launch_cmd
+        mode: python
+        _: "'MV2_CPU_BINDING_POLICY=hybrid MV2_THREADS_PER_PROCESS=${{ NUMOMP }} mpiexec -bind-to socket -np ${{ NPROC }}' if $mpi == 1 else 'OMP_NUM_THREADS=${{ NUMOMP }}'"
+      - {name: launch_cmd_end, _: ""}
diff --git a/benchmark/arch/isambard/a64fx/cray/10.0.1/include_run.yml b/benchmark/arch/isambard/a64fx/cray/10.0.1/include_run.yml
new file mode 100644
index 00000000..ed75bd6e
--- /dev/null
+++ b/benchmark/arch/isambard/a64fx/cray/10.0.1/include_run.yml
@@ -0,0 +1,18 @@
+parameterset:
+  # The runtime options
+  - name: run_set
+    init_with: include/include_run.yml
+    parameter:
+      # Increase the default NPROMA value per variant
+      - name: default_nproma
+        mode: python
+        type: int
+        _: >
+          {
+            'dwarf-cloudsc-fortran': {'sp': 128, 'dp': 128}['$precision'],
+            'dwarf-cloudsc-c': {'sp': 32, 'dp': 16}['$precision'],
+            'dwarf-cloudsc-gpu-claw': 8192,
+            'dwarf-cloudsc-gpu-scc': 256,
+            'dwarf-cloudsc-gpu-scc-hoist': 128,
+            'dwarf-cloudsc-gpu-omp-scc-hoist': 128,
+          }['$target']
diff --git a/benchmark/arch/isambard/a64fx/fujitsu/4.3.1/include_arch.yml b/benchmark/arch/isambard/a64fx/fujitsu/4.3.1/include_arch.yml
new file mode 100644
index 00000000..76b31f55
--- /dev/null
+++ b/benchmark/arch/isambard/a64fx/fujitsu/4.3.1/include_arch.yml
@@ -0,0 +1,21 @@
+parameterset:
+  # System architecture specification
+  - name: arch_set
+    init_with: include/include_arch.yml
+    parameter:
+      # Architecture definition to pass to bundle build command
+      - {name: arch, _: "arch/isambard/a64fx/fujitsu/4.3.1"}  # Choose from arch
+
+      # Number of NUMA domains on a node (e.g., the number of sockets)
+      - {name: numa_domains, type: int, _: 4}
+
+      # Number of cores per NUMA domain (e.g., number of cores per CPU)
+      - {name: cores_per_numa_domain, type: int, _: 12}
+
+      - {name: OMP_PLACES, export: true, _: cores}
+
+      # MPI launch command to use
+      - name: launch_cmd
+        mode: python
+        _: "'OMP_NUM_THREADS=${{ NUMOMP }} ' + ('mpirun --map-by socket -np ${{ NPROC }}' if $mpi == 1 else '')"
+      - {name: launch_cmd_end, _: ""}
diff --git a/benchmark/arch/isambard/a64fx/fujitsu/4.3.1/include_run.yml b/benchmark/arch/isambard/a64fx/fujitsu/4.3.1/include_run.yml
new file mode 100644
index 00000000..ed75bd6e
--- /dev/null
+++ b/benchmark/arch/isambard/a64fx/fujitsu/4.3.1/include_run.yml
@@ -0,0 +1,18 @@
+parameterset:
+  # The runtime options
+  - name: run_set
+    init_with: include/include_run.yml
+    parameter:
+      # Increase the default NPROMA value per variant
+      - name: default_nproma
+        mode: python
+        type: int
+        _: >
+          {
+            'dwarf-cloudsc-fortran': {'sp': 128, 'dp': 128}['$precision'],
+            'dwarf-cloudsc-c': {'sp': 32, 'dp': 16}['$precision'],
+            'dwarf-cloudsc-gpu-claw': 8192,
+            'dwarf-cloudsc-gpu-scc': 256,
+            'dwarf-cloudsc-gpu-scc-hoist': 128,
+            'dwarf-cloudsc-gpu-omp-scc-hoist': 128,
+          }['$target']
diff --git a/benchmark/arch/isambard/a64fx/gnu/11.0.0/include_arch.yml b/benchmark/arch/isambard/a64fx/gnu/11.0.0/include_arch.yml
new file mode 100644
index 00000000..e1f26dfa
--- /dev/null
+++ b/benchmark/arch/isambard/a64fx/gnu/11.0.0/include_arch.yml
@@ -0,0 +1,21 @@
+parameterset:
+  # System architecture specification
+  - name: arch_set
+    init_with: include/include_arch.yml
+    parameter:
+      # Architecture definition to pass to bundle build command
+      - {name: arch, _: "arch/isambard/a64fx/gnu/11.0.0"}  # Choose from arch
+
+      # Number of NUMA domains on a node (e.g., the number of sockets)
+      - {name: numa_domains, type: int, _: 4}
+
+      # Number of cores per NUMA domain (e.g., number of cores per CPU)
+      - {name: cores_per_numa_domain, type: int, _: 12}
+
+      - {name: OMP_PLACES, export: true, _: cores}
+
+      # MPI launch command to use
+      - name: launch_cmd
+        mode: python
+        _: "'OMP_NUM_THREADS=${{ NUMOMP }} ' + ('mpirun --map-by socket -np ${{ NPROC }}' if $mpi == 1 else '')"
+      - {name: launch_cmd_end, _: ""}
diff --git a/benchmark/arch/isambard/a64fx/gnu/11.0.0/include_run.yml b/benchmark/arch/isambard/a64fx/gnu/11.0.0/include_run.yml
new file mode 100644
index 00000000..ed75bd6e
--- /dev/null
+++ b/benchmark/arch/isambard/a64fx/gnu/11.0.0/include_run.yml
@@ -0,0 +1,18 @@
+parameterset:
+  # The runtime options
+  - name: run_set
+    init_with: include/include_run.yml
+    parameter:
+      # Increase the default NPROMA value per variant
+      - name: default_nproma
+        mode: python
+        type: int
+        _: >
+          {
+            'dwarf-cloudsc-fortran': {'sp': 128, 'dp': 128}['$precision'],
+            'dwarf-cloudsc-c': {'sp': 32, 'dp': 16}['$precision'],
+            'dwarf-cloudsc-gpu-claw': 8192,
+            'dwarf-cloudsc-gpu-scc': 256,
+            'dwarf-cloudsc-gpu-scc-hoist': 128,
+            'dwarf-cloudsc-gpu-omp-scc-hoist': 128,
+          }['$target']
diff --git a/benchmark/arch/isambard/macs/cray-host/11.0.4/include_arch.yml b/benchmark/arch/isambard/macs/cray-host/11.0.4/include_arch.yml
new file mode 100644
index 00000000..6361347b
--- /dev/null
+++ b/benchmark/arch/isambard/macs/cray-host/11.0.4/include_arch.yml
@@ -0,0 +1,20 @@
+parameterset:
+  # System architecture specification
+  - name: arch_set
+    init_with: include/include_arch.yml
+    parameter:
+      # Architecture definition to pass to bundle build command
+      - {name: arch, _: "arch/isambard/macs/cray-host/11.0.4"}  # Choose from arch
+
+      # Number of NUMA domains on a node (e.g., the number of sockets)
+      - {name: numa_domains, type: int, _: 2}
+
+      # Number of cores per NUMA domain (e.g., number of cores per CPU)
+      - {name: cores_per_numa_domain, type: int, _: 64}
+
+      # MPI launch command to use
+      - name: launch_cmd
+        mode: python
+        separator: ;
+        _: "'I_MPI_PIN=off mpiexec -n ' + ('${{ NPROC }}' if $mpi == 1 else '1') + ' bash -c \"OMP_PLACES={\\$(seq -s \\'},{\\' \\$((\\$MPI_LOCALRANKID*${{ NUMOMP }})) \\$(((\\$MPI_LOCALRANKID+1)*${{ NUMOMP }}-1)) )}'"
+      - {name: launch_cmd_end, _: "\""}
diff --git a/benchmark/arch/isambard/macs/intel/2021.4.0/include_arch.yml b/benchmark/arch/isambard/macs/intel/2021.4.0/include_arch.yml
new file mode 100644
index 00000000..9045f4ae
--- /dev/null
+++ b/benchmark/arch/isambard/macs/intel/2021.4.0/include_arch.yml
@@ -0,0 +1,20 @@
+parameterset:
+  # System architecture specification
+  - name: arch_set
+    init_with: include/include_arch.yml
+    parameter:
+      # Architecture definition to pass to bundle build command
+      - {name: arch, _: "arch/isambard/macs/intel/2021.4.0"}  # Choose from arch
+
+      # Number of NUMA domains on a node (e.g., the number of sockets)
+      - {name: numa_domains, type: int, _: 2}
+
+      # Number of cores per NUMA domain (e.g., number of cores per CPU)
+      - {name: cores_per_numa_domain, type: int, _: 64}
+
+      # MPI launch command to use
+      - name: launch_cmd
+        mode: python
+        separator: ;
+        _: "'I_MPI_PIN=off mpirun -n ' + ('${{ NPROC }}' if $mpi == 1 else '1') + ' bash -c \"OMP_PLACES={\\$(seq -s \\'},{\\' \\$((\\$MPI_LOCALRANKID*${{ NUMOMP }})) \\$(((\\$MPI_LOCALRANKID+1)*${{ NUMOMP }}-1)) )}'"
+      - {name: launch_cmd_end, _: "\""}
diff --git a/benchmark/arch/isambard/p3/cray-host/13.0.0/include_arch.yml b/benchmark/arch/isambard/p3/cray-host/13.0.0/include_arch.yml
new file mode 100644
index 00000000..83302a45
--- /dev/null
+++ b/benchmark/arch/isambard/p3/cray-host/13.0.0/include_arch.yml
@@ -0,0 +1,19 @@
+parameterset:
+  # System architecture specification
+  - name: arch_set
+    init_with: include/include_arch.yml
+    parameter:
+      # Architecture definition to pass to bundle build command
+      - {name: arch, _: "arch/isambard/p3/cray-host/13.0.0"}  # Choose from arch
+
+      # Number of NUMA domains on a node (e.g., the number of sockets)
+      - {name: numa_domains, type: int, _: 2}
+
+      # Number of cores per NUMA domain (e.g., number of cores per CPU)
+      - {name: cores_per_numa_domain, type: int, _: 64}
+
+      # MPI launch command to use
+      - name: launch_cmd
+        mode: python
+        _: "'mpiexec -n ' + ('${{ NPROC }}' if $mpi == 1 else '1') + ' --depth ${{ NUMOMP }} --cpu-bind depth'"
+      - {name: launch_cmd_end, _: ""}
diff --git a/benchmark/arch/isambard/p3/cray-host/13.0.0/include_run.yml b/benchmark/arch/isambard/p3/cray-host/13.0.0/include_run.yml
new file mode 100644
index 00000000..e296c7b7
--- /dev/null
+++ b/benchmark/arch/isambard/p3/cray-host/13.0.0/include_run.yml
@@ -0,0 +1,18 @@
+parameterset:
+  # The runtime options
+  - name: run_set
+    init_with: include/include_run.yml
+    parameter:
+      # Increase the default NPROMA value per variant
+      - name: default_nproma
+        mode: python
+        type: int
+        _: >
+          {
+            'dwarf-cloudsc-fortran': {'sp': 64, 'dp': 32}['$precision'],
+            'dwarf-cloudsc-c': {'sp': 64, 'dp': 32}['$precision'],
+            'dwarf-cloudsc-gpu-claw': 8192,
+            'dwarf-cloudsc-gpu-scc': 256,
+            'dwarf-cloudsc-gpu-scc-hoist': 128,
+            'dwarf-cloudsc-gpu-omp-scc-hoist': 128,
+          }['$target']
diff --git a/benchmark/arch/isambard/p3/intel/2021.4.0/include_arch.yml b/benchmark/arch/isambard/p3/intel/2021.4.0/include_arch.yml
new file mode 100644
index 00000000..b9cc15d5
--- /dev/null
+++ b/benchmark/arch/isambard/p3/intel/2021.4.0/include_arch.yml
@@ -0,0 +1,20 @@
+parameterset:
+  # System architecture specification
+  - name: arch_set
+    init_with: include/include_arch.yml
+    parameter:
+      # Architecture definition to pass to bundle build command
+      - {name: arch, _: "arch/isambard/p3/intel/2021.4.0"}  # Choose from arch
+
+      # Number of NUMA domains on a node (e.g., the number of sockets)
+      - {name: numa_domains, type: int, _: 2}
+
+      # Number of cores per NUMA domain (e.g., number of cores per CPU)
+      - {name: cores_per_numa_domain, type: int, _: 64}
+
+      # MPI launch command to use
+      - name: launch_cmd
+        mode: python
+        separator: ;
+        _: "'I_MPI_PIN=off mpirun -n ' + ('${{ NPROC }}' if $mpi == 1 else '1') + ' bash -c \"OMP_PLACES={\\$(seq -s \\'},{\\' \\$((\\$MPI_LOCALRANKID*${{ NUMOMP }})) \\$(((\\$MPI_LOCALRANKID+1)*${{ NUMOMP }}-1)) )}'"
+      - {name: launch_cmd_end, _: "\""}
diff --git a/benchmark/arch/isambard/p3/intel/2021.4.0/include_run.yml b/benchmark/arch/isambard/p3/intel/2021.4.0/include_run.yml
new file mode 100644
index 00000000..e296c7b7
--- /dev/null
+++ b/benchmark/arch/isambard/p3/intel/2021.4.0/include_run.yml
@@ -0,0 +1,18 @@
+parameterset:
+  # The runtime options
+  - name: run_set
+    init_with: include/include_run.yml
+    parameter:
+      # Increase the default NPROMA value per variant
+      - name: default_nproma
+        mode: python
+        type: int
+        _: >
+          {
+            'dwarf-cloudsc-fortran': {'sp': 64, 'dp': 32}['$precision'],
+            'dwarf-cloudsc-c': {'sp': 64, 'dp': 32}['$precision'],
+            'dwarf-cloudsc-gpu-claw': 8192,
+            'dwarf-cloudsc-gpu-scc': 256,
+            'dwarf-cloudsc-gpu-scc-hoist': 128,
+            'dwarf-cloudsc-gpu-omp-scc-hoist': 128,
+          }['$target']
diff --git a/benchmark/arch/volta/nvhpc/22.3/include_arch.yml b/benchmark/arch/volta/nvhpc/22.3/include_arch.yml
new file mode 100644
index 00000000..e24cc355
--- /dev/null
+++ b/benchmark/arch/volta/nvhpc/22.3/include_arch.yml
@@ -0,0 +1,27 @@
+parameterset:
+  # System architecture specification
+  - name: arch_set
+    init_with: include/include_arch.yml
+    parameter:
+      # Architecture definition to pass to bundle build command
+      - {name: arch, _: "arch/ecmwf/volta/nvhpc/22.3"}  # Choose from arch
+
+      # Number of NUMA domains on a node (e.g., the number of sockets)
+      - {name: numa_domains, type: int, _: 2}
+
+      # Number of cores per NUMA domain (e.g., number of cores per CPU)
+      - {name: cores_per_numa_domain, type: int, _: 8}
+
+      # Number of GPUs available on a node
+      - {name: gpus, type: int, _: 2}
+
+      # Set CUDA runtime heap size on GPU for SCC variant
+      - {name: PGI_ACC_CUDA_HEAPSIZE, export: true, _: 12G}
+
+      # MPI launch command to use (inject CUDA_VISIBLE_DEVICES from the node-local MPI rank so each rank on a node picks its own GPU)
+      - name: launch_cmd
+        mode: python
+        _: "'mpirun -n ${{ NPROC }} --cpus-per-proc ${{ NUMOMP }} bash -c \"CUDA_VISIBLE_DEVICES=\\${OMPI_COMM_WORLD_LOCAL_RANK}' if $mpi == 1 else ''"
+      - name: launch_cmd_end
+        mode: python
+        _: "'\"' if $mpi == 1 else ''"
diff --git a/benchmark/cloudsc.yml b/benchmark/cloudsc.yml
new file mode 100644
index 00000000..1c43d942
--- /dev/null
+++ b/benchmark/cloudsc.yml
@@ -0,0 +1,80 @@
+include-path:  # directory searched for the files named in init_with and !include below
+  - path: include
+
+# Global parameter set (generic_set is defined in include_parameterset.yml)
+parameterset:
+- name: generic_set
+  init_with: include_parameterset.yml
+
+# Script templates: the files to copy and the placeholder substitutions applied to them
+fileset: !include include_fileset_substituteset.yml:["fileset"]
+substituteset: !include include_fileset_substituteset.yml:["substituteset"]
+
+benchmark:
+  ###############################################
+  - name: cpu
+    outpath: rundir_cpu
+    comment: Run CPU variants of CLOUDSC
+  ###############################################
+
+    # Benchmark parameters: defaults come from the include files, only `target` is overridden here
+    parameterset:
+    - name: build_set
+      init_with: include_parameterset.yml
+    - name: arch_set
+      init_with: include_arch.yml
+    - name: run_set
+      init_with: include_run.yml
+      parameter:
+      # Available binaries: the Fortran variant always, the C variant only when io_library is serialbox
+      - name: target
+        mode: python
+        _: |
+          ','.join(
+            ['dwarf-cloudsc-fortran'] +
+            {'hdf5': [], 'serialbox': ['dwarf-cloudsc-c']}['$io_library']
+          )
+
+    # Benchmark execution steps
+    step: !include include_step.yml:["step"]
+
+    # Parse output and compile results table
+    patternset: !include include_patternset.yml:["patternset"]
+    analyser: !include include_analyser.yml:["analyser"]
+    result: !include include_result.yml:["result"]
+
+  ###############################################
+  - name: gpu
+    outpath: rundir_gpu
+    comment: Run GPU variants of CLOUDSC
+  ###############################################
+
+    # Benchmark parameters: overrides the build flags, target list, rank count and thread count
+    parameterset:
+    - name: build_set
+      init_with: include_parameterset.yml
+      parameter:
+      - {name: other_flags, _: "--with-gpu"}
+    - name: arch_set
+      init_with: include_arch.yml
+    - name: run_set
+      init_with: include_run.yml
+      parameter:
+      # Available binaries
+      - name: target
+        _: 'dwarf-cloudsc-gpu-omp-scc-hoist,dwarf-cloudsc-gpu-scc,dwarf-cloudsc-gpu-scc-hoist'
+      # Number of MPI ranks to launch: 1..$gpus with MPI (at least 1, so gpus=0 cannot yield an empty value), else 1
+      - name: nproc
+        type: int
+        mode: python
+        _: "','.join(str(i+1) for i in range(max($gpus, 1))) if $mpi == 1 else '1'"
+      # Always run without multi-threading
+      - {name: numomp, type: int, _: 1}
+
+    # Benchmark execution steps
+    step: !include include_step.yml:["step"]
+
+    # Parse output and compile results table
+    patternset: !include include_patternset.yml:["patternset"]
+    analyser: !include include_analyser.yml:["analyser"]
+    result: !include include_result.yml:["result"]
diff --git a/benchmark/include/include_analyser.yml b/benchmark/include/include_analyser.yml
new file mode 100644
index 00000000..bed5dd05
--- /dev/null
+++ b/benchmark/include/include_analyser.yml
@@ -0,0 +1,9 @@
+# Applies the result and timing patterns to the stdout/stderr of the run step
+
+analyser:
+  - name: analyse_results
+    use: [results_pattern,timing_pattern]  # both pattern sets are defined in include_patternset.yml
+    reduce: true
+    analyse:
+      step: run
+      file: [stdout,stderr]
diff --git a/benchmark/include/include_arch.yml b/benchmark/include/include_arch.yml
new file mode 100644
index 00000000..f3c5a519
--- /dev/null
+++ b/benchmark/include/include_arch.yml
@@ -0,0 +1,25 @@
+parameterset:
+  # Baseline system architecture specification; arch-specific files initialise from this set via init_with
+  - name: arch_set
+    parameter:
+      # Architecture definition to pass to bundle build command
+      - {name: arch, _: "arch/ecmwf/hpc2020/gnu/9.3.0"}  # Choose from arch
+
+      # Number of NUMA domains on a node (e.g., the number of sockets)
+      - {name: numa_domains, type: int, _: 1}
+
+      # Number of cores per NUMA domain (e.g., number of cores per CPU)
+      - {name: cores_per_numa_domain, type: int, _: 4}
+
+      # Number of GPUs available on a node
+      - {name: gpus, type: int, _: 0}
+
+      # Increase OpenMP stacksize for larger column counts (exported as an environment variable)
+      - {name: OMP_STACKSIZE, export: true, _: 8G}
+
+      # MPI launch command to use (in the form `$launch_cmd <exe> <args> $launch_cmd_end`)
+      - name: launch_cmd  # ${{ NPROC }} stays a placeholder here; the run_script substituteset fills it in
+        mode: python
+        _: "'mpirun -np ${{ NPROC }}' if $mpi == 1 else ''"
+      - name: launch_cmd_end  # the plain mpirun form opens nothing that needs closing
+        _: ""
diff --git a/benchmark/include/include_fileset_substituteset.yml b/benchmark/include/include_fileset_substituteset.yml
new file mode 100644
index 00000000..2e83b54e
--- /dev/null
+++ b/benchmark/include/include_fileset_substituteset.yml
@@ -0,0 +1,29 @@
+fileset:  # template scripts that the substitutesets below read as their input files
+  - name: build_template
+    copy: templates/build-template.sh
+
+  - name: run_template
+    copy: templates/run-template.sh
+
+substituteset:  # replaces each ${{ NAME }} placeholder in a template with the matching parameter value
+  - name: build_script
+    iofile: {in: build-template.sh, out: mybuild.sh}
+    sub:
+    - {source: "${{ CLOUDSC_HOME }}", dest: $cloudsc_home}
+    - {source: "${{ JUBE_WP_ABSPATH }}", dest: $jube_wp_abspath}
+    - {source: "${{ ARCH }}", dest: $arch}
+    - {source: "${{ PRECISION_FLAG }}", dest: $precision_flag}
+    - {source: "${{ IO_LIBRARY_FLAG }}", dest: $io_library_flag}
+    - {source: "${{ MPI_FLAG }}", dest: $mpi_flag}
+    - {source: "${{ OTHER_FLAGS }}", dest: $other_flags}
+
+  - name: run_script
+    iofile: {in: run-template.sh, out: run.sh}
+    sub:
+    - {source: "${{ LAUNCH_CMD }}", dest: $launch_cmd}  # NOTE(review): launch_cmd values embed ${{ NPROC }}/${{ NUMOMP }}; relies on the subs below running afterwards - confirm ordering
+    - {source: "${{ LAUNCH_CMD_END }}", dest: $launch_cmd_end}
+    - {source: "${{ NPROC }}", dest: $nproc}
+    - {source: "${{ TARGET }}", dest: $target}
+    - {source: "${{ NUMOMP }}", dest: $numomp}
+    - {source: "${{ NGPTOTG }}", dest: $ngptotg}
+    - {source: "${{ NPROMA }}", dest: $nproma}
diff --git a/benchmark/include/include_parameterset.yml b/benchmark/include/include_parameterset.yml
new file mode 100644
index 00000000..6fb4b57a
--- /dev/null
+++ b/benchmark/include/include_parameterset.yml
@@ -0,0 +1,41 @@
+# The parameterization of the different build and run configurations
+
+# JUBE will create the parameter space as the cartesian product of all
+# parameters. Individual parameters are overwritten in the main benchmark
+# definition files for benchmark configurations where defaults are not
+# applicable (e.g., there is no OpenMP-requirement in GPU variants).
+
+parameterset:
+  # Generic options
+  - name: generic_set
+    parameter:
+      # Path to CLOUDSC main directory
+      - {name: cloudsc_home, _: $jube_benchmark_home/..}
+
+  # The build options
+  - name: build_set
+    parameter:
+      # Precision to build, selected by tags: `sp` alone -> sp, `sp` and `dp` -> both (`dp,sp`), otherwise dp
+      - {name: precision, tag: "sp+dp" , _: "dp,sp"}
+      - {name: precision, tag: "!sp" , _: "dp"}
+      - {name: precision, tag: "sp+!dp", _: "sp"}
+      - name: precision_flag  # build flag derived from the precision of the current work package
+        mode: python
+        _: "{'sp': '--single-precision', 'dp': ''}['$precision']"
+
+      # IO library for reading inputs and reference data: `serialbox` when that tag is set, otherwise `hdf5`
+      - {name: io_library, tag: "!serialbox", _: "hdf5"}
+      - {name: io_library, tag: "serialbox", _: "serialbox"}
+      - name: io_library_flag
+        mode: python
+        _: "{'hdf5': '', 'serialbox': '--with-serialbox'}['$io_library']"
+
+      # Enable/disable MPI: 1 when the `mpi` tag is set, otherwise 0
+      - {name: mpi, tag: "!mpi", type: int, _: "0"}
+      - {name: mpi, tag: "mpi", type: int, _: "1"}
+      - name: mpi_flag  # indexes the list with $mpi: 0 -> no flag, 1 -> --with-mpi
+        mode: python
+        _: "['', '--with-mpi'][$mpi]"
+
+      # Specify other options that are passed to the build command
+      - {name: other_flags, _: ""}
diff --git a/benchmark/include/include_patternset.yml b/benchmark/include/include_patternset.yml
new file mode 100644
index 00000000..e9994800
--- /dev/null
+++ b/benchmark/include/include_patternset.yml
@@ -0,0 +1,202 @@
+# The regex patterns to parse the output with
+patternset:
+  - name: results_pattern
+    pattern:
+      # Abs max err and max rel err columns, matched on the row of every variable
+      - {name: abs_max_err, type: float, _: '(?:P[A-Z_]+|tendency_loc%\w+)\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: max_rel_err, type: float, _: '(?:P[A-Z_]+|tendency_loc%\w+)\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # Per-variable columns (min, max, abs max err, avg abs err, max rel err), one group per variable
+      # PLUDE
+      - {name: plude_min, type: float, _: 'PLUDE\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: plude_max, type: float, _: 'PLUDE\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: plude_abs_max_err, type: float, _: 'PLUDE\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: plude_avg_abs_err, type: float, _: 'PLUDE\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: plude_max_rel_err, type: float, _: 'PLUDE\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PCOVPTOT
+      - {name: pcovptot_min, type: float, _: 'PCOVPTOT\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pcovptot_max, type: float, _: 'PCOVPTOT\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pcovptot_abs_max_err, type: float, _: 'PCOVPTOT\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pcovptot_avg_abs_err, type: float, _: 'PCOVPTOT\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pcovptot_max_rel_err, type: float, _: 'PCOVPTOT\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PRAINFRAC_TOPRFZ
+      - {name: prainfrac_toprfz_min, type: float, _: 'PRAINFRAC_TOPRFZ\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: prainfrac_toprfz_max, type: float, _: 'PRAINFRAC_TOPRFZ\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: prainfrac_toprfz_abs_max_err, type: float, _: 'PRAINFRAC_TOPRFZ\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: prainfrac_toprfz_avg_abs_err, type: float, _: 'PRAINFRAC_TOPRFZ\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: prainfrac_toprfz_max_rel_err, type: float, _: 'PRAINFRAC_TOPRFZ\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PFSQLF
+      - {name: pfsqlf_min, type: float, _: 'PFSQLF\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqlf_max, type: float, _: 'PFSQLF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqlf_abs_max_err, type: float, _: 'PFSQLF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqlf_avg_abs_err, type: float, _: 'PFSQLF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pfsqlf_max_rel_err, type: float, _: 'PFSQLF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PFSQIF
+      - {name: pfsqif_min, type: float, _: 'PFSQIF\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqif_max, type: float, _: 'PFSQIF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqif_abs_max_err, type: float, _: 'PFSQIF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqif_avg_abs_err, type: float, _: 'PFSQIF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pfsqif_max_rel_err, type: float, _: 'PFSQIF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PFCQLNG
+      - {name: pfcqlng_min, type: float, _: 'PFCQLNG\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfcqlng_max, type: float, _: 'PFCQLNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfcqlng_abs_max_err, type: float, _: 'PFCQLNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfcqlng_avg_abs_err, type: float, _: 'PFCQLNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pfcqlng_max_rel_err, type: float, _: 'PFCQLNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PFCQNNG
+      - {name: pfcqnng_min, type: float, _: 'PFCQNNG\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfcqnng_max, type: float, _: 'PFCQNNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfcqnng_abs_max_err, type: float, _: 'PFCQNNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfcqnng_avg_abs_err, type: float, _: 'PFCQNNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pfcqnng_max_rel_err, type: float, _: 'PFCQNNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PFSQRF
+      - {name: pfsqrf_min, type: float, _: 'PFSQRF\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqrf_max, type: float, _: 'PFSQRF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqrf_abs_max_err, type: float, _: 'PFSQRF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqrf_avg_abs_err, type: float, _: 'PFSQRF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pfsqrf_max_rel_err, type: float, _: 'PFSQRF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PFSQSF
+      - {name: pfsqsf_min, type: float, _: 'PFSQSF\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqsf_max, type: float, _: 'PFSQSF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqsf_abs_max_err, type: float, _: 'PFSQSF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqsf_avg_abs_err, type: float, _: 'PFSQSF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pfsqsf_max_rel_err, type: float, _: 'PFSQSF\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PFCQRNG
+      - {name: pfcqrng_min, type: float, _: 'PFCQRNG\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfcqrng_max, type: float, _: 'PFCQRNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfcqrng_abs_max_err, type: float, _: 'PFCQRNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfcqrng_avg_abs_err, type: float, _: 'PFCQRNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pfcqrng_max_rel_err, type: float, _: 'PFCQRNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PFCQSNG
+      - {name: pfcqsng_min, type: float, _: 'PFCQSNG\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfcqsng_max, type: float, _: 'PFCQSNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfcqsng_abs_max_err, type: float, _: 'PFCQSNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfcqsng_avg_abs_err, type: float, _: 'PFCQSNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pfcqsng_max_rel_err, type: float, _: 'PFCQSNG\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PFSQLTUR
+      - {name: pfsqltur_min, type: float, _: 'PFSQLTUR\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqltur_max, type: float, _: 'PFSQLTUR\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqltur_abs_max_err, type: float, _: 'PFSQLTUR\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqltur_avg_abs_err, type: float, _: 'PFSQLTUR\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pfsqltur_max_rel_err, type: float, _: 'PFSQLTUR\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PFSQITUR
+      - {name: pfsqitur_min, type: float, _: 'PFSQITUR\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqitur_max, type: float, _: 'PFSQITUR\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqitur_abs_max_err, type: float, _: 'PFSQITUR\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfsqitur_avg_abs_err, type: float, _: 'PFSQITUR\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pfsqitur_max_rel_err, type: float, _: 'PFSQITUR\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PFPLSL
+      - {name: pfplsl_min, type: float, _: 'PFPLSL\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfplsl_max, type: float, _: 'PFPLSL\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfplsl_abs_max_err, type: float, _: 'PFPLSL\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfplsl_avg_abs_err, type: float, _: 'PFPLSL\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pfplsl_max_rel_err, type: float, _: 'PFPLSL\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PFPLSN
+      - {name: pfplsn_min, type: float, _: 'PFPLSN\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfplsn_max, type: float, _: 'PFPLSN\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfplsn_abs_max_err, type: float, _: 'PFPLSN\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfplsn_avg_abs_err, type: float, _: 'PFPLSN\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pfplsn_max_rel_err, type: float, _: 'PFPLSN\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PFHPSL
+      - {name: pfhpsl_min, type: float, _: 'PFHPSL\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfhpsl_max, type: float, _: 'PFHPSL\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfhpsl_abs_max_err, type: float, _: 'PFHPSL\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfhpsl_avg_abs_err, type: float, _: 'PFHPSL\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pfhpsl_max_rel_err, type: float, _: 'PFHPSL\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # PFHPSN
+      - {name: pfhpsn_min, type: float, _: 'PFHPSN\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfhpsn_max, type: float, _: 'PFHPSN\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfhpsn_abs_max_err, type: float, _: 'PFHPSN\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: pfhpsn_avg_abs_err, type: float, _: 'PFHPSN\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: pfhpsn_max_rel_err, type: float, _: 'PFHPSN\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # tendency_loc%a
+      - {name: tendency_loc_a_min, type: float, _: 'tendency_loc%a\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_a_max, type: float, _: 'tendency_loc%a\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_a_abs_max_err, type: float, _: 'tendency_loc%a\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_a_avg_abs_err, type: float, _: 'tendency_loc%a\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_a_max_rel_err, type: float, _: 'tendency_loc%a\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # tendency_loc%q
+      - {name: tendency_loc_q_min, type: float, _: 'tendency_loc%q\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_q_max, type: float, _: 'tendency_loc%q\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_q_abs_max_err, type: float, _: 'tendency_loc%q\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_q_avg_abs_err, type: float, _: 'tendency_loc%q\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_q_max_rel_err, type: float, _: 'tendency_loc%q\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # tendency_loc%T
+      - {name: tendency_loc_t_min, type: float, _: 'tendency_loc%T\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_t_max, type: float, _: 'tendency_loc%T\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_t_abs_max_err, type: float, _: 'tendency_loc%T\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_t_avg_abs_err, type: float, _: 'tendency_loc%T\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_t_max_rel_err, type: float, _: 'tendency_loc%T\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+      # tendency_loc%cld
+      - {name: tendency_loc_cld_min, type: float, _: 'tendency_loc%cld\s+\dD\d\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_cld_max, type: float, _: 'tendency_loc%cld\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_cld_abs_max_err, type: float, _: 'tendency_loc%cld\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_cld_avg_abs_err, type: float, _: 'tendency_loc%cld\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp\s+$jube_pat_nfp'}
+      - {name: tendency_loc_cld_max_rel_err, type: float, _: 'tendency_loc%cld\s+\dD\d\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_nfp\s+$jube_pat_fp'}
+
+      #             Variable Dim             MinValue             MaxValue            AbsMaxErr         AvgAbsErr/GP          MaxRelErr-%
+      #                PLUDE 2D1  0.0000000000000E+00  0.1026720108982E-03  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #             PCOVPTOT 2D1  0.0000000000000E+00  0.1000000000000E+01  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #     PRAINFRAC_TOPRFZ 1D1  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #               PFSQLF 2D1 -0.2581756360133E-05  0.4080862802485E-05  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #               PFSQIF 2D1 -0.2810873821405E-05  0.3887836598167E-05  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #              PFCQLNG 2D1 -0.3555564290808E-07  0.1413389467871E-08  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #              PFCQNNG 2D1 -0.5285014021662E-06  0.8788562329576E-08  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #               PFSQRF 2D1 -0.2581756360133E-05  0.4080862802485E-05  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #               PFSQSF 2D1 -0.2768876986308E-05  0.3860349895496E-05  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #              PFCQRNG 2D1 -0.3555564290808E-07  0.1413389467871E-08  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #              PFCQSNG 2D1 -0.5285014021662E-06  0.8788562329576E-08  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #             PFSQLTUR 2D1 -0.8115949343062E-06  0.2653456385227E-06  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #             PFSQITUR 2D1 -0.2651690301090E-05  0.3864567769232E-06  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #               PFPLSL 2D1  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #               PFPLSN 2D1  0.0000000000000E+00  0.2969958004108E-04  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #               PFHPSL 2D1 -0.0000000000000E+00 -0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #               PFHPSN 2D1 -0.8418345962643E+02 -0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #       tendency_loc%a 2D1 -0.2777777777778E-03  0.2631309809623E-03  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #       tendency_loc%q 2D1 -0.1997382404851E-07  0.1495580073796E-07  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #       tendency_loc%T 2D1 -0.4132200394167E-04  0.5435278446361E-04  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+      #     tendency_loc%cld 3D1 -0.1222481036148E-07  0.9816546352324E-08  0.0000000000000E+00  0.0000000000000E+00  0.0000000000000E+00
+
+  - name: timing_pattern
+    pattern:
+      - {name: thr_time,   type: int, _: '(?:$jube_pat_nint\s+){6}:\s+$jube_pat_int\s+$jube_pat_nint\s+@\s+(?:rank#$jube_pat_nint:)?core#'}  # per-thread rows; core number deliberately not matched (C-version doesn't print core number?)
+      - {name: thr_mflops, type: int, _: '(?:$jube_pat_nint\s+){6}:\s+$jube_pat_nint\s+$jube_pat_int\s+@\s+(?:rank#$jube_pat_nint:)?core#'}  # per-thread rows; core number deliberately not matched
+      - {name: rnk_time,   type: int, _: '(?:$jube_pat_nint\s+){6}:\s+$jube_pat_int\s+$jube_pat_nint\s+:\s+TOTAL\s@\srank#$jube_pat_nint'}
+      - {name: rnk_mflops, type: int, _: '(?:$jube_pat_nint\s+){6}:\s+$jube_pat_nint\s+$jube_pat_int\s+:\s+TOTAL\s@\srank#$jube_pat_nint'}
+      - {name: tot_time,   type: int, _: '(?:$jube_pat_nint\s*x\s*)?(?:$jube_pat_nint\s+){6}:\s+$jube_pat_int\s+$jube_pat_nint\s+(?::\s+)?TOTAL(?!\s@)'}
+      - {name: tot_mflops, type: int, _: '(?:$jube_pat_nint\s*x\s*)?(?:$jube_pat_nint\s+){6}:\s+$jube_pat_nint\s+$jube_pat_int\s+(?::\s+)?TOTAL(?!\s@)'}
+      - {name: tot_nproc,  type: int, _: '$jube_pat_int\s*x\s*(?:$jube_pat_nint\s+){6}:\s+(?:$jube_pat_nint\s+){2}(?::\s+)?TOTAL(?!\s@)'}
+      - {name: tot_numomp, type: int, _: '(?:$jube_pat_nint\s*x\s*)?$jube_pat_int\s+(?:$jube_pat_nint\s+){5}:\s+(?:$jube_pat_nint\s+){2}(?::\s+)?TOTAL(?!\s@)'}
+      - {name: tot_ngptot, type: int, _: '(?:$jube_pat_nint\s*x\s*)?$jube_pat_nint\s+$jube_pat_int\s+(?:$jube_pat_nint\s+){4}:\s+(?:$jube_pat_nint\s+){2}(?::\s+)?TOTAL(?!\s@)'}
+      - {name: tot_ngpblks,type: int, _: '(?:$jube_pat_nint\s*x\s*)?(?:$jube_pat_nint\s+){3}$jube_pat_int\s+(?:$jube_pat_nint\s+){2}:\s+(?:$jube_pat_nint\s+){2}(?::\s+)?TOTAL(?!\s@)'}
+      - {name: tot_nproma, type: int, _: '(?:$jube_pat_nint\s*x\s*)?(?:$jube_pat_nint\s+){4}$jube_pat_int\s+$jube_pat_nint\s+:\s+(?:$jube_pat_nint\s+){2}(?::\s+)?TOTAL(?!\s@)'}
+
+      # NUMOMP    NGPTOT  #GP-cols     #BLKS    NPROMA tid# : Time(msec)  MFlops/s
+      #      8     16384      2048       128        16    0 :        295       866 @ core#22
+      #      8     16384      2048       128        16    1 :        284       899 @ core#4
+      #      8     16384      2048       128        16    2 :        282       905 @ core#16
+      #      8     16384      2048       128        16    3 :        239      1067 @ core#1
+      #      8     16384      2048       128        16    4 :        261       975 @ core#2
+      #      8     16384      2048       128        16    5 :        266       959 @ core#3
+      #      8     16384      2048       128        16    6 :        267       955 @ core#21
+      #      8     16384      2048       128        16    7 :        273       934 @ core#23
+      #      8     16384     16384      1024        16   -1 :        295      6931 : TOTAL
+
+      # NUMPROC=8, NUMOMP=1, NGPTOTG=16384, NPROMA=16, NGPBLKS=128
+      # NUMOMP    NGPTOT  #GP-cols     #BLKS    NPROMA tid# : Time(msec)  MFlops/s
+      #      1      2048      2048       128        16    0 :        237      1075 @ rank#0:core#20
+      #      1      2048      2048       128        16   -1 :        237      1075 : TOTAL @ rank#0
+      #      1      2048      2048       128        16    0 :        230      1109 @ rank#1:core#11
+      #      1      2048      2048       128        16   -1 :        230      1109 : TOTAL @ rank#1
+      #      1      2048      2048       128        16    0 :        281       906 @ rank#2:core#6
+      #      1      2048      2048       128        16   -1 :        281       906 : TOTAL @ rank#2
+      #      1      2048      2048       128        16    0 :        254      1002 @ rank#3:core#24
+      #      1      2048      2048       128        16   -1 :        254      1002 : TOTAL @ rank#3
+      #      1      2048      2048       128        16    0 :        271       940 @ rank#4:core#3
+      #      1      2048      2048       128        16   -1 :        271       940 : TOTAL @ rank#4
+      #      1      2048      2048       128        16    0 :        249      1025 @ rank#5:core#25
+      #      1      2048      2048       128        16   -1 :        249      1025 : TOTAL @ rank#5
+      #      1      2048      2048       128        16    0 :        235      1086 @ rank#6:core#1
+      #      1      2048      2048       128        16   -1 :        235      1086 : TOTAL @ rank#6
+      #      1      2048      2048       128        16    0 :        243      1050 @ rank#7:core#15
+      #      1      2048      2048       128        16   -1 :        243      1050 : TOTAL @ rank#7
+      #  8 x 1     16384     16384      1024        16   -1 :        281      8193 : TOTAL
diff --git a/benchmark/include/include_result.yml b/benchmark/include/include_result.yml
new file mode 100644
index 00000000..d55ee64c
--- /dev/null
+++ b/benchmark/include/include_result.yml
@@ -0,0 +1,35 @@
+# Result table built from the "analyse_results" analyser output.
+# The "compact" table is rendered in the "pretty" style and sorted by jube_wp_id.
+result:
+  use: analyse_results
+  table:
+    - name: compact
+      style: pretty
+      sort: jube_wp_id
+      column:
+        - jube_wp_id
+        - target
+        - precision
+        - io_library
+        - mpi
+        - tot_nproc
+        - tot_numomp
+        - tot_ngptot
+        - tot_nproma
+        - tot_ngpblks
+        - tot_time
+        - tot_mflops
+        - thr_time_min
+        - thr_time_max
+        - thr_time_avg
+        - thr_mflops_min
+        - thr_mflops_max
+        - thr_mflops_avg
+        - rnk_time_min
+        - rnk_time_max
+        - rnk_time_avg
+        - rnk_mflops_min
+        - rnk_mflops_max
+        - rnk_mflops_avg
+        - abs_max_err_max
+        - max_rel_err_max
diff --git a/benchmark/include/include_run.yml b/benchmark/include/include_run.yml
new file mode 100644
index 00000000..c6975837
--- /dev/null
+++ b/benchmark/include/include_run.yml
@@ -0,0 +1,53 @@
+parameterset:
+  # Runtime options: rank/thread layout, problem size, NPROMA and the target executable
+  - name: run_set
+    parameter:
+      # Rank counts to sweep: with $mpi == 1 the list is 1, 2, 4, ... followed by the total core count; otherwise 1..$numa_domains
+      - name: nproc
+        type: int
+        mode: python
+        _: |  # 2**i for i < bit_length(total cores) - 1, then the total core count itself
+          ','.join(
+            [
+              str(2**i)
+              for i in range(int($numa_domains*$cores_per_numa_domain).bit_length()-1)
+            ] + [str($numa_domains*$cores_per_numa_domain)]
+            if $mpi == 1 else [
+              str(i+1) for i in range($numa_domains)
+            ]
+          )
+
+      # OpenMP threads per rank: total core count integer-divided by $nproc
+      - name: numomp
+        type: int
+        mode: python
+        _: "$numa_domains*$cores_per_numa_domain//$nproc"
+
+      # Number of horizontal columns
+      - {name: ngptotg, type: int, _: 163840}
+
+      # Default NPROMA per target; the two CPU variants additionally depend on $precision (sp/dp)
+      - name: default_nproma
+        mode: python
+        type: int
+        _: >
+          {
+            'dwarf-cloudsc-fortran': {'sp': 32, 'dp': 16}['$precision'],
+            'dwarf-cloudsc-c': {'sp': 32, 'dp': 16}['$precision'],
+            'dwarf-cloudsc-gpu-claw': 8192,
+            'dwarf-cloudsc-gpu-scc': 256,
+            'dwarf-cloudsc-gpu-scc-hoist': 128,
+            'dwarf-cloudsc-gpu-omp-scc-hoist': 128,
+          }['$target']
+
+      # With tag "sweep_nproma": sweep $default_nproma * 2**i for i in -2..2; without it use $default_nproma only
+      - name: nproma
+        tag: "sweep_nproma"
+        type: int
+        mode: python
+        _: "','.join(str(int($default_nproma * 2**i)) for i in range(-2, 3))"
+      - {name: nproma, tag: "!sweep_nproma", _: $default_nproma}
+
+      # Executable(s) to benchmark; the value must be a key of the default_nproma table above
+      - name: target
+        _: "dwarf-cloudsc-fortran"
diff --git a/benchmark/include/include_step.yml b/benchmark/include/include_step.yml
new file mode 100644
index 00000000..f52512e1
--- /dev/null
+++ b/benchmark/include/include_step.yml
@@ -0,0 +1,33 @@
+# The operations to carry out as part of the benchmark
+# Steps are chained through "depend": create -> build -> run
+# Each step is executed for every item of the "used" parameter spaces
+
+step:
+  # Creating the bundle: runs "./cloudsc-bundle create" in $cloudsc_home
+  - name: create
+    shared: shared
+    use: generic_set
+    work_dir: $cloudsc_home
+    do: ./cloudsc-bundle create
+
+  # Compiling the code via ./mybuild.sh
+  - name: build
+    depend: create
+    use:
+      - arch_set
+      - build_template
+      - build_script
+      - build_set
+    do: ./mybuild.sh
+
+  # Running the dwarf via ./run.sh; "iterations: 3" repeats the run step three times
+  - name: run
+    depend: build
+    use:
+      - run_template
+      - run_script
+      - arch_set
+      - build_set
+      - run_set
+    iterations: 3
+    do: ./run.sh
diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt
new file mode 100644
index 00000000..c6adbca5
--- /dev/null
+++ b/benchmark/requirements.txt
@@ -0,0 +1 @@
+https://apps.fz-juelich.de/jsc/jube/jube2/download.php?version=2.4.2
diff --git a/benchmark/templates/build-template.sh b/benchmark/templates/build-template.sh
new file mode 100755
index 00000000..28cda4b8
--- /dev/null
+++ b/benchmark/templates/build-template.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env -S bash -lxeo pipefail
+set -u
+
+cd ${{ CLOUDSC_HOME }}
+
+./cloudsc-bundle build --retry-verbose \
+    --build-dir=${{ JUBE_WP_ABSPATH }} \
+    --arch=${{ ARCH }} \
+    ${{ PRECISION_FLAG }} \
+    ${{ IO_LIBRARY_FLAG }} \
+    ${{ MPI_FLAG }} \
+    ${{ OTHER_FLAGS }}
diff --git a/benchmark/templates/run-template.sh b/benchmark/templates/run-template.sh
new file mode 100755
index 00000000..8c2bf7a7
--- /dev/null
+++ b/benchmark/templates/run-template.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env -S bash -lxeo pipefail
+set -u
+cd build
+# Load the environment from build/env.sh before launching the executable.
+source ./env.sh
+# Run bin/<TARGET> with the arguments NUMOMP, NGPTOTG and NPROMA, wrapped by the LAUNCH_CMD / LAUNCH_CMD_END placeholders.
+${{ LAUNCH_CMD }} bin/${{ TARGET }} ${{ NUMOMP }} ${{ NGPTOTG }} ${{ NPROMA }} ${{ LAUNCH_CMD_END }}
diff --git a/bundle.yml b/bundle.yml
index 3a2c02e1..0c9dcbb2 100644
--- a/bundle.yml
+++ b/bundle.yml
@@ -9,12 +9,12 @@ cmake : >
 projects :
 
     - ecbuild :
-        git     : https://github.com/ecmwf/ecbuild 
-        version : 3.6.4 
+        git     : https://github.com/ecmwf/ecbuild
+        version : 3.6.4
         bundle  : false
 
     - serialbox :
-        git     : ${BITBUCKET}/escape/serialbox
+        git     : https://github.com/reuterbal/serialbox
         version : v2.5.4/patched
         optional: true
         cmake   : >
@@ -22,12 +22,14 @@ projects :
             SERIALBOX_BUILD_SHARED=ON
             SERIALBOX_ENABLE_FORTRAN=ON
             SERIALBOX_ENABLE_EXPERIMENTAL_FILESYSTEM=OFF
+            SERIALBOX_ENABLE_SDB=OFF
+            SERIALBOX_EXAMPLES=OFF
             Serialbox_TARGETS_EXPORTED=TRUE
             CMAKE_CXX_EXTENSIONS=ON
 
     - loki :
-        git     : ${BITBUCKET}/rdx/loki
-        version : master
+        git     : https://github.com/ecmwf-ifs/loki
+        version : main
         optional: true
         require : ecbuild
         cmake   : >
@@ -51,11 +53,17 @@ options :
         cmake : ENABLE_SINGLE_PRECISION=ON
 
     - with-gpu :
-        help  : Enable GPU kernel (ACC kernels and CLAW-based)
+        help  : Enable GPU kernels
         cmake : >
-            ENABLE_CLOUDSC_GPU_CLAW=ON
             ENABLE_CLOUDSC_GPU_SCC=ON
             ENABLE_CLOUDSC_GPU_SCC_HOIST=ON
+            ENABLE_CLOUDSC_GPU_OMP_SCC_HOIST=ON
+
+    - with-cuda :
+        help  : Enable GPU kernel variant based on CUDA-Fortran
+        cmake : >
+            ENABLE_CUDA=ON
+            ENABLE_CLOUDSC_GPU_SCC_CUF=ON
 
     - with-mpi :
         help  : Enable MPI-parallel kernel
@@ -67,10 +75,18 @@ options :
             ENABLE_CLOUDSC_LOKI=ON
             LOKI_ENABLE_NO_INSTALL=OFF
 
+    - with-claw :
+        help  : Enable deprecated (and defunct) CLAW-generated variants
+        cmake : >
+            ENABLE_CLOUDSC_GPU_CLAW=ON
+            ENABLE_CLOUDSC_LOKI_CLAW=ON
+            LOKI_ENABLE_CLAW=ON
+
     - without-loki-install :
         help  : Skip installation of Loki (Requires Loki to be on the PATH)
         cmake : >
             LOKI_ENABLE_NO_INSTALL=ON
+            LOKI_ENABLE_CLAW=OFF
 
     - loki-frontend :
         help  : Frontend parser to use for Loki transformations
@@ -89,13 +105,17 @@ options :
         cmake : ENABLE_CLOUDSC_C={{value}}
 
     - cloudsc-gpu-claw :
-        help  : Build the optimized CLAW-based GPU version CLOUDSC  [ON|OFF]
+        help  : Build the deprecated CLAW-based GPU version CLOUDSC  [ON|OFF]
         cmake : ENABLE_CLOUDSC_GPU_CLAW={{value}}
 
     - cloudsc-loki :
         help  : Build the optimized Loki-based GPU version CLOUDSC  [ON|OFF]
         cmake : ENABLE_CLOUDSC_LOKI={{value}}
 
+    - cloudsc-loki-claw :
+        help  : Build the deprecated Loki+CLAW-based GPU version CLOUDSC  [ON|OFF]
+        cmake : ENABLE_CLOUDSC_LOKI_CLAW={{value}}
+
     - hdf5 :
         help : Enable use of HDF5 input file [ON|OFF]
         cmake : ENABLE_HDF5={{value}}
diff --git a/cmake/cloudsc_compile_options.cmake b/cmake/cloudsc_compile_options.cmake
new file mode 100644
index 00000000..7b22f3a3
--- /dev/null
+++ b/cmake/cloudsc_compile_options.cmake
@@ -0,0 +1,44 @@
+# (C) Copyright 2020- ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+
+# Extra Fortran flags that are added only when the Fortran compiler ID matches "NVHPC".
+if( CMAKE_Fortran_COMPILER_ID MATCHES "NVHPC" )
+  ecbuild_add_fortran_flags("-Mlarge_arrays")
+
+  # Per-build-type flags. NOTE(review): these should arguably be part of the configuration or an ecbuild default.
+  ecbuild_add_fortran_flags("-traceback"      BUILD DEBUG )
+  ecbuild_add_fortran_flags("-fast"           BUILD RELEASE )
+  ecbuild_add_fortran_flags("-gopt -fast"     BUILD RELWITHDEBINFO )
+endif()
+
+# cloudsc_add_compile_options( FLAGS <flags> SOURCES <file>... )
+# Sets the COMPILE_FLAGS property to <flags> on each listed source (checked relative to CMAKE_CURRENT_SOURCE_DIR).
+macro( cloudsc_add_compile_options )
+  set( options  )
+  set( single_value_args FLAGS )
+  set( multi_value_args SOURCES )
+  cmake_parse_arguments( _PAR "${options}" "${single_value_args}" "${multi_value_args}" ${ARGN} )
+  if(_PAR_UNPARSED_ARGUMENTS)
+    ecbuild_critical("Unknown keywords given to cloudsc_add_compile_options(): \"${_PAR_UNPARSED_ARGUMENTS}\"")
+  endif()
+  if(NOT _PAR_SOURCES)
+    ecbuild_critical("SOURCES keyword missing to cloudsc_add_compile_options()")
+  endif()
+  if(NOT _PAR_FLAGS)
+    ecbuild_critical("FLAGS keyword missing to cloudsc_add_compile_options()")
+  endif()
+  foreach( _file ${_PAR_SOURCES} )
+    ecbuild_warn("Adding custom compile flags for file ${_file} : [${_PAR_FLAGS}]")
+    if( NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${_file}" )
+        ecbuild_error("${_file} does not exist")
+    endif()
+    set_source_files_properties( ${_file} PROPERTIES COMPILE_FLAGS "${_PAR_FLAGS}" )
+  endforeach()
+endmacro()
+
diff --git a/cmake/features/OMP.cmake b/cmake/features/OMP.cmake
new file mode 100644
index 00000000..7cd49da8
--- /dev/null
+++ b/cmake/features/OMP.cmake
@@ -0,0 +1,44 @@
+if( HAVE_OMP )
+    # Compile small Fortran test programs against OpenMP::OpenMP_Fortran to detect OpenMP target support; first "target teams distribute".
+    try_compile(
+        HAVE_OMP_TARGET_TEAMS_DISTRIBUTE
+        ${CMAKE_CURRENT_BINARY_DIR}
+        ${PROJECT_SOURCE_DIR}/cmake/features/OMP/test_omp_target_teams_distribute.F90
+        LINK_LIBRARIES OpenMP::OpenMP_Fortran
+        OUTPUT_VARIABLE _HAVE_OMP_TARGET_TEAMS_DISTRIBUTE_OUTPUT
+    )
+
+    ecbuild_debug_var( HAVE_OMP_TARGET_TEAMS_DISTRIBUTE )
+    ecbuild_debug_var( _HAVE_OMP_TARGET_TEAMS_DISTRIBUTE_OUTPUT )
+    # Probe: "target teams loop" with a nested "loop bind(parallel)".
+    try_compile(
+        HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL
+        ${CMAKE_CURRENT_BINARY_DIR}
+        ${PROJECT_SOURCE_DIR}/cmake/features/OMP/test_omp_target_loop_construct_bind_parallel.F90
+        LINK_LIBRARIES OpenMP::OpenMP_Fortran
+        OUTPUT_VARIABLE _HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL_OUTPUT
+    )
+
+    ecbuild_debug_var( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL )
+    ecbuild_debug_var( _HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL_OUTPUT )
+    # Probe: "target teams loop" with a nested "loop bind(thread)".
+    try_compile(
+        HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD
+        ${CMAKE_CURRENT_BINARY_DIR}
+        ${PROJECT_SOURCE_DIR}/cmake/features/OMP/test_omp_target_loop_construct_bind_thread.F90
+        LINK_LIBRARIES OpenMP::OpenMP_Fortran
+        OUTPUT_VARIABLE _HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD_OUTPUT
+    )
+
+    ecbuild_debug_var( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD )
+    ecbuild_debug_var( _HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD_OUTPUT )
+    # The loop construct counts as supported when at least one of the two bind variants compiled.
+    if( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL OR HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD )
+        set( HAVE_OMP_TARGET_LOOP_CONSTRUCT ON CACHE BOOL "OpenMP target teams loop is supported" )
+    else()
+        set( HAVE_OMP_TARGET_LOOP_CONSTRUCT OFF CACHE BOOL "OpenMP target teams loop is not supported" )
+    endif()
+
+    ecbuild_debug_var( HAVE_OMP_TARGET_LOOP_CONSTRUCT )
+
+endif()
diff --git a/cmake/features/OMP/test_omp_target_loop_construct_bind_parallel.F90 b/cmake/features/OMP/test_omp_target_loop_construct_bind_parallel.F90
new file mode 100644
index 00000000..71354af9
--- /dev/null
+++ b/cmake/features/OMP/test_omp_target_loop_construct_bind_parallel.F90
@@ -0,0 +1,22 @@
+PROGRAM OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL
+USE iso_fortran_env
+IMPLICIT NONE
+! Compile probe: "target teams loop" with a nested "loop bind(parallel)" construct.
+INTEGER, PARAMETER :: NB = 10
+INTEGER, PARAMETER :: N = 10
+INTEGER :: I, J
+REAL(KIND=REAL32) :: TMP(N, NB)
+! TMP is mapped tofrom for the data region; the nested loops store J*I into TMP(J, I).
+!$omp target data map(tofrom: TMP)
+
+!$omp target teams loop
+DO I=1,NB
+!$omp loop bind(parallel)
+    DO J=1,N
+        TMP(J, I) = REAL(J * I, KIND=REAL32)
+    ENDDO
+ENDDO
+
+!$omp end target data
+
+END PROGRAM OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL
diff --git a/cmake/features/OMP/test_omp_target_loop_construct_bind_thread.F90 b/cmake/features/OMP/test_omp_target_loop_construct_bind_thread.F90
new file mode 100644
index 00000000..1f12e6db
--- /dev/null
+++ b/cmake/features/OMP/test_omp_target_loop_construct_bind_thread.F90
@@ -0,0 +1,22 @@
+PROGRAM OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD
+USE iso_fortran_env
+IMPLICIT NONE
+! Compile probe: "target teams loop" with a nested "loop bind(thread)" construct.
+INTEGER, PARAMETER :: NB = 10
+INTEGER, PARAMETER :: N = 10
+INTEGER :: I, J
+REAL(KIND=REAL32) :: TMP(N, NB)
+! TMP is mapped tofrom for the data region; the nested loops store J*I into TMP(J, I).
+!$omp target data map(tofrom: TMP)
+
+!$omp target teams loop
+DO I=1,NB
+!$omp loop bind(thread)
+    DO J=1,N
+        TMP(J, I) = REAL(J * I, KIND=REAL32)
+    ENDDO
+ENDDO
+
+!$omp end target data
+
+END PROGRAM OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD
diff --git a/cmake/features/OMP/test_omp_target_teams_distribute.F90 b/cmake/features/OMP/test_omp_target_teams_distribute.F90
new file mode 100644
index 00000000..8587b1ea
--- /dev/null
+++ b/cmake/features/OMP/test_omp_target_teams_distribute.F90
@@ -0,0 +1,22 @@
+PROGRAM OMP_TARGET_TEAMS_DISTRIBUTE
+USE iso_fortran_env
+IMPLICIT NONE
+! Compile probe: "target teams distribute" with a nested "parallel do" construct.
+INTEGER, PARAMETER :: NB = 10
+INTEGER, PARAMETER :: N = 10
+INTEGER :: I, J
+REAL(KIND=REAL32) :: TMP(N, NB)
+! TMP is mapped tofrom for the data region; the nested loops store J*I into TMP(J, I).
+!$omp target data map(tofrom: TMP)
+
+!$omp target teams distribute
+DO I=1,NB
+!$omp parallel do
+    DO J=1,N
+        TMP(J, I) = REAL(J * I, KIND=REAL32)
+    ENDDO
+ENDDO
+
+!$omp end target data
+
+END PROGRAM OMP_TARGET_TEAMS_DISTRIBUTE
diff --git a/src/cloudsc_c/CMakeLists.txt b/src/cloudsc_c/CMakeLists.txt
index 82bd8d66..6566a252 100644
--- a/src/cloudsc_c/CMakeLists.txt
+++ b/src/cloudsc_c/CMakeLists.txt
@@ -19,42 +19,55 @@ if( HAVE_CLOUDSC_C )
 
     ecbuild_add_library(
         TARGET dwarf-cloudsc-c-lib
-	INSTALL_HEADERS LISTED
+        INSTALL_HEADERS LISTED
         SOURCES
-	    cloudsc/yoecldp_c.h
-	    cloudsc/yoethf_c.h
-	    cloudsc/yomcst_c.h
-	    cloudsc/load_state.h
-	    cloudsc/load_state.c
-	    cloudsc/cloudsc_c.h
-	    cloudsc/cloudsc_c.c
-	    cloudsc/cloudsc_driver.h
-	    cloudsc/cloudsc_driver.c
-	    cloudsc/cloudsc_validate.h
-	    cloudsc/cloudsc_validate.c
-	    cloudsc/mycpu.h
-	    cloudsc/mycpu.c
+            cloudsc/yoecldp_c.h
+            cloudsc/yoethf_c.h
+            cloudsc/yomcst_c.h
+            cloudsc/load_state.h
+            cloudsc/load_state.c
+            cloudsc/cloudsc_c.h
+            cloudsc/cloudsc_c.c
+            cloudsc/cloudsc_driver.h
+            cloudsc/cloudsc_driver.c
+            cloudsc/cloudsc_validate.h
+            cloudsc/cloudsc_validate.c
+            cloudsc/mycpu.h
+            cloudsc/mycpu.c
+        PUBLIC_INCLUDES
+            $<INSTALL_INTERFACE:include>
+            $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/cloudsc>
+        PUBLIC_LIBS
+            Serialbox::Serialbox_C
+            $<${HAVE_OMP}:OpenMP::OpenMP_C>
     )
-    target_link_libraries( dwarf-cloudsc-c-lib PRIVATE Serialbox::Serialbox_C )
-    if( HAVE_OMP AND TARGET OpenMP::OpenMP_C )
-        target_link_libraries( dwarf-cloudsc-c-lib PRIVATE OpenMP::OpenMP_C )
-    endif()
-    target_include_directories( dwarf-cloudsc-c-lib PUBLIC $<INSTALL_INTERFACE:include> )
-    target_include_directories( dwarf-cloudsc-c-lib PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/cloudsc> )
 
     ecbuild_add_executable(
         TARGET  dwarf-cloudsc-c
         SOURCES dwarf_cloudsc.c
-    )
-    target_link_libraries(
-        dwarf-cloudsc-c PRIVATE
-        dwarf-cloudsc-c-lib
+        LIBS dwarf-cloudsc-c-lib
     )
 
     # Create symlink for the input data
     execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
         ${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data )
 
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-c-serial
+        COMMAND bin/dwarf-cloudsc-c
+        ARGS 1 100 16
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 1
+    )
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-c-omp
+        COMMAND bin/dwarf-cloudsc-c
+        ARGS 4 100 16
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 4
+        CONDITION HAVE_OMP
+    )
+
 else()
-    message(STATUS "Serialbox not found, disabling C prototype" )
+    ecbuild_info( "Serialbox not found, disabling C prototype" )
 endif()
diff --git a/src/cloudsc_c/cloudsc/cloudsc_c.c b/src/cloudsc_c/cloudsc/cloudsc_c.c
index ee5ba4cc..de4ad05f 100644
--- a/src/cloudsc_c/cloudsc/cloudsc_c.c
+++ b/src/cloudsc_c/cloudsc/cloudsc_c.c
@@ -334,7 +334,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
   // FOEDELTA (PTARE) = MAX (0.0_JPRB,SIGN(1.0_JPRB,PTARE-RTT))
   // REAL(KIND=JPRB) :: FOEALFA
   // FOEALFA (PTARE) = MIN(1.0_JPRB,((MAX(RTICE,MIN(RTWAT,PTARE))-RTICE)&
-  //  &*RTWAT_RTICE_R)**2) 
+  //  &*RTWAT_RTICE_R)**2)
   // REAL(KIND=JPRB) :: FOEEWM,FOEDEM,FOELDCPM
   // FOEEWM ( PTARE ) = R2ES *&
   //      &(FOEALFA(PTARE)*EXP(R3LES*(PTARE-RTT)/(PTARE-R4LES))+&
@@ -343,11 +343,11 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
   //              &(1.0_JPRB-FOEALFA(PTARE))*R5ALSCP*(1.0_JPRB/(PTARE-R4IES)**2)
   // FOELDCPM ( PTARE ) = FOEALFA(PTARE)*RALVDCP+&
   //             &(1.0_JPRB-FOEALFA(PTARE))*RALSDCP
-  // REAL(KIND=JPRB) :: FOEELIQ, FOEEICE 
+  // REAL(KIND=JPRB) :: FOEELIQ, FOEEICE
   // FOEELIQ( PTARE ) = R2ES*EXP(R3LES*(PTARE-RTT)/(PTARE-R4LES))
   // FOEEICE( PTARE ) = R2ES*EXP(R3IES*(PTARE-RTT)/(PTARE-R4IES))
   // #include "fccld.func.h"
-  // REAL(KIND=JPRB) :: FOKOOP 
+  // REAL(KIND=JPRB) :: FOKOOP
   // FOKOOP (PTARE) = MIN(RKOOP1-RKOOP2*PTARE,FOEELIQ(PTARE)/FOEEICE(PTARE))
   //===============================================================================
   //IF (LHOOK) CALL DR_HOOK('CLOUDSC',0,ZHOOK_HANDLE)
@@ -402,7 +402,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
   iphase[3-1] = 1;
   iphase[2-1] = 2;
   iphase[4-1] = 2;    // ---------------------------------------------------
-  // Set up melting/freezing index, 
+  // Set up melting/freezing index,
   // if an ice category melts/freezes, where does it go?
   // ---------------------------------------------------
   imelt[5-1] = -99;
@@ -457,7 +457,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
   //             1.  *** INITIAL VALUES FOR VARIABLES ***
   //######################################################################
   // ----------------------
-  // non CLV initialization 
+  // non CLV initialization
   // ----------------------
   for (jk=1; jk<=klev; jk+=1) {
     for (jl=kidia; jl<=kfdia; jl+=1) {
@@ -584,25 +584,25 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     for (jl=kidia; jl<=kfdia; jl+=1) {
       //----------------------------------------
       // old *diagnostic* mixed phase saturation
-      //---------------------------------------- 
+      //----------------------------------------
       zfoealfa[jk-1][jl-1] = (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk-1][jl-1])) - rtice)*rtwat_rtice_r, 2)));
       zfoeewmt[jk-1][jl-1] = fmin((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk-1][jl-1])) - rtice)*rtwat_rtice_r, 2)))*exp((r3les*(ztp1[jk-1][jl-1] - rtt))/(ztp1[jk-1][jl-1] - r4les)) + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk-1][jl-1])) - rtice)*rtwat_rtice_r, 2))))*exp((r3ies*(ztp1[jk-1][jl-1] - rtt))/(ztp1[jk-1][jl-1] - r4ies))))/pap[jk-1][jl-1], 0.5);
       zqsmix[jk-1][jl-1] = zfoeewmt[jk-1][jl-1];
       zqsmix[jk-1][jl-1] = zqsmix[jk-1][jl-1]/(1.0 - retv*zqsmix[jk-1][jl-1]);        //---------------------------------------------
       // ice saturation T<273K
-      // liquid water saturation for T>273K 
+      // liquid water saturation for T>273K
       //---------------------------------------------
       zalfa = (double)(fmax(0.0, copysign(1.0, ztp1[jk-1][jl-1] - rtt)));
       zfoeew[jk-1][jl-1] = fmin((zalfa*(double)(r2es*exp((r3les*(ztp1[jk-1][jl-1] - rtt))/(ztp1[jk-1][jl-1] - r4les))) + (1.0 - zalfa)*(double)(r2es*exp((r3ies*(ztp1[jk-1][jl-1] - rtt))/(ztp1[jk-1][jl-1] - r4ies))))/pap[jk-1][jl-1], 0.5);
       zfoeew[jk-1][jl-1] = fmin(0.5, zfoeew[jk-1][jl-1]);
       zqsice[jk-1][jl-1] = zfoeew[jk-1][jl-1]/(1.0 - retv*zfoeew[jk-1][jl-1]);        //----------------------------------
       // liquid water saturation
-      //---------------------------------- 
+      //----------------------------------
       zfoeeliqt[jk-1][jl-1] = fmin((double)(r2es*exp((r3les*(ztp1[jk-1][jl-1] - rtt))/(ztp1[jk-1][jl-1] - r4les)))/pap[jk-1][jl-1], 0.5);
       zqsliq[jk-1][jl-1] = zfoeeliqt[jk-1][jl-1];
       zqsliq[jk-1][jl-1] = zqsliq[jk-1][jl-1]/(1.0 - retv*zqsliq[jk-1][jl-1]);        //      //----------------------------------
       //      // ice water saturation
-      //      //---------------------------------- 
+      //      //----------------------------------
       //      ZFOEEICET(JL,JK)=MIN(FOEEICE(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB)
       //      ZQSICE(JL,JK)=ZFOEEICET(JL,JK)
       //      ZQSICE(JL,JK)=ZQSICE(JL,JK)/(1.0_JPRB-RETV*ZQSICE(JL,JK))
@@ -642,11 +642,11 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
   //----------------------------------------------------------------------
   // This is test code. Instead of resetting cloud water or cloud cover
   // to zero if one of the two is zero, here we slave the cloud cover
-  // to the cloud water variable. I.e. if cloud cover is zero it is 
+  // to the cloud water variable. I.e. if cloud cover is zero it is
   // set to an appropriate non-zero value.
   // It uses a Beta curve to get the variance and then derive cloud cover.
-  // It is quite slow since it involves iteration, and should be left 
-  // until a fully prognostic variance equation for total water is 
+  // It is quite slow since it involves iteration, and should be left
+  // until a fully prognostic variance equation for total water is
   // implemented
   //-----------------------------------------------------------------------
   //IF (.FALSE.) THEN
@@ -655,24 +655,24 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
   // & ( KIDIA, KFDIA, KLON  , KLEV  , 1    , KLEV, &
   // &   ZTP1, ZQP1, ZQSMIX, ZLI, PAP, &
   //---output
-  // &   ZVAR, ZQTMIN, ZQTMAX ) //last two are dummy args  
+  // &   ZVAR, ZQTMIN, ZQTMAX ) //last two are dummy args
   //  CALL COVER &
   //---input
   // & ( KIDIA, KFDIA , KLON, KLEV, 1, KLEV, &
   // &   ZA, ZTP1,  ZQP1, ZQSMIX, ZLI, PAP, ZVAR, &
   //---output
-  // &   ZQTMAX, ZABETA )  
+  // &   ZQTMAX, ZABETA )
   //  DO JK=1,KLEV
   //    DO JL=KIDIA,KFDIA
   //      IF (ZLI(JL,JK)/MAX(ZA(JL,JK),ZEPSEC)>RCLDMAX) THEN
-  //        ZA(JL,JK)=ZABETA(JL,JK) // not part of tendency       
+  //        ZA(JL,JK)=ZABETA(JL,JK) // not part of tendency
   //      ENDIF
   //    ENDDO
   //  ENDDO
   //ENDIF
   //--------------------------------------
   // NPM
-  // Initialize liq water temperature T_L 
+  // Initialize liq water temperature T_L
   // Not used at present
   //--------------------------------------
   //ZTL(:,:)=ZTP1(:,:)
@@ -759,17 +759,17 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
       zlcond2[jl-1] = 0.0;
       zsupsat[jl-1] = 0.0;
       zlevapl[jl-1] = 0.0;
-      zlevapi[jl-1] = 0.0;        //-------------------------------------                
-      // solvers for cloud fraction                          
-      //-------------------------------------                
+      zlevapi[jl-1] = 0.0;        //-------------------------------------
+      // solvers for cloud fraction
+      //-------------------------------------
       zsolab[jl-1] = 0.0;
       zsolac[jl-1] = 0.0;
       zicetot[jl-1] = 0.0;
     }
 
-    //------------------------------------------           
-    // reset matrix so missing pathways are set            
-    //------------------------------------------           
+    //------------------------------------------
+    // reset matrix so missing pathways are set
+    //------------------------------------------
     for (jm=1; jm<=5; jm+=1) {
       for (jn=1; jn<=5; jn+=1) {
         for (jl=kidia; jl<=kfdia; jl+=1) {
@@ -781,9 +781,9 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 
     }
 
-    //----------------------------------                   
-    // reset new microphysics variables                    
-    //----------------------------------                   
+    //----------------------------------
+    // reset new microphysics variables
+    //----------------------------------
     for (jm=1; jm<=5; jm+=1) {
       for (jl=kidia; jl<=kfdia; jl+=1) {
         zfallsrce[jm-1][jl-1] = 0.0;
@@ -861,15 +861,15 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     //---------------------------------------------------------------------
     //  3.1  ICE SUPERSATURATION ADJUSTMENT
     //---------------------------------------------------------------------
-    // Note that the supersaturation adjustment is made with respect to 
-    // liquid saturation:  when T>0C 
+    // Note that the supersaturation adjustment is made with respect to
+    // liquid saturation:  when T>0C
     // ice saturation:     when T<0C
-    //                     with an adjustment made to allow for ice 
+    //                     with an adjustment made to allow for ice
     //                     supersaturation in the clear sky
     // Note also that the KOOP factor automatically clips the supersaturation
     // to a maximum set by the liquid water saturation mixing ratio
     // important for temperatures near to but below 0C
-    //----------------------------------------------------------------------- 
+    //-----------------------------------------------------------------------
     //DIR$ NOFUSION
     for (jl=kidia; jl<=kfdia; jl+=1) {
       //-----------------------------------
@@ -890,7 +890,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
       }
 
       //-------------------------------------------------------------------
-      // 3.1.2 Calculate supersaturation wrt Koop including dqs/dT 
+      // 3.1.2 Calculate supersaturation wrt Koop including dqs/dT
       //       correction factor
       // [#Note: QSICE or QSLIQ]
       //-------------------------------------------------------------------
@@ -913,14 +913,14 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
       {
         if (ztp1[jk-1][jl-1] > yrecldp->rthomo)
         {
-          // Turn supersaturation into liquid water        
+          // Turn supersaturation into liquid water
           zsolqa[5-1][1-1][jl-1] = zsolqa[5-1][1-1][jl-1] + zsupsat[jl-1];
           zsolqa[1-1][5-1][jl-1] = zsolqa[1-1][5-1][jl-1] - zsupsat[jl-1];            // Include liquid in first guess
           zqxfg[1-1][jl-1] = zqxfg[1-1][jl-1] + zsupsat[jl-1];
         } else {
-          // Turn supersaturation into ice water        
+          // Turn supersaturation into ice water
           zsolqa[5-1][2-1][jl-1] = zsolqa[5-1][2-1][jl-1] + zsupsat[jl-1];
-          zsolqa[2-1][5-1][jl-1] = zsolqa[2-1][5-1][jl-1] - zsupsat[jl-1];            // Add ice to first guess for deposition term 
+          zsolqa[2-1][5-1][jl-1] = zsolqa[2-1][5-1][jl-1] - zsupsat[jl-1];            // Add ice to first guess for deposition term
           zqxfg[2-1][jl-1] = zqxfg[2-1][jl-1] + zsupsat[jl-1];
         }
 
@@ -938,12 +938,12 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	{
 	  // Turn supersaturation into liquid water
 	  zsolqa[1-1][1-1][jl-1] = zsolqa[1-1][1-1][jl-1] + psupsat[jk-1][jl-1];
-	  zpsupsatsrce[1-1][jl-1] = psupsat[jk-1][jl-1];                // Add liquid to first guess for deposition term 
+	  zpsupsatsrce[1-1][jl-1] = psupsat[jk-1][jl-1];                // Add liquid to first guess for deposition term
 	  zqxfg[1-1][jl-1] = zqxfg[1-1][jl-1] + psupsat[jk-1][jl-1];                // Store cloud budget diagnostics if required
 	} else {
 	  // Turn supersaturation into ice water
 	  zsolqa[2-1][2-1][jl-1] = zsolqa[2-1][2-1][jl-1] + psupsat[jk-1][jl-1];
-	  zpsupsatsrce[2-1][jl-1] = psupsat[jk-1][jl-1];                // Add ice to first guess for deposition term 
+	  zpsupsatsrce[2-1][jl-1] = psupsat[jk-1][jl-1];                // Add ice to first guess for deposition term
 	  zqxfg[2-1][jl-1] = zqxfg[2-1][jl-1] + psupsat[jk-1][jl-1];                // Store cloud budget diagnostics if required
 	}
 
@@ -957,11 +957,11 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     //  3.2  DETRAINMENT FROM CONVECTION
     //---------------------------------------------------------------------
     // * Diagnostic T-ice/liq split retained for convection
-    //    Note: This link is now flexible and a future convection 
+    //    Note: This link is now flexible and a future convection
     //    scheme can detrain explicit seperate budgets of:
     //    cloud water, ice, rain and snow
-    // * There is no (1-ZA) multiplier term on the cloud detrainment 
-    //    term, since is now written in mass-flux terms  
+    // * There is no (1-ZA) multiplier term on the cloud detrainment
+    //    term, since is now written in mass-flux terms
     // [#Note: Should use ZFOEALFACU used in convection rather than ZFOEALFA]
     //---------------------------------------------------------------------
     if (jk >= yrecldp->ncldtop && jk < klev)
@@ -996,7 +996,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     //---------------------------------------------------------------------
     //-----------------------------------------------
     // Subsidence source from layer above
-    //               and 
+    //               and
     // Evaporation of cloud within the layer
     //-----------------------------------------------
     if (jk > yrecldp->ncldtop)
@@ -1018,9 +1018,9 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 
       }
 
-      // Now have to work out how much liquid evaporates at arrival point 
-      // since there is no prognostic memory for in-cloud humidity, i.e. 
-      // we always assume cloud is saturated. 
+      // Now have to work out how much liquid evaporates at arrival point
+      // since there is no prognostic memory for in-cloud humidity, i.e.
+      // we always assume cloud is saturated.
       for (jl=kidia; jl<=kfdia; jl+=1) {
 	zdtdp = ((zrdcp*0.5)*(ztp1[jk-1-1][jl-1] + ztp1[jk-1][jl-1]))/paph[jk-1][jl-1];
 	zdtforc = zdtdp*(pap[jk-1][jl-1] - pap[jk-1-1][jl-1]);            //[#Note: Diagnostic mixed phase should be replaced below]
@@ -1058,7 +1058,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     }
 
     //---------------------------------------------------------------------
-    // Subsidence sink of cloud to the layer below 
+    // Subsidence sink of cloud to the layer below
     // (Implicit - re. CFL limit on convective mass flux)
     //---------------------------------------------------------------------
     for (jl=kidia; jl<=kfdia; jl+=1) {
@@ -1077,7 +1077,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     //----------------------------------------------------------------------
     // 3.4  EROSION OF CLOUDS BY TURBULENT MIXING
     //----------------------------------------------------------------------
-    // NOTE: In default tiedtke scheme this process decreases the cloud 
+    // NOTE: In default tiedtke scheme this process decreases the cloud
     //       area but leaves the specific cloud water content
     //       within clouds unchanged
     //----------------------------------------------------------------------
@@ -1095,13 +1095,13 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 
     // At the moment, works on mixed RH profile and partitioned ice/liq fraction
     // so that it is similar to previous scheme
-    // Should apply RHw for liquid cloud and RHi for ice cloud separately 
+    // Should apply RHw for liquid cloud and RHi for ice cloud separately
     for (jl=kidia; jl<=kfdia; jl+=1) {
       if (zli[jk-1][jl-1] > zepsec)
       {
 	// Calculate environmental humidity
 	//      ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/&
-	//    &      MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK))  
+	//    &      MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK))
 	//      ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQE,0.0_JPRB)
 	ze = zldifdt[jl-1]*fmax(zqsmix[jk-1][jl-1] - zqx[5-1][jk-1][jl-1], 0.0);
 	zleros = za[jk-1][jl-1]*ze;
@@ -1124,15 +1124,15 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     //  Note: For the separate prognostic Qi and Ql, one would ideally use
     //  Qsat/DT wrt liquid/Koop here, since the physics is that new clouds
     //  forms by liquid droplets [liq] or when aqueous aerosols [Koop] form.
-    //  These would then instantaneous freeze if T<-38C or lead to ice growth 
-    //  by deposition in warmer mixed phase clouds.  However, since we do 
-    //  not have a separate prognostic equation for in-cloud humidity or a 
-    //  statistical scheme approach in place, the depositional growth of ice 
-    //  in the mixed phase can not be modelled and we resort to supersaturation  
-    //  wrt ice instanteously converting to ice over one timestep 
+    //  These would then instantaneously freeze if T<-38C or lead to ice growth
+    //  by deposition in warmer mixed phase clouds.  However, since we do
+    //  not have a separate prognostic equation for in-cloud humidity or a
+    //  statistical scheme approach in place, the depositional growth of ice
+    //  in the mixed phase can not be modelled and we resort to supersaturation
+    //  wrt ice instantaneously converting to ice over one timestep
     //  (see Tompkins et al. QJRMS 2007 for details)
-    //  Thus for the initial implementation the diagnostic mixed phase is 
-    //  retained for the moment, and the level of approximation noted.  
+    //  Thus for the initial implementation the diagnostic mixed phase is
+    //  retained for the moment, and the level of approximation noted.
     //----------------------------------------------------------------------
     for (jl=kidia; jl<=kfdia; jl+=1) {
       zdtdp = (zrdcp*ztp1[jk-1][jl-1])/pap[jk-1][jl-1];
@@ -1231,7 +1231,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 
         //-------------------------------------------------------------------------
         // All increase goes into liquid unless so cold cloud homogeneously freezes
-        // Include new liquid formation in first guess value, otherwise liquid 
+        // Include new liquid formation in first guess value, otherwise liquid
         // remains at cold temperatures until next timestep.
         //-------------------------------------------------------------------------
         if (ztp1[jk-1][jl-1] > yrecldp->rthomo)
@@ -1271,7 +1271,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
         //      ENDIF
         //---------------------------
         // Supersaturation options
-        //---------------------------      
+        //---------------------------
         if (yrecldp->nssopt == 0)
         {
           // No scheme
@@ -1280,7 +1280,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
         } else {
           if (yrecldp->nssopt == 1)
           {
-            // Tompkins 
+            // Tompkins
             zqe = (zqx[5-1][jk-1][jl-1] - za[jk-1][jl-1]*zqsice[jk-1][jl-1])*1.0/fmax(zepsec, 1.0 - za[jk-1][jl-1]);
             zqe = fmax(0.0, zqe);
           } else {
@@ -1312,7 +1312,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 
         if (zqe >= zqsice[jk-1][jl-1]*zfac*zrhc && zqe < zqsice[jk-1][jl-1]*zfac)
         {
-          // note: not **2 on 1-a term if ZQE is used. 
+          // note: not **2 on 1-a term if ZQE is used.
           // Added correction term ZFAC to numerator 15/03/2010
           zacond = -((1.0 - za[jk-1][jl-1])*zfac)*zdqs[jl-1]*1.0/fmax(2.0*(zfac*zqsice[jk-1][jl-1] - zqe), zepsec);
           zacond = fmin(zacond, 1.0 - za[jk-1][jl-1]);            // Linear term:
@@ -1342,7 +1342,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
           zsolac[jl-1] = zsolac[jl-1] + zacond;            // Store cloud fraction diagnostic if required
           //------------------------------------------------------------------------
           // All increase goes into liquid unless so cold cloud homogeneously freezes
-          // Include new liquid formation in first guess value, otherwise liquid 
+          // Include new liquid formation in first guess value, otherwise liquid
           // remains at cold temperatures until next timestep.
           //------------------------------------------------------------------------
           if (ztp1[jk-1][jl-1] > yrecldp->rthomo)
@@ -1363,14 +1363,14 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     }
 
     //----------------------------------------------------------------------
-    // 3.7 Growth of ice by vapour deposition 
+    // 3.7 Growth of ice by vapour deposition
     //----------------------------------------------------------------------
     // Following Rotstayn et al. 2001:
     // does not use the ice nuclei number from cloudaer.F90
-    // but rather a simple Meyers et al. 1992 form based on the 
-    // supersaturation and assuming clouds are saturated with 
+    // but rather a simple Meyers et al. 1992 form based on the
+    // supersaturation and assuming clouds are saturated with
     // respect to liquid water (well mixed), (or Koop adjustment)
-    // Growth considered as sink of liquid water if present so 
+    // Growth considered as sink of liquid water if present so
     // Bergeron-Findeisen adjustment in autoconversion term no longer needed
     //----------------------------------------------------------------------
     //--------------------------------------------------------
@@ -1383,7 +1383,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     {
       for (jl=kidia; jl<=kfdia; jl+=1) {
 	//--------------------------------------------------------------
-	// Calculate distance from cloud top 
+	// Calculate distance from cloud top
 	// defined by cloudy layer below a layer with cloud frac <0.01
 	// ZDZ = ZDP(JL)/(ZRHO(JL)*RG)
 	//--------------------------------------------------------------
@@ -1395,8 +1395,8 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	}
 
 	//--------------------------------------------------------------
-	// only treat depositional growth if liquid present. due to fact 
-	// that can not model ice growth from vapour without additional 
+	// only treat depositional growth if liquid present. due to fact
+	// that can not model ice growth from vapour without additional
 	// in-cloud water vapour variable
 	//--------------------------------------------------------------
 	if (zqxfg[1-1][jl-1] > yrecldp->rlmin && ztp1[jk-1][jl-1] < rtt)
@@ -1417,26 +1417,26 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	  //------------------
 	  zinew = pow((0.666*zcvds)*ptsphy + pow(zice0, 0.666), 1.5);              //---------------------------
 	  // grid-mean deposition rate:
-	  //--------------------------- 
+	  //---------------------------
 	  zdepos = fmax(za[jk-1][jl-1]*(zinew - zice0), 0.0);              //--------------------------------------------------------------------
 	  // Limit deposition to liquid water amount
-	  // If liquid is all frozen, ice would use up reservoir of water 
-	  // vapour in excess of ice saturation mixing ratio - However this 
-	  // can not be represented without a in-cloud humidity variable. Using 
-	  // the grid-mean humidity would imply a large artificial horizontal 
-	  // flux from the clear sky to the cloudy area. We thus rely on the 
+	  // If liquid is all frozen, ice would use up reservoir of water
+	  // vapour in excess of ice saturation mixing ratio - However this
+	  // can not be represented without an in-cloud humidity variable. Using
+	  // the grid-mean humidity would imply a large artificial horizontal
+	  // flux from the clear sky to the cloudy area. We thus rely on the
 	  // supersaturation check to clean up any remaining supersaturation
 	  //--------------------------------------------------------------------
 	  zdepos = fmin(zdepos, zqxfg[1-1][jl-1]);              //--------------------------------------------------------------------
 	  // At top of cloud, reduce deposition rate near cloud top to account for
-	  // small scale turbulent processes, limited ice nucleation and ice fallout 
+	  // small scale turbulent processes, limited ice nucleation and ice fallout
 	  //--------------------------------------------------------------------
 	  //      ZDEPOS = ZDEPOS*MIN(RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH,1.0_JPRB)
 	  // Change to include dependence on ice nuclei concentration
-	  // to increase deposition rate with decreasing temperatures 
+	  // to increase deposition rate with decreasing temperatures
 	  zinfactor = fmin(zicenuclei[jl-1]/15000.0, 1.0);
 	  zdepos = zdepos*fmin(zinfactor + (1.0 - zinfactor)*(yrecldp->rdepliqrefrate + zcldtopdist[jl-1]/yrecldp->rdepliqrefdepth), 1.0);              //--------------
-	  // add to matrix 
+	  // add to matrix
 	  //--------------
 	  zsolqa[1-1][2-1][jl-1] = zsolqa[1-1][2-1][jl-1] + zdepos;
 	  zsolqa[2-1][1-1][jl-1] = zsolqa[2-1][1-1][jl-1] - zdepos;
@@ -1456,7 +1456,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
       {
 	for (jl=kidia; jl<=kfdia; jl+=1) {
 	  //--------------------------------------------------------------
-	  // Calculate distance from cloud top 
+	  // Calculate distance from cloud top
 	  // defined by cloudy layer below a layer with cloud frac <0.01
 	  // ZDZ = ZDP(JL)/(ZRHO(JL)*RG)
 	  //--------------------------------------------------------------
@@ -1468,8 +1468,8 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	  }
 
 	  //--------------------------------------------------------------
-	  // only treat depositional growth if liquid present. due to fact 
-	  // that can not model ice growth from vapour without additional 
+	  // only treat depositional growth if liquid present. due to fact
+	  // that can not model ice growth from vapour without additional
 	  // in-cloud water vapour variable
 	  //--------------------------------------------------------------
 	  if (zqxfg[1-1][jl-1] > yrecldp->rlmin && ztp1[jk-1][jl-1] < rtt)
@@ -1490,22 +1490,22 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	    zterm2 = (0.65*yrecldp->rcl_const6i)*pow(zpr02, yrecldp->rcl_const4i) + (((yrecldp->rcl_const3i*sqrt(zcorrfac))*sqrt(zrho[jl-1]))*pow(zpr02, yrecldp->rcl_const5i))/sqrt(zcorrfac2);
 	    zdepos = fmax(((za[jk-1][jl-1]*zterm1)*zterm2)*ptsphy, 0.0);                //--------------------------------------------------------------------
 	    // Limit deposition to liquid water amount
-	    // If liquid is all frozen, ice would use up reservoir of water 
-	    // vapour in excess of ice saturation mixing ratio - However this 
-	    // can not be represented without a in-cloud humidity variable. Using 
-	    // the grid-mean humidity would imply a large artificial horizontal 
-	    // flux from the clear sky to the cloudy area. We thus rely on the 
+	    // If liquid is all frozen, ice would use up reservoir of water
+	    // vapour in excess of ice saturation mixing ratio - However this
+	    // can not be represented without an in-cloud humidity variable. Using
+	    // the grid-mean humidity would imply a large artificial horizontal
+	    // flux from the clear sky to the cloudy area. We thus rely on the
 	    // supersaturation check to clean up any remaining supersaturation
 	    //--------------------------------------------------------------------
 	    zdepos = fmin(zdepos, zqxfg[1-1][jl-1]);                //--------------------------------------------------------------------
 	    // At top of cloud, reduce deposition rate near cloud top to account for
-	    // small scale turbulent processes, limited ice nucleation and ice fallout 
+	    // small scale turbulent processes, limited ice nucleation and ice fallout
 	    //--------------------------------------------------------------------
 	    // Change to include dependence on ice nuclei concentration
-	    // to increase deposition rate with decreasing temperatures 
+	    // to increase deposition rate with decreasing temperatures
 	    zinfactor = fmin(zicenuclei[jl-1]/15000.0, 1.0);
 	    zdepos = zdepos*fmin(zinfactor + (1.0 - zinfactor)*(yrecldp->rdepliqrefrate + zcldtopdist[jl-1]/yrecldp->rdepliqrefdepth), 1.0);                //--------------
-	    // add to matrix 
+	    // add to matrix
 	    //--------------
 	    zsolqa[1-1][2-1][jl-1] = zsolqa[1-1][2-1][jl-1] + zdepos;
 	    zsolqa[2-1][1-1][jl-1] = zsolqa[2-1][1-1][jl-1] - zdepos;
@@ -1543,7 +1543,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
       {
 	for (jl=kidia; jl<=kfdia; jl+=1) {
 	  //------------------------
-	  // source from layer above 
+	  // source from layer above
 	  //------------------------
 	  if (jk > yrecldp->ncldtop)
 	  {
@@ -1556,11 +1556,11 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	  //-------------------------------------------------
 	  // sink to next layer, constant fall speed
 	  //-------------------------------------------------
-	  // if aerosol effect then override 
+	  // if aerosol effect then override
 	  //  note that for T>233K this is the same as above.
 	  if (yrecldp->laericesed && jm == 2)
 	  {
-	    zre_ice = pre_ice[jk-1][jl-1];                // The exponent value is from 
+	    zre_ice = pre_ice[jk-1][jl-1];                // The exponent value is from
 	    // Morrison et al. JAS 2005 Appendix
 	    zvqx[2-1] = 0.002*pow(zre_ice, 1.0);
 	  }
@@ -1577,17 +1577,17 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 
     //---------------------------------------------------------------
     // Precip cover overlap using MAX-RAN Overlap
-    // Since precipitation is now prognostic we must 
+    // Since precipitation is now prognostic we must
     //   1) apply an arbitrary minimum coverage (0.3) if precip>0
     //   2) abandon the 2-flux clr/cld treatment
     //   3) Thus, since we have no memory of the clear sky precip
-    //      fraction, we mimic the previous method by reducing 
-    //      ZCOVPTOT(JL), which has the memory, proportionally with 
-    //      the precip evaporation rate, taking cloud fraction 
+    //      fraction, we mimic the previous method by reducing
+    //      ZCOVPTOT(JL), which has the memory, proportionally with
+    //      the precip evaporation rate, taking cloud fraction
     //      into account
-    //   #3 above leads to much smoother vertical profiles of 
-    //   precipitation fraction than the Klein-Jakob scheme which 
-    //   monotonically increases precip fraction and then resets 
+    //   #3 above leads to much smoother vertical profiles of
+    //   precipitation fraction than the Klein-Jakob scheme which
+    //   monotonically increases precip fraction and then resets
     //   it to zero in a step function once clear-sky precip reaches
     //   zero.
     //---------------------------------------------------------------
@@ -1624,7 +1624,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	  zzco = (ptsphy*yrecldp->rsnowlin1)*exp(yrecldp->rsnowlin2*(ztp1[jk-1][jl-1] - rtt));
 	  if (yrecldp->laericeauto)
 	  {
-	    zlcrit = picrit_aer[jk-1][jl-1];                // 0.3 = N**0.333 with N=0.027 
+	    zlcrit = picrit_aer[jk-1][jl-1];                // 0.3 = N**0.333 with N=0.027
 	    zzco = zzco*pow(yrecldp->rnice/pnice[jk-1][jl-1], 0.333);
 	  } else {
 	    zlcrit = yrecldp->rlcritsnow;
@@ -1653,10 +1653,10 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	  zzco = yrecldp->rkconv*ptsphy;
 	  if (yrecldp->laerliqautolsp)
 	  {
-	    zlcrit = plcrit_aer[jk-1][jl-1];                // 0.3 = N**0.333 with N=125 cm-3 
+	    zlcrit = plcrit_aer[jk-1][jl-1];                // 0.3 = N**0.333 with N=125 cm-3
 	    zzco = zzco*pow(yrecldp->rccn/pccn[jk-1][jl-1], 0.333);
 	  } else {
-	    // Modify autoconversion threshold dependent on: 
+	    // Modify autoconversion threshold dependent on:
 	    //  land (polluted, high CCN, smaller droplets, higher threshold)
 	    //  sea  (clean, low CCN, larger droplets, lower threshold)
 	    if (plsm[jl-1] > 0.5)
@@ -1670,15 +1670,15 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 
 	  //------------------------------------------------------------------
 	  // Parameters for cloud collection by rain and snow.
-	  // Note that with new prognostic variable it is now possible 
+	  // Note that with new prognostic variable it is now possible
 	  // to REPLACE this with an explicit collection parametrization
-	  //------------------------------------------------------------------   
+	  //------------------------------------------------------------------
 	  zprecip = (zpfplsx[4-1][jk-1][jl-1] + zpfplsx[3-1][jk-1][jl-1])*1.0/fmax(zepsec, zcovptot[jl-1]);
 	  zcfpr = 1.0 + yrecldp->rprc1*sqrt(fmax(zprecip, 0.0));              //      ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB))*&
 	  //       &ZCOVPTOT(JL)/(MAX(ZA(JL,JK),ZEPSEC))
 	  if (yrecldp->laerliqcoll)
 	  {
-	    // 5.0 = N**0.333 with N=125 cm-3 
+	    // 5.0 = N**0.333 with N=125 cm-3
 	    zcfpr = zcfpr*pow(yrecldp->rccn/pccn[jk-1][jl-1], 0.333);
 	  }
 
@@ -1770,10 +1770,8 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
       for (jl=kidia; jl<=kfdia; jl+=1) {
 	if (ztp1[jk-1][jl-1] <= rtt && zliqcld[jl-1] > zepsec)
 	{
-	  // Fallspeed air density correction 
-	  // TODO: THIS IS A BUG! Due to a missing ``_JPRB`` in the original,
-	  // we need to cast the exponent down to single precision to re-create.
-	  zfallcorr = pow(yrecldp->rdensref/zrho[jl-1], (float)0.4);
+	  // Fallspeed air density correction
+	  zfallcorr = pow(yrecldp->rdensref/zrho[jl-1], 0.4);
 	  //------------------------------------------------------------------
 	  // Riming of snow by cloud water - implicit in lwc
 	  //------------------------------------------------------------------
@@ -1822,12 +1820,12 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
       if (zicetot[jl-1] > zepsec && ztp1[jk-1][jl-1] > rtt)
       {
 	// Calculate subsaturation
-	zsubsat = fmax(zqsice[jk-1][jl-1] - zqx[5-1][jk-1][jl-1], 0.0);            // Calculate difference between dry-bulb (ZTP1) and the temperature 
+	zsubsat = fmax(zqsice[jk-1][jl-1] - zqx[5-1][jk-1][jl-1], 0.0);            // Calculate difference between dry-bulb (ZTP1) and the temperature
 	// at which the wet-bulb=0degC (RTT-ZSUBSAT*....) using an approx.
 	// Melting only occurs if the wet-bulb temperature >0
-	// i.e. warming of ice particle due to melting > cooling 
+	// i.e. warming of ice particle due to melting > cooling
 	// due to evaporation.
-	ztdmtw0 = ztp1[jk-1][jl-1] - rtt - zsubsat*(ztw1 + ztw2*(pap[jk-1][jl-1] - ztw3) - ztw4*(ztp1[jk-1][jl-1] - ztw5));            // Not implicit yet... 
+	ztdmtw0 = ztp1[jk-1][jl-1] - rtt - zsubsat*(ztw1 + ztw2*(pap[jk-1][jl-1] - ztw3) - ztw4*(ztp1[jk-1][jl-1] - ztw5));            // Not implicit yet...
 	// Ensure ZCONS1 is positive so that ZMELTMAX=0 if ZTDMTW0<0
 	zcons1 = fabs((ptsphy*(1.0 + 0.5*ztdmtw0))/yrecldp->rtaumel);
 	zmeltmax[jl-1] = fmax((ztdmtw0*zcons1)*zrldcp, 0.0);
@@ -1843,7 +1841,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	for (jl=kidia; jl<=kfdia; jl+=1) {
 	  if (zicetot[jl-1] > zepsec && zmeltmax[jl-1] > zepsec)
 	  {
-	    // Apply melting in same proportion as frozen hydrometeor fractions 
+	    // Apply melting in same proportion as frozen hydrometeor fractions
 	    zalfa = zqxfg[jm-1][jl-1]/zicetot[jl-1];
 	    zmelt = fmin(zqxfg[jm-1][jl-1], zalfa*zmeltmax[jl-1]);                // needed in first guess
 	    // This implies that zqpretot has to be recalculated below
@@ -1890,7 +1888,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	    zfrz = ((ptsphy*(yrecldp->rcl_const5r/zrho[jl-1]))*(exp(ztemp) - 1.0))*pow(zlambda, yrecldp->rcl_const6r);
 	    zfrzmax[jl-1] = fmax(zfrz, 0.0);
 	  } else {
-	    // Majority of raindrops only partially melted 
+	    // Majority of raindrops only partially melted
 	    // Refreeze with a shorter timescale (reverse of melting...for now)
 	    zcons1 = fabs((ptsphy*(1.0 + 0.5*(rtt - ztp1[jk-1][jl-1])))/yrecldp->rtaumel);
 	    zfrzmax[jl-1] = fmax(((rtt - ztp1[jk-1][jl-1])*zcons1)*zrldcp, 0.0);
@@ -1910,10 +1908,10 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     }
 
     //----------------------------------------------------------------------
-    // 4.4c  FREEZING of LIQUID 
+    // 4.4c  FREEZING of LIQUID
     //----------------------------------------------------------------------
     for (jl=kidia; jl<=kfdia; jl+=1) {
-      // not implicit yet... 
+      // not implicit yet...
       zfrzmax[jl-1] = fmax((yrecldp->rthomo - ztp1[jk-1][jl-1])*zrldcp, 0.0);
     }
 
@@ -1960,7 +1958,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	  // add evaporation term to explicit sink.
 	  // this has to be explicit since if treated in the implicit
 	  // term evaporation can not reduce rain to zero and model
-	  // produces small amounts of rainfall everywhere. 
+	  // produces small amounts of rainfall everywhere.
 	  //---------------------------------------------------------
 	  // Evaporate rain
 	  zevap = fmin(zdpevap, zqxfg[3-1][jl-1]);
@@ -1985,10 +1983,10 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
       {
 	for (jl=kidia; jl<=kfdia; jl+=1) {
 	  //-----------------------------------------------------------------------
-	  // Calculate relative humidity limit for rain evaporation 
+	  // Calculate relative humidity limit for rain evaporation
 	  // to avoid cloud formation and saturation of the grid box
 	  //-----------------------------------------------------------------------
-	  // Limit RH for rain evaporation dependent on precipitation fraction 
+	  // Limit RH for rain evaporation dependent on precipitation fraction
 	  zzrh = yrecldp->rprecrhmax + ((1.0 - yrecldp->rprecrhmax)*zcovpmax[jl-1])*1.0/fmax(zepsec, 1.0 - za[jk-1][jl-1]);
 	  zzrh = fmin(fmax(zzrh, yrecldp->rprecrhmax), 1.0);              // Critical relative humidity
 	  //ZRHC=RAMID
@@ -2008,7 +2006,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	    // Abel and Boutle (2012) evaporation
 	    //-------------------------------------------
 	    // Calculate local precipitation (kg/kg)
-	    zpreclr = zqxfg[3-1][jl-1]/zcovptot[jl-1];                // Fallspeed air density correction 
+	    zpreclr = zqxfg[3-1][jl-1]/zcovptot[jl-1];                // Fallspeed air density correction
 	    zfallcorr = pow(yrecldp->rdensref/zrho[jl-1], 0.4);                // Saturation vapour pressure with respect to liquid phase
 	    zesatliq = (rv/rd)*(double)(r2es*exp((r3les*(ztp1[jk-1][jl-1] - rtt))/(ztp1[jk-1][jl-1] - r4les)));                // Slope of particle size distribution
 	    zlambda = pow(yrecldp->rcl_fac1/((zrho[jl-1]*zpreclr)), yrecldp->rcl_fac2);
@@ -2022,7 +2020,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	    // Add evaporation term to explicit sink.
 	    // this has to be explicit since if treated in the implicit
 	    // term evaporation can not reduce rain to zero and model
-	    // produces small amounts of rainfall everywhere. 
+	    // produces small amounts of rainfall everywhere.
 	    //---------------------------------------------------------
 	    // Limit rain evaporation
 	    zevap = fmin(zdpevap, zqxfg[3-1][jl-1]);
@@ -2033,7 +2031,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	    // to mimic the previous scheme which had a diagnostic
 	    // 2-flux treatment, abandoned due to the new prognostic precip
 	    //-------------------------------------------------------------
-	    zcovptot[jl-1] = fmax(yrecldp->rcovpmin, zcovptot[jl-1] - fmax(0.0, ((zcovptot[jl-1] - za[jk-1][jl-1])*zevap)/zqxfg[3-1][jl-1]));                // Update fg field 
+	    zcovptot[jl-1] = fmax(yrecldp->rcovpmin, zcovptot[jl-1] - fmax(0.0, ((zcovptot[jl-1] - za[jk-1][jl-1])*zevap)/zqxfg[3-1][jl-1]));                // Update fg field
 	    zqxfg[3-1][jl-1] = zqxfg[3-1][jl-1] - zevap;
 	  }
 
@@ -2071,7 +2069,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	  // add evaporation term to explicit sink.
 	  // this has to be explicit since if treated in the implicit
 	  // term evaporation can not reduce snow to zero and model
-	  // produces small amounts of snowfall everywhere. 
+	  // produces small amounts of snowfall everywhere.
 	  //---------------------------------------------------------
 	  // Evaporate snow
 	  zevap = fmin(zdpevap, zqxfg[4-1][jl-1]);
@@ -2094,7 +2092,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
       {
 	for (jl=kidia; jl<=kfdia; jl+=1) {
 	  //-----------------------------------------------------------------------
-	  // Calculate relative humidity limit for snow evaporation 
+	  // Calculate relative humidity limit for snow evaporation
 	  //-----------------------------------------------------------------------
 	  zzrh = yrecldp->rprecrhmax + ((1.0 - yrecldp->rprecrhmax)*zcovpmax[jl-1])*1.0/fmax(zepsec, 1.0 - za[jk-1][jl-1]);
 	  zzrh = fmin(fmax(zzrh, yrecldp->rprecrhmax), 1.0);
@@ -2108,8 +2106,8 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 	    // Calculate local precipitation (kg/kg)
 	    zpreclr = zqx[4-1][jk-1][jl-1]/zcovptot[jl-1];
 	    zvpice = ((double)(r2es*exp((r3ies*(ztp1[jk-1][jl-1] - rtt))/(ztp1[jk-1][jl-1] - r4ies)))*rv)/rd;                // Particle size distribution
-	    // ZTCG increases Ni with colder temperatures - essentially a 
-	    // Fletcher or Meyers scheme? 
+	    // ZTCG increases Ni with colder temperatures - essentially a
+	    // Fletcher or Meyers scheme?
 	    ztcg = 1.0;                // ZFACX1I modification is based on Andrew Barrett's results
 	    zfacx1s = 1.0;
 	    zaplusb = yrecldp->rcl_apb1*zvpice - yrecldp->rcl_apb2*zvpice*ztp1[jk-1][jl-1] + (pap[jk-1][jl-1]*yrecldp->rcl_apb3)*pow(ztp1[jk-1][jl-1], 3);
@@ -2185,7 +2183,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     // 5.2 solver for the microphysics
     //--------------------------------
     //--------------------------------------------------------------
-    // Truncate explicit sinks to avoid negatives 
+    // Truncate explicit sinks to avoid negatives
     // Note: Species are treated in the order in which they run out
     // since the clipping will alter the balance for the other vars
     //--------------------------------------------------------------
@@ -2229,7 +2227,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     }
 
     //--------------------------------------------
-    // scale the sink terms, in the correct order, 
+    // scale the sink terms, in the correct order,
     // recalculating the scale factor each time
     //--------------------------------------------
     for (jm=1; jm<=5; jm+=1) {
@@ -2291,7 +2289,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     // 5.2.2 Solver
     //------------------------
     //------------------------
-    // set the LHS of equation  
+    // set the LHS of equation
     //------------------------
     for (jm=1; jm<=5; jm+=1) {
       for (jn=1; jn<=5; jn+=1) {
@@ -2323,7 +2321,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     }
 
     //------------------------
-    // set the RHS of equation  
+    // set the RHS of equation
     //------------------------
     for (jm=1; jm<=5; jm+=1) {
       for (jl=kidia; jl<=kfdia; jl+=1) {
@@ -2347,9 +2345,9 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     //       assumes a good behaviour (i.e. non-zero diagonal
     //       terms with comparable orders) of the matrix stored
     //       in ZQLHS. For the moment this is the case but
-    //       be aware to preserve it when doing eventual 
+    //       be aware to preserve it when doing eventual
     //       modifications.
-    // Non pivoting recursive factorization 
+    // Non pivoting recursive factorization
     for (jn=1; jn<=4; jn+=1) {
       for (jm=jn + 1; jm<=5; jm+=1) {
 	for (jl=kidia; jl<=kfdia; jl+=1) {
@@ -2367,8 +2365,8 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
 
     }
 
-    // Backsubstitution 
-    //  step 1 
+    // Backsubstitution
+    //  step 1
     for (jn=2; jn<=5; jn+=1) {
       for (jm=1; jm<=jn - 1; jm+=1) {
 	for (jl=kidia; jl<=kfdia; jl+=1) {
@@ -2453,7 +2451,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
     //              6  *** UPDATE TENDANCIES ***
     //######################################################################
     //--------------------------------
-    // 6.1 Temperature and CLV budgets 
+    // 6.1 Temperature and CLV budgets
     //--------------------------------
     for (jm=1; jm<=4; jm+=1) {
       for (jl=kidia; jl<=kfdia; jl+=1) {
@@ -2478,7 +2476,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
       }
 
       //----------------------------------------------------------------------
-      // New prognostic tendencies - ice,liquid rain,snow 
+      // New prognostic tendencies - ice,liquid rain,snow
       // Note: CLV arrays use PCLV in calculation of tendency while humidity
       //       uses ZQX. This is due to clipping at start of cloudsc which
       //       include the tendency already in tendency_loc%T and tendency_loc%q. ZQX was reset
@@ -2495,7 +2493,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
       //----------------------
       tendency_loc_q[jk-1][jl-1] = tendency_loc_q[jk-1][jl-1] + (zqxn[5-1][jl-1] - zqx[5-1][jk-1][jl-1])*zqtmst;
       //-------------------
-      // 6.3 cloud cover 
+      // 6.3 cloud cover
       //-----------------------
       tendency_loc_a[jk-1][jl-1] = tendency_loc_a[jk-1][jl-1] + zda[jl-1]*zqtmst;
     }
@@ -2560,7 +2558,7 @@ int cloudsc_c(int kidia, int kfdia, int klon, int klev, double ptsphy, double *
       zalfaw = zfoealfa[jk-1][jl-1];          // Liquid , LS scheme minus detrainment
       pfsqlf[jk+1-1][jl-1] = pfsqlf[jk+1-1][jl-1] + (zqxn2d[1-1][jk-1][jl-1] - zqx0[1-1][jk-1][jl-1] + pvfl[jk-1][jl-1]*ptsphy - zalfaw*plude[jk-1][jl-1])*zgdph_r;          // liquid, negative numbers
       pfcqlng[jk+1-1][jl-1] = pfcqlng[jk+1-1][jl-1] + zlneg[1-1][jk-1][jl-1]*zgdph_r;          // liquid, vertical diffusion
-      pfsqltur[jk+1-1][jl-1] = pfsqltur[jk+1-1][jl-1] + (pvfl[jk-1][jl-1]*ptsphy)*zgdph_r;          // Rain, LS scheme 
+      pfsqltur[jk+1-1][jl-1] = pfsqltur[jk+1-1][jl-1] + (pvfl[jk-1][jl-1]*ptsphy)*zgdph_r;          // Rain, LS scheme
       pfsqrf[jk+1-1][jl-1] = pfsqrf[jk+1-1][jl-1] + (zqxn2d[3-1][jk-1][jl-1] - zqx0[3-1][jk-1][jl-1])*zgdph_r;          // rain, negative numbers
       pfcqrng[jk+1-1][jl-1] = pfcqrng[jk+1-1][jl-1] + zlneg[3-1][jk-1][jl-1]*zgdph_r;          // Ice , LS scheme minus detrainment
       pfsqif[jk+1-1][jl-1] = pfsqif[jk+1-1][jl-1] + (zqxn2d[2-1][jk-1][jl-1] - zqx0[2-1][jk-1][jl-1] + pvfi[jk-1][jl-1]*ptsphy - (1.0 - zalfaw)*plude[jk-1][jl-1])*zgdph_r;          // ice, negative numbers
diff --git a/src/cloudsc_c/cloudsc/mycpu.c b/src/cloudsc_c/cloudsc/mycpu.c
index 1aed0e4d..8c6e8506 100644
--- a/src/cloudsc_c/cloudsc/mycpu.c
+++ b/src/cloudsc_c/cloudsc/mycpu.c
@@ -8,9 +8,11 @@
  * nor does it submit to any jurisdiction.
  */
 
-//#define _GNU_SOURCE
-
+#if defined(__APPLE__)
+static int sched_getcpu() { return 0; }
+#else
 #include <sched.h>
+#endif
 
 /*
  * Find the core the thread belongs to
@@ -20,7 +22,10 @@ int mycpu_ ()
 {
   /* int sched_getcpu(void); */
   int cpu;
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wimplicit-function-declaration"
   cpu = sched_getcpu();
+#pragma clang diagnostic pop
   return cpu;
 }
 int mycpu() { return mycpu_(); }
diff --git a/src/cloudsc_fortran/CMakeLists.txt b/src/cloudsc_fortran/CMakeLists.txt
index f0e8a704..3719bcfc 100644
--- a/src/cloudsc_fortran/CMakeLists.txt
+++ b/src/cloudsc_fortran/CMakeLists.txt
@@ -15,25 +15,16 @@ ecbuild_add_option( FEATURE CLOUDSC_FORTRAN
 if( HAVE_CLOUDSC_FORTRAN )
 
     # Define the binary build target for this variant
-    ecbuild_add_executable( TARGET dwarf-cloudsc-fortran
+    ecbuild_add_executable(
+        TARGET dwarf-cloudsc-fortran
         SOURCES
             dwarf_cloudsc.F90
             cloudsc_driver_mod.F90
             cloudsc.F90
+        LIBS
+            cloudsc-common-lib
         DEFINITIONS ${CLOUDSC_DEFINITIONS}
     )
-    target_link_libraries( dwarf-cloudsc-fortran PRIVATE cloudsc-common-lib )
-
-    target_include_directories(dwarf-cloudsc-fortran PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
-    target_include_directories(dwarf-cloudsc-fortran PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/../common)
-
-    if( HAVE_OMP AND TARGET OpenMP::OpenMP_Fortran )
-        target_link_libraries( dwarf-cloudsc-fortran PRIVATE OpenMP::OpenMP_Fortran )
-    endif()
-
-    if( HAVE_MPI AND TARGET MPI::MPI_Fortran )
-        target_link_libraries( dwarf-cloudsc-fortran PRIVATE MPI::MPI_Fortran )
-    endif()
 
     # Create symlink for the input data
     if( HAVE_SERIALBOX )
@@ -42,11 +33,43 @@ if( HAVE_CLOUDSC_FORTRAN )
     endif()
 
     if( HAVE_HDF5 )
-      target_include_directories( dwarf-cloudsc-fortran PRIVATE ${HDF5_Fortran_INCLUDE_DIRS} )
-      target_link_libraries( dwarf-cloudsc-fortran PRIVATE ${HDF5_LIBRARIES} )
       execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
           ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/input.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../input.h5 )
       execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
           ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/reference.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../reference.h5 )
     endif()
+
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-fortran-serial
+        COMMAND bin/dwarf-cloudsc-fortran
+        ARGS 1 100 16
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 1
+    )
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-fortran-omp
+        COMMAND bin/dwarf-cloudsc-fortran
+        ARGS 4 100 16
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 4
+        CONDITION HAVE_OMP
+    )
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-fortran-mpi
+        COMMAND bin/dwarf-cloudsc-fortran
+        ARGS 1 100 16
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        MPI 2
+        OMP 1
+        CONDITION HAVE_MPI
+    )
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-fortran-mpi-omp
+        COMMAND bin/dwarf-cloudsc-fortran
+        ARGS 4 100 16
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        MPI 2
+        OMP 4
+        CONDITION HAVE_OMP AND HAVE_MPI
+    )
 endif()
diff --git a/src/cloudsc_gpu/CMakeLists.txt b/src/cloudsc_gpu/CMakeLists.txt
index 590b93fc..5509c602 100644
--- a/src/cloudsc_gpu/CMakeLists.txt
+++ b/src/cloudsc_gpu/CMakeLists.txt
@@ -14,17 +14,27 @@ ecbuild_add_option( FEATURE CLOUDSC_GPU_KERNELS
 
 # Define the CLAW-based GPU dwarf variant as an ECBuild feature
 ecbuild_add_option( FEATURE CLOUDSC_GPU_CLAW
-    DESCRIPTION "Build optimized GPU version of CLOUDSC derived from CLAW" DEFAULT OFF
+    DESCRIPTION "Build optimized GPU version of CLOUDSC derived from CLAW using OpenACC" DEFAULT OFF
     CONDITION Serialbox_FOUND OR HDF5_FOUND
 )
 
 ecbuild_add_option( FEATURE CLOUDSC_GPU_SCC
-    DESCRIPTION "Build optimized GPU version of CLOUDSC using SCC layout" DEFAULT OFF
+    DESCRIPTION "Build optimized GPU version of CLOUDSC using SCC layout and OpenACC" DEFAULT OFF
     CONDITION Serialbox_FOUND OR HDF5_FOUND
 )
 
+ecbuild_add_option( FEATURE CLOUDSC_GPU_SCC_CUF
+    DESCRIPTION "Build optimized GPU version of CLOUDSC using SCC layout in CUDA" DEFAULT OFF
+    CONDITION HAVE_CUDA AND ( Serialbox_FOUND OR HDF5_FOUND )
+)
+
 ecbuild_add_option( FEATURE CLOUDSC_GPU_SCC_HOIST
-    DESCRIPTION "Build optimized GPU version of CLOUDSC using SCC with hoisted temporary arrays" DEFAULT OFF
+    DESCRIPTION "Build optimized GPU version of CLOUDSC using SCC with hoisted temporary arrays and OpenACC" DEFAULT OFF
+    CONDITION Serialbox_FOUND OR HDF5_FOUND
+)
+
+ecbuild_add_option( FEATURE CLOUDSC_GPU_OMP_SCC_HOIST
+    DESCRIPTION "Build optimized GPU version of CLOUDSC using SCC with hoisted temporary arrays and OpenMP offload" DEFAULT OFF
     CONDITION Serialbox_FOUND OR HDF5_FOUND
 )
 
@@ -36,21 +46,19 @@ if( HAVE_CLOUDSC_GPU_CLAW )
             dwarf_cloudsc_gpu.F90
             cloudsc_driver_gpu_claw_mod.F90
             cloudsc.claw.gpu.F90
+        LIBS
+            cloudsc-common-lib
         DEFINITIONS ${CLOUDSC_DEFINITIONS} CLOUDSC_GPU_CLAW
     )
-    if( HAVE_ACC AND TARGET OpenACC::OpenACC_Fortran )
-        target_link_libraries( dwarf-cloudsc-gpu-claw PRIVATE OpenACC::OpenACC_Fortran )
-        target_link_libraries( dwarf-cloudsc-gpu-claw PRIVATE cloudsc-common-lib-static )
-    else()
-        target_link_libraries( dwarf-cloudsc-gpu-claw PRIVATE cloudsc-common-lib )
-    endif()
 
-    if( HAVE_MPI AND TARGET MPI::MPI_Fortran )
-        target_link_libraries( dwarf-cloudsc-gpu-claw PRIVATE MPI::MPI_Fortran )
-    endif()
-    if ( HAVE_HDF5 )
-        target_include_directories( dwarf-cloudsc-gpu-claw PRIVATE ${HDF5_Fortran_INCLUDE_DIRS} )
-    endif()
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-gpu-claw-serial
+        COMMAND bin/dwarf-cloudsc-gpu-claw
+        ARGS 1 1280 128
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 1
+        ENABLED OFF  # CLAW variant is currently broken
+    )
 endif()
 
 
@@ -61,21 +69,19 @@ if( HAVE_CLOUDSC_GPU_SCC )
             dwarf_cloudsc_gpu.F90
             cloudsc_driver_gpu_scc_mod.F90
             cloudsc_gpu_scc_mod.F90
+        LIBS
+            cloudsc-common-lib
         DEFINITIONS ${CLOUDSC_DEFINITIONS} CLOUDSC_GPU_SCC
     )
-    if( HAVE_ACC AND TARGET OpenACC::OpenACC_Fortran )
-        target_link_libraries( dwarf-cloudsc-gpu-scc PRIVATE OpenACC::OpenACC_Fortran )
-        target_link_libraries( dwarf-cloudsc-gpu-scc PRIVATE cloudsc-common-lib-static )
-    else()
-        target_link_libraries( dwarf-cloudsc-gpu-scc PRIVATE cloudsc-common-lib )
-    endif()
 
-    if( HAVE_MPI AND TARGET MPI::MPI_Fortran )
-        target_link_libraries( dwarf-cloudsc-gpu-scc PRIVATE MPI::MPI_Fortran )
-    endif()
-    if ( HAVE_HDF5 )
-        target_include_directories( dwarf-cloudsc-gpu-scc PRIVATE ${HDF5_Fortran_INCLUDE_DIRS} )
-    endif()
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-gpu-scc-serial
+        COMMAND bin/dwarf-cloudsc-gpu-scc
+        ARGS 1 1000 128
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 1
+        ENVIRONMENT "NVCOMPILER_ACC_CUDA_HEAPSIZE=64M"
+    )
 endif()
 
 
@@ -86,21 +92,87 @@ if( HAVE_CLOUDSC_GPU_SCC_HOIST )
             dwarf_cloudsc_gpu.F90
             cloudsc_driver_gpu_scc_hoist_mod.F90
             cloudsc_gpu_scc_hoist_mod.F90
+        LIBS
+            cloudsc-common-lib
         DEFINITIONS ${CLOUDSC_DEFINITIONS} CLOUDSC_GPU_SCC_HOIST
     )
-    if( HAVE_ACC AND TARGET OpenACC::OpenACC_Fortran )
-        target_link_libraries( dwarf-cloudsc-gpu-scc-hoist PRIVATE OpenACC::OpenACC_Fortran )
-        target_link_libraries( dwarf-cloudsc-gpu-scc-hoist PRIVATE cloudsc-common-lib-static )
-    else()
-        target_link_libraries( dwarf-cloudsc-gpu-scc-hoist PRIVATE cloudsc-common-lib )
-    endif()
 
-    if( HAVE_MPI AND TARGET MPI::MPI_Fortran )
-        target_link_libraries( dwarf-cloudsc-gpu-scc-hoist PRIVATE MPI::MPI_Fortran )
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-gpu-scc-hoist-serial
+        COMMAND bin/dwarf-cloudsc-gpu-scc-hoist
+        ARGS 1 1000 128
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 1
+    )
+endif()
+
+
+if( HAVE_CLOUDSC_GPU_OMP_SCC_HOIST )
+    list( APPEND CLOUDSC_GPU_OMP_SCC_HOIST_DEFINITIONS CLOUDSC_GPU_OMP_SCC_HOIST )
+    if( HAVE_OMP_TARGET_LOOP_CONSTRUCT )
+        list( APPEND CLOUDSC_GPU_OMP_SCC_HOIST_DEFINITIONS HAVE_OMP_TARGET_LOOP_CONSTRUCT )
+        if( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL )
+            list( APPEND CLOUDSC_GPU_OMP_SCC_HOIST_DEFINITIONS HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL )
+        endif()
+        if( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD )
+            list( APPEND CLOUDSC_GPU_OMP_SCC_HOIST_DEFINITIONS HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD )
+        endif()
     endif()
-    if ( HAVE_HDF5 )
-        target_include_directories( dwarf-cloudsc-gpu-scc-hoist PRIVATE ${HDF5_Fortran_INCLUDE_DIRS} )
+
+    if( NOT (HAVE_OMP_TARGET_TEAMS_DISTRIBUTE OR HAVE_OMP_TARGET_LOOP_CONSTRUCT) )
+        ecbuild_warn( "No support for OpenMP target offload, binary will be CPU-only" )
     endif()
+
+    ecbuild_add_executable(
+        TARGET dwarf-cloudsc-gpu-omp-scc-hoist
+        SOURCES
+            dwarf_cloudsc_gpu.F90
+            cloudsc_driver_gpu_omp_scc_hoist_mod.F90
+            cloudsc_gpu_omp_scc_hoist_mod.F90
+        LIBS
+            cloudsc-common-lib
+        DEFINITIONS ${CLOUDSC_DEFINITIONS} ${CLOUDSC_GPU_OMP_SCC_HOIST_DEFINITIONS}
+    )
+
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-gpu-omp-scc-hoist-serial
+        COMMAND bin/dwarf-cloudsc-gpu-omp-scc-hoist
+        ARGS 1 1000 128
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 1
+    )
+endif()
+
+
+if( HAVE_CLOUDSC_GPU_SCC_CUF )
+    # Compile the CUDA Fortran files with -Mcuda (maxregcount:128).
+    cloudsc_add_compile_options(
+        SOURCES cloudsc_gpu_scc_cuf_mod.F90  cloudsc_driver_gpu_scc_cuf_mod.F90
+        FLAGS   "-Mcuda=maxregcount:128")
+
+    ecbuild_add_executable(
+        TARGET dwarf-cloudsc-gpu-scc-cuf
+        SOURCES
+            dwarf_cloudsc_gpu.F90
+            cloudsc_driver_gpu_scc_cuf_mod.F90
+            cloudsc_gpu_scc_cuf_mod.F90
+            nlev_mod.F90
+        LIBS
+            cloudsc-common-lib
+        DEFINITIONS ${CLOUDSC_DEFINITIONS} CLOUDSC_GPU_SCC_CUF
+    )
+
+    # Small hack around the fact that CMake does not understand CUDA-Fortran
+    # natively yet: force -Mcuda at link time (PRIVATE, executables have no consumers).
+    target_link_options(dwarf-cloudsc-gpu-scc-cuf PRIVATE "-Mcuda")
+
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-gpu-scc-cuf-serial
+        COMMAND bin/dwarf-cloudsc-gpu-scc-cuf
+        ARGS 1 1000 128
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 1
+    )
+endif()
 
 
diff --git a/src/cloudsc_gpu/cloudsc_driver_gpu_omp_scc_hoist_mod.F90 b/src/cloudsc_gpu/cloudsc_driver_gpu_omp_scc_hoist_mod.F90
new file mode 100644
index 00000000..f51a65ea
--- /dev/null
+++ b/src/cloudsc_gpu/cloudsc_driver_gpu_omp_scc_hoist_mod.F90
@@ -0,0 +1,238 @@
+! (C) Copyright 1988- ECMWF.
+!
+! This software is licensed under the terms of the Apache Licence Version 2.0
+! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+!
+! In applying this licence, ECMWF does not waive the privileges and immunities
+! granted to it by virtue of its status as an intergovernmental organisation
+! nor does it submit to any jurisdiction.
+
+MODULE CLOUDSC_DRIVER_GPU_OMP_SCC_HOIST_MOD
+
+  USE PARKIND1, ONLY: JPIM, JPRB
+  USE YOMPHYDER, ONLY: STATE_TYPE
+  USE YOECLDP, ONLY : NCLV, YRECLDP, TECLDP
+  USE CLOUDSC_MPI_MOD, ONLY: NUMPROC, IRANK
+  USE TIMER_MOD, ONLY : PERFORMANCE_TIMER, GET_THREAD_NUM
+
+  USE CLOUDSC_GPU_OMP_SCC_HOIST_MOD, ONLY: CLOUDSC_SCC_HOIST
+
+  IMPLICIT NONE
+
+CONTAINS
+
+  SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_HOIST( &
+     & NUMOMP, NPROMA, NLEV, NGPTOT, NGPBLKS, NGPTOTG, KFLDX, PTSPHY, &
+     & PT, PQ, &
+     & BUFFER_CML, BUFFER_TMP, BUFFER_LOC, &
+     & PVFA, PVFL, PVFI, PDYNA, PDYNL, PDYNI, &
+     & PHRSW,    PHRLW, &
+     & PVERVEL,  PAP,      PAPH, &
+     & PLSM,     LDCUM,    KTYPE, &
+     & PLU,      PLUDE,    PSNDE,    PMFU,     PMFD, &
+     & PA, &
+     & PCLV,     PSUPSAT,&
+     & PLCRIT_AER,PICRIT_AER, PRE_ICE, &
+     & PCCN,     PNICE,&
+     & PCOVPTOT, PRAINFRAC_TOPRFZ, &
+     & PFSQLF,   PFSQIF ,  PFCQNNG,  PFCQLNG, &
+     & PFSQRF,   PFSQSF ,  PFCQRNG,  PFCQSNG, &
+     & PFSQLTUR, PFSQITUR, &
+     & PFPLSL,   PFPLSN,   PFHPSL,   PFHPSN &
+     & )
+    ! Driver routine that invokes the OpenMP-offload SCC-hoist CLOUDSC GPU kernel
+
+    INTEGER(KIND=JPIM)                                    :: NUMOMP, NPROMA, NLEV, NGPTOT, NGPBLKS, NGPTOTG
+    INTEGER(KIND=JPIM)                                    :: KFLDX
+    REAL(KIND=JPRB)                                       :: PTSPHY       ! Physics timestep
+    REAL(KIND=JPRB), INTENT(IN)    :: PT(NPROMA, NLEV, NGPBLKS) ! T at start of callpar
+    REAL(KIND=JPRB), INTENT(IN)    :: PQ(NPROMA, NLEV, NGPBLKS) ! Q at start of callpar
+    REAL(KIND=JPRB), INTENT(INOUT) :: BUFFER_CML(NPROMA,NLEV,3+NCLV,NGPBLKS) ! Storage buffer for TENDENCY_CML
+    REAL(KIND=JPRB), INTENT(INOUT) :: BUFFER_TMP(NPROMA,NLEV,3+NCLV,NGPBLKS) ! Storage buffer for TENDENCY_TMP
+    REAL(KIND=JPRB), INTENT(INOUT) :: BUFFER_LOC(NPROMA,NLEV,3+NCLV,NGPBLKS) ! Storage buffer for TENDENCY_LOC
+    REAL(KIND=JPRB), INTENT(IN)    :: PVFA(NPROMA, NLEV, NGPBLKS)     ! CC from VDF scheme
+    REAL(KIND=JPRB), INTENT(IN)    :: PVFL(NPROMA, NLEV, NGPBLKS)     ! Liq from VDF scheme
+    REAL(KIND=JPRB), INTENT(IN)    :: PVFI(NPROMA, NLEV, NGPBLKS)     ! Ice from VDF scheme
+    REAL(KIND=JPRB), INTENT(IN)    :: PDYNA(NPROMA, NLEV, NGPBLKS)    ! CC from Dynamics
+    REAL(KIND=JPRB), INTENT(IN)    :: PDYNL(NPROMA, NLEV, NGPBLKS)    ! Liq from Dynamics
+    REAL(KIND=JPRB), INTENT(IN)    :: PDYNI(NPROMA, NLEV, NGPBLKS)    ! Ice from Dynamics
+    REAL(KIND=JPRB), INTENT(IN)    :: PHRSW(NPROMA, NLEV, NGPBLKS)    ! Short-wave heating rate
+    REAL(KIND=JPRB), INTENT(IN)    :: PHRLW(NPROMA, NLEV, NGPBLKS)    ! Long-wave heating rate
+    REAL(KIND=JPRB), INTENT(IN)    :: PVERVEL(NPROMA, NLEV, NGPBLKS)  !Vertical velocity
+    REAL(KIND=JPRB), INTENT(IN)    :: PAP(NPROMA, NLEV, NGPBLKS)      ! Pressure on full levels
+    REAL(KIND=JPRB), INTENT(IN)    :: PAPH(NPROMA, NLEV+1, NGPBLKS) ! Pressure on half levels
+    REAL(KIND=JPRB), INTENT(IN)    :: PLSM(NPROMA, NGPBLKS)    ! Land fraction (0-1)
+    LOGICAL, INTENT(IN)            :: LDCUM(NPROMA, NGPBLKS)    ! Convection active
+    INTEGER(KIND=JPIM), INTENT(IN) :: KTYPE(NPROMA, NGPBLKS)    ! Convection type 0,1,2
+    REAL(KIND=JPRB), INTENT(IN)    :: PLU(NPROMA, NLEV, NGPBLKS)      ! Conv. condensate
+    REAL(KIND=JPRB), INTENT(INOUT) :: PLUDE(NPROMA, NLEV, NGPBLKS)    ! Conv. detrained water
+    REAL(KIND=JPRB), INTENT(IN)    :: PSNDE(NPROMA, NLEV, NGPBLKS)    ! Conv. detrained snow
+    REAL(KIND=JPRB), INTENT(IN)    :: PMFU(NPROMA, NLEV, NGPBLKS)     ! Conv. mass flux up
+    REAL(KIND=JPRB), INTENT(IN)    :: PMFD(NPROMA, NLEV, NGPBLKS)     ! Conv. mass flux down
+    REAL(KIND=JPRB), INTENT(IN)    :: PA(NPROMA, NLEV, NGPBLKS)       ! Original Cloud fraction (t)
+    REAL(KIND=JPRB), INTENT(IN)    :: PCLV(NPROMA, NLEV, NCLV, NGPBLKS)
+    REAL(KIND=JPRB), INTENT(IN)    :: PSUPSAT(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB), INTENT(IN)    :: PLCRIT_AER(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB), INTENT(IN)    :: PICRIT_AER(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB), INTENT(IN)    :: PRE_ICE(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB), INTENT(IN)    :: PCCN(NPROMA, NLEV, NGPBLKS)     ! liquid cloud condensation nuclei
+    REAL(KIND=JPRB), INTENT(IN)    :: PNICE(NPROMA, NLEV, NGPBLKS)    ! ice number concentration (cf. CCN)
+
+    REAL(KIND=JPRB), INTENT(INOUT) :: PCOVPTOT(NPROMA, NLEV, NGPBLKS)    ! Precip fraction
+    REAL(KIND=JPRB), INTENT(OUT) :: PRAINFRAC_TOPRFZ(NPROMA, NGPBLKS)
+    ! Flux diagnostics for DDH budget
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQLF(NPROMA, NLEV+1, NGPBLKS)    ! Flux of liquid
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQIF(NPROMA, NLEV+1, NGPBLKS)    ! Flux of ice
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQLNG(NPROMA, NLEV+1, NGPBLKS)   ! -ve corr for liq
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQNNG(NPROMA, NLEV+1, NGPBLKS)   ! -ve corr for ice
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQRF(NPROMA, NLEV+1, NGPBLKS)    ! Flux diagnostics
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQSF(NPROMA, NLEV+1, NGPBLKS)    !    for DDH, generic
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQRNG(NPROMA, NLEV+1, NGPBLKS)   ! rain
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQSNG(NPROMA, NLEV+1, NGPBLKS)   ! snow
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQLTUR(NPROMA, NLEV+1, NGPBLKS)  ! liquid flux due to VDF
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQITUR(NPROMA, NLEV+1, NGPBLKS)  ! ice flux due to VDF
+    REAL(KIND=JPRB), INTENT(OUT) :: PFPLSL(NPROMA, NLEV+1, NGPBLKS)    ! liq+rain sedim flux
+    REAL(KIND=JPRB), INTENT(OUT) :: PFPLSN(NPROMA, NLEV+1, NGPBLKS)    ! ice+snow sedim flux
+    REAL(KIND=JPRB), INTENT(OUT) :: PFHPSL(NPROMA, NLEV+1, NGPBLKS)    ! Enthalpy flux for liq
+    REAL(KIND=JPRB), INTENT(OUT) :: PFHPSN(NPROMA, NLEV+1, NGPBLKS)    ! Enthalpy flux for ice
+
+    ! Local declarations of promoted temporaries
+    REAL(KIND=JPRB) :: ZFOEALFA(NPROMA, NLEV+1, NGPBLKS)
+    REAL(KIND=JPRB) :: ZTP1(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZLI(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZA(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZAORIG(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZLIQFRAC(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZICEFRAC(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZQX(NPROMA, NLEV, NCLV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZQX0(NPROMA, NLEV, NCLV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZPFPLSX(NPROMA, NLEV+1, NCLV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZLNEG(NPROMA, NLEV, NCLV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZQXN2D(NPROMA, NLEV, NCLV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZQSMIX(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZQSLIQ(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZQSICE(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZFOEEWMT(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZFOEEW(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB) :: ZFOEELIQT(NPROMA, NLEV, NGPBLKS)
+    INTEGER(KIND=JPIM) :: JL
+
+    INTEGER(KIND=JPIM) :: JKGLO,IBL,ICEND
+    TYPE(PERFORMANCE_TIMER) :: TIMER
+    INTEGER(KIND=JPIM) :: TID ! thread id from 0 .. NUMOMP - 1
+
+    ! Local copy of cloud parameters for offload
+    TYPE(TECLDP) :: LOCAL_YRECLDP
+
+    NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1)
+1003 format(5x,'NUMPROC=',i0,', NUMOMP=',i0,', NGPTOTG=',i0,', NPROMA=',i0,', NGPBLKS=',i0)
+    if (irank == 0) then
+      write(0,1003) NUMPROC,NUMOMP,NGPTOTG,NPROMA,NGPBLKS
+    end if
+
+    ! Global timer for the parallel region
+    CALL TIMER%START(NUMOMP)
+
+!$omp target enter data map(alloc: ZFOEALFA, ZTP1, ZLI, ZA, ZAORIG, ZLIQFRAC, ZICEFRAC, ZQX, ZQX0,  &
+!$omp &   ZPFPLSX, ZLNEG, ZQXN2D, ZQSMIX, ZQSLIQ, ZQSICE, ZFOEEWMT,  &
+!$omp &   ZFOEEW, ZFOEELIQT)
+
+
+    ! Workaround for PGI / OpenACC oddities:
+    ! Create a local copy of the parameter struct to ensure it gets moved to
+    ! the device. NOTE(review): ``omp target data`` below maps YRECLDP instead.
+    LOCAL_YRECLDP = YRECLDP
+
+!$omp target data &
+!$omp map(to: &
+!$omp   pt,pq,buffer_cml,buffer_tmp,pvfa, &
+!$omp   pvfl,pvfi,pdyna,pdynl,pdyni,phrsw,phrlw,pvervel, &
+!$omp   pap,paph,plsm,ldcum,ktype,plu,psnde, &
+!$omp   pmfu,pmfd,pa,pclv,psupsat,plcrit_aer,picrit_aer, &
+!$omp   pre_ice,pccn,pnice, yrecldp) &
+!$omp map(tofrom: &
+!$omp   buffer_loc,plude,pcovptot,prainfrac_toprfz) &
+!$omp map(from: &
+!$omp   pfsqlf,pfsqif,pfcqnng, &
+!$omp   pfcqlng ,pfsqrf,pfsqsf,pfcqrng,pfcqsng,pfsqltur, &
+!$omp   pfsqitur,pfplsl,pfplsn,pfhpsl,pfhpsn)
+
+    ! Local timer for each thread
+    TID = GET_THREAD_NUM()
+    CALL TIMER%THREAD_START(TID)
+
+#ifdef HAVE_OMP_TARGET_LOOP_CONSTRUCT
+!$omp target teams loop bind(teams)
+#else
+!$omp target teams distribute
+#endif
+    DO JKGLO=1,NGPTOT,NPROMA
+       IBL=(JKGLO-1)/NPROMA+1
+       ICEND=MIN(NPROMA,NGPTOT-JKGLO+1)
+
+#ifdef HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL
+!$omp loop bind(parallel)
+#elif defined(HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD)
+!$omp loop bind(thread)
+#else
+!$omp parallel do
+#endif
+      DO JL=1,ICEND
+        CALL CLOUDSC_SCC_HOIST &
+         & (1, ICEND, NPROMA, NLEV, PTSPHY,&
+         & PT(:,:,IBL), PQ(:,:,IBL), &
+         & BUFFER_TMP(:,:,1,IBL), BUFFER_TMP(:,:,3,IBL), BUFFER_TMP(:,:,2,IBL), BUFFER_TMP(:,:,4:8,IBL), &
+         & BUFFER_LOC(:,:,1,IBL), BUFFER_LOC(:,:,3,IBL), BUFFER_LOC(:,:,2,IBL), BUFFER_LOC(:,:,4:8,IBL), &
+         & PVFA(:,:,IBL), PVFL(:,:,IBL), PVFI(:,:,IBL), PDYNA(:,:,IBL), PDYNL(:,:,IBL), PDYNI(:,:,IBL), &
+         & PHRSW(:,:,IBL),    PHRLW(:,:,IBL),&
+         & PVERVEL(:,:,IBL),  PAP(:,:,IBL),      PAPH(:,:,IBL),&
+         & PLSM(:,IBL),       LDCUM(:,IBL),      KTYPE(:,IBL), &
+         & PLU(:,:,IBL),      PLUDE(:,:,IBL),    PSNDE(:,:,IBL),    PMFU(:,:,IBL),     PMFD(:,:,IBL),&
+                                !---prognostic fields
+         & PA(:,:,IBL),       PCLV(:,:,:,IBL),   PSUPSAT(:,:,IBL),&
+                                !-- arrays for aerosol-cloud interactions
+         & PLCRIT_AER(:,:,IBL),PICRIT_AER(:,:,IBL),&
+         & PRE_ICE(:,:,IBL),&
+         & PCCN(:,:,IBL),     PNICE(:,:,IBL),&
+                                !---diagnostic output
+         & PCOVPTOT(:,:,IBL), PRAINFRAC_TOPRFZ(:,IBL),&
+                                !---resulting fluxes
+         & PFSQLF(:,:,IBL),   PFSQIF (:,:,IBL),  PFCQNNG(:,:,IBL),  PFCQLNG(:,:,IBL),&
+         & PFSQRF(:,:,IBL),   PFSQSF (:,:,IBL),  PFCQRNG(:,:,IBL),  PFCQSNG(:,:,IBL),&
+         & PFSQLTUR(:,:,IBL), PFSQITUR (:,:,IBL), &
+         & PFPLSL(:,:,IBL),   PFPLSN(:,:,IBL),   PFHPSL(:,:,IBL),   PFHPSN(:,:,IBL),&
+         & LOCAL_YRECLDP, &
+         & ZFOEALFA(:,:,IBL), ZTP1(:,:,IBL), ZLI(:,:,IBL), ZA(:,:,IBL), ZAORIG(:,:,IBL), &
+         & ZLIQFRAC(:,:,IBL), ZICEFRAC(:,:,IBL), ZQX(:,:,:,IBL), ZQX0(:,:,:,IBL), ZPFPLSX(:,:,:,IBL), &
+         & ZLNEG(:,:,:,IBL), ZQXN2D(:,:,:,IBL), ZQSMIX(:,:,IBL), ZQSLIQ(:,:,IBL), ZQSICE(:,:,IBL), &
+         & ZFOEEWMT(:,:,IBL), ZFOEEW(:,:,IBL), ZFOEELIQT(:,:,IBL), JL=JL)
+      ENDDO
+#ifdef HAVE_OMP_TARGET_LOOP_CONSTRUCT
+!$omp end loop
+#else
+!$omp end parallel do
+#endif
+    ENDDO
+#ifdef HAVE_OMP_TARGET_LOOP_CONSTRUCT
+!$omp end target teams loop
+#else
+!$omp end target teams distribute
+#endif
+
+
+    CALL TIMER%THREAD_END(TID)
+
+!$omp end target data
+
+    CALL TIMER%END()
+
+    ! On GPUs, adding block-level column totals is cumbersome and
+    ! error prone, and of little value due to the large number of
+    ! processing "thread teams". Instead we register the total here.
+    CALL TIMER%THREAD_LOG(TID=TID, IGPC=NGPTOT)
+
+    CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, NGPTOT)
+
+  END SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_HOIST
+
+END MODULE CLOUDSC_DRIVER_GPU_OMP_SCC_HOIST_MOD
diff --git a/src/cloudsc_gpu/cloudsc_driver_gpu_scc_cuf_mod.F90 b/src/cloudsc_gpu/cloudsc_driver_gpu_scc_cuf_mod.F90
new file mode 100644
index 00000000..4f7e0935
--- /dev/null
+++ b/src/cloudsc_gpu/cloudsc_driver_gpu_scc_cuf_mod.F90
@@ -0,0 +1,280 @@
+! (C) Copyright 1988- ECMWF.
+!
+! This software is licensed under the terms of the Apache Licence Version 2.0
+! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+!
+! In applying this licence, ECMWF does not waive the privileges and immunities
+! granted to it by virtue of its status as an intergovernmental organisation
+! nor does it submit to any jurisdiction.
+
+MODULE CLOUDSC_DRIVER_GPU_SCC_CUF_MOD
+  ! Host-side driver for the SCC CUDA Fortran (CUF) variant of the CLOUDSC kernel.
+  USE PARKIND1, ONLY: JPIM, JPRB
+  USE YOMPHYDER, ONLY: STATE_TYPE
+  USE YOECLDP, ONLY : NCLV, YRECLDP, TECLDP
+  USE CLOUDSC_MPI_MOD, ONLY: NUMPROC, IRANK
+  USE TIMER_MOD, ONLY : PERFORMANCE_TIMER, GET_THREAD_NUM
+  USE YOMCST_CUF,ONLY : YOMCST_UPDATE_DEVICE
+  USE YOETHF_CUF,ONLY : YOETHF_UPDATE_DEVICE
+
+  USE CLOUDSC_GPU_SCC_CUF_MOD, ONLY: CLOUDSC_SCC_CUF
+  USE NLEV_MOD, ONLY : NLEV
+
+  USE CUDAFOR
+
+  IMPLICIT NONE
+
+
+CONTAINS
+
+  SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_CUF( &
+     & NUMOMP, NPROMA, NLEV_IN, NGPTOT, NGPBLKS, NGPTOTG, KFLDX, PTSPHY, &
+     & PT, PQ, &
+     & BUFFER_CML, BUFFER_TMP, BUFFER_LOC, &
+     & PVFA, PVFL, PVFI, PDYNA, PDYNL, PDYNI, &
+     & PHRSW,    PHRLW, &
+     & PVERVEL,  PAP,      PAPH, &
+     & PLSM,     LDCUM,    KTYPE, &
+     & PLU,      PLUDE,    PSNDE,    PMFU,     PMFD, &
+     & PA, &
+     & PCLV,     PSUPSAT,&
+     & PLCRIT_AER,PICRIT_AER, PRE_ICE, &
+     & PCCN,     PNICE,&
+     & PCOVPTOT, PRAINFRAC_TOPRFZ, &
+     & PFSQLF,   PFSQIF ,  PFCQNNG,  PFCQLNG, &
+     & PFSQRF,   PFSQSF ,  PFCQRNG,  PFCQSNG, &
+     & PFSQLTUR, PFSQITUR, &
+     & PFPLSL,   PFPLSN,   PFHPSL,   PFHPSN &
+     & )
+    ! Driver routine that invokes the SCC CUDA Fortran (CUF) CLOUDSC GPU kernel:
+    ! allocates device mirrors, copies inputs over, launches, and copies results back.
+    INTEGER(KIND=JPIM)                                    :: NUMOMP, NPROMA, NLEV_IN, NGPTOT, NGPBLKS, NGPTOTG
+    INTEGER(KIND=JPIM)                                    :: KFLDX 
+    REAL(KIND=JPRB)                                       :: PTSPHY       ! Physics timestep
+    REAL(KIND=JPRB), INTENT(IN)    :: PT(NPROMA, NLEV_IN, NGPBLKS) ! T at start of callpar
+    REAL(KIND=JPRB), INTENT(IN)    :: PQ(NPROMA, NLEV_IN, NGPBLKS) ! Q at start of callpar
+    REAL(KIND=JPRB), INTENT(INOUT) :: BUFFER_CML(NPROMA,NLEV_IN,3+NCLV,NGPBLKS) ! Storage buffer for TENDENCY_CML
+    REAL(KIND=JPRB), INTENT(INOUT) :: BUFFER_TMP(NPROMA,NLEV_IN,3+NCLV,NGPBLKS) ! Storage buffer for TENDENCY_TMP
+    REAL(KIND=JPRB), INTENT(INOUT) :: BUFFER_LOC(NPROMA,NLEV_IN,3+NCLV,NGPBLKS) ! Storage buffer for TENDENCY_LOC
+    REAL(KIND=JPRB), INTENT(IN)    :: PVFA(NPROMA, NLEV_IN, NGPBLKS)     ! CC from VDF scheme
+    REAL(KIND=JPRB), INTENT(IN)    :: PVFL(NPROMA, NLEV_IN, NGPBLKS)     ! Liq from VDF scheme
+    REAL(KIND=JPRB), INTENT(IN)    :: PVFI(NPROMA, NLEV_IN, NGPBLKS)     ! Ice from VDF scheme
+    REAL(KIND=JPRB), INTENT(IN)    :: PDYNA(NPROMA, NLEV_IN, NGPBLKS)    ! CC from Dynamics
+    REAL(KIND=JPRB), INTENT(IN)    :: PDYNL(NPROMA, NLEV_IN, NGPBLKS)    ! Liq from Dynamics
+    REAL(KIND=JPRB), INTENT(IN)    :: PDYNI(NPROMA, NLEV_IN, NGPBLKS)    ! Ice from Dynamics
+    REAL(KIND=JPRB), INTENT(IN)    :: PHRSW(NPROMA, NLEV_IN, NGPBLKS)    ! Short-wave heating rate
+    REAL(KIND=JPRB), INTENT(IN)    :: PHRLW(NPROMA, NLEV_IN, NGPBLKS)    ! Long-wave heating rate
+    REAL(KIND=JPRB), INTENT(IN)    :: PVERVEL(NPROMA, NLEV_IN, NGPBLKS)  !Vertical velocity
+    REAL(KIND=JPRB), INTENT(IN)    :: PAP(NPROMA, NLEV_IN, NGPBLKS)      ! Pressure on full levels
+    REAL(KIND=JPRB), INTENT(IN)    :: PAPH(NPROMA, NLEV_IN+1, NGPBLKS) ! Pressure on half levels
+    REAL(KIND=JPRB), INTENT(IN)    :: PLSM(NPROMA, NGPBLKS)    ! Land fraction (0-1)
+    LOGICAL, INTENT(IN)            :: LDCUM(NPROMA, NGPBLKS)    ! Convection active
+    INTEGER(KIND=JPIM), INTENT(IN) :: KTYPE(NPROMA, NGPBLKS)    ! Convection type 0,1,2
+    REAL(KIND=JPRB), INTENT(IN)    :: PLU(NPROMA, NLEV_IN, NGPBLKS)      ! Conv. condensate
+    REAL(KIND=JPRB), INTENT(INOUT) :: PLUDE(NPROMA, NLEV_IN, NGPBLKS)    ! Conv. detrained water
+    REAL(KIND=JPRB), INTENT(IN)    :: PSNDE(NPROMA, NLEV_IN, NGPBLKS)    ! Conv. detrained snow
+    REAL(KIND=JPRB), INTENT(IN)    :: PMFU(NPROMA, NLEV_IN, NGPBLKS)     ! Conv. mass flux up
+    REAL(KIND=JPRB), INTENT(IN)    :: PMFD(NPROMA, NLEV_IN, NGPBLKS)     ! Conv. mass flux down
+    REAL(KIND=JPRB), INTENT(IN)    :: PA(NPROMA, NLEV_IN, NGPBLKS)       ! Original Cloud fraction (t)
+    REAL(KIND=JPRB), INTENT(IN)    :: PCLV(NPROMA, NLEV_IN, NCLV, NGPBLKS)
+    REAL(KIND=JPRB), INTENT(IN)    :: PSUPSAT(NPROMA, NLEV_IN, NGPBLKS)
+    REAL(KIND=JPRB), INTENT(IN)    :: PLCRIT_AER(NPROMA, NLEV_IN, NGPBLKS)
+    REAL(KIND=JPRB), INTENT(IN)    :: PICRIT_AER(NPROMA, NLEV_IN, NGPBLKS)
+    REAL(KIND=JPRB), INTENT(IN)    :: PRE_ICE(NPROMA, NLEV_IN, NGPBLKS)
+    REAL(KIND=JPRB), INTENT(IN)    :: PCCN(NPROMA, NLEV_IN, NGPBLKS)     ! liquid cloud condensation nuclei
+    REAL(KIND=JPRB), INTENT(IN)    :: PNICE(NPROMA, NLEV_IN, NGPBLKS)    ! ice number concentration (cf. CCN)
+
+    REAL(KIND=JPRB), INTENT(INOUT) :: PCOVPTOT(NPROMA, NLEV_IN, NGPBLKS)    ! Precip fraction
+    REAL(KIND=JPRB), INTENT(OUT) :: PRAINFRAC_TOPRFZ(NPROMA, NGPBLKS)
+    ! Flux diagnostics for DDH budget
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQLF(NPROMA, NLEV_IN+1, NGPBLKS)    ! Flux of liquid
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQIF(NPROMA, NLEV_IN+1, NGPBLKS)    ! Flux of ice
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQLNG(NPROMA, NLEV_IN+1, NGPBLKS)   ! -ve corr for liq
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQNNG(NPROMA, NLEV_IN+1, NGPBLKS)   ! -ve corr for ice
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQRF(NPROMA, NLEV_IN+1, NGPBLKS)    ! Flux diagnostics
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQSF(NPROMA, NLEV_IN+1, NGPBLKS)    !    for DDH, generic
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQRNG(NPROMA, NLEV_IN+1, NGPBLKS)   ! rain
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQSNG(NPROMA, NLEV_IN+1, NGPBLKS)   ! snow
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQLTUR(NPROMA, NLEV_IN+1, NGPBLKS)  ! liquid flux due to VDF
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQITUR(NPROMA, NLEV_IN+1, NGPBLKS)  ! ice flux due to VDF
+    REAL(KIND=JPRB), INTENT(OUT) :: PFPLSL(NPROMA, NLEV_IN+1, NGPBLKS)    ! liq+rain sedim flux
+    REAL(KIND=JPRB), INTENT(OUT) :: PFPLSN(NPROMA, NLEV_IN+1, NGPBLKS)    ! ice+snow sedim flux
+    REAL(KIND=JPRB), INTENT(OUT) :: PFHPSL(NPROMA, NLEV_IN+1, NGPBLKS)    ! Enthalpy flux for liq
+    REAL(KIND=JPRB), INTENT(OUT) :: PFHPSN(NPROMA, NLEV_IN+1, NGPBLKS)    ! Enthalpy flux for ice
+
+    INTEGER(KIND=JPIM) :: ICSTART, ICEND
+    TYPE(PERFORMANCE_TIMER) :: TIMER
+    INTEGER(KIND=JPIM) :: TID ! thread id from 0 .. NUMOMP - 1
+    INTEGER :: ISTAT
+
+    ! Local copy of cloud parameters for offload
+    TYPE(TECLDP), DEVICE :: LOCAL_YRECLDP
+
+    TYPE(DIM3) :: GRIDDIM, BLOCKDIM
+
+!!  device variables
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PT_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS) ! T at start of callpar
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PQ_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS) ! Q at start of callpar
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: BUFFER_CML_d(:,:,:,:) !!(NPROMA,NLEV,3+NCLV,NGPBLKS) ! Storage buffer for TENDENCY_CML
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: BUFFER_TMP_d(:,:,:,:) !!(NPROMA,NLEV,3+NCLV,NGPBLKS) ! Storage buffer for TENDENCY_TMP
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: BUFFER_LOC_d(:,:,:,:) !!(NPROMA,NLEV,3+NCLV,NGPBLKS) ! Storage buffer for TENDENCY_LOC
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PVFA_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)     ! CC from VDF scheme
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PVFL_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)     ! Liq from VDF scheme
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PVFI_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)     ! Ice from VDF scheme
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PDYNA_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)    ! CC from Dynamics
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PDYNL_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)    ! Liq from Dynamics
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PDYNI_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)    ! Ice from Dynamics
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PHRSW_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)    ! Short-wave heating rate
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PHRLW_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)    ! Long-wave heating rate
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PVERVEL_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)  !Vertical velocity
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PAP_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)      ! Pressure on full levels
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PAPH_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS) ! Pressure on half levels
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PLSM_d(:,:) !!(NPROMA, NGPBLKS)    ! Land fraction (0-1)
+    LOGICAL,            DEVICE, ALLOCATABLE :: LDCUM_d(:,:) !!(NPROMA, NGPBLKS)    ! Convection active
+    INTEGER(KIND=JPIM), DEVICE, ALLOCATABLE :: KTYPE_d(:,:) !!(NPROMA, NGPBLKS)    ! Convection type 0,1,2
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PLU_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)      ! Conv. condensate
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PLUDE_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)    ! Conv. detrained water
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PSNDE_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)    ! Conv. detrained snow
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PMFU_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)     ! Conv. mass flux up
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PMFD_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)     ! Conv. mass flux down
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PA_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)       ! Original Cloud fraction (t)
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PCLV_d(:,:,:,:) !!(NPROMA, NLEV, NCLV, NGPBLKS)
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PSUPSAT_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PLCRIT_AER_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PICRIT_AER_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PRE_ICE_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PCCN_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)     ! liquid cloud condensation nuclei
+    REAL(KIND=JPRB),    DEVICE, ALLOCATABLE :: PNICE_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)    ! ice number concentration (cf. CCN)
+
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PCOVPTOT_d(:,:,:) !!(NPROMA, NLEV, NGPBLKS)    ! Precip fraction
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PRAINFRAC_TOPRFZ_d(:,:) !!(NPROMA, NGPBLKS)
+    ! Flux diagnostics for DDH budget
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PFSQLF_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS)    ! Flux of liquid
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PFSQIF_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS)    ! Flux of ice
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PFCQLNG_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS)   ! -ve corr for liq
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PFCQNNG_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS)   ! -ve corr for ice
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PFSQRF_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS)    ! Flux diagnostics
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PFSQSF_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS)    !    for DDH, generic
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PFCQRNG_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS)   ! rain
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PFCQSNG_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS)   ! snow
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PFSQLTUR_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS)  ! liquid flux due to VDF
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PFSQITUR_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS)  ! ice flux due to VDF
+
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PFPLSL_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS)    ! liq+rain sedim flux
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PFPLSN_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS)    ! ice+snow sedim flux
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PFHPSL_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS)    ! Enthalpy flux for liq
+    REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: PFHPSN_d(:,:,:) !!(NPROMA, NLEV+1, NGPBLKS)    ! Enthalpy flux for ice
+
+#include "abor1.intfb.h"
+
+    ! Transfer global module-scope parameters to constant device memory
+    CALL YOMCST_UPDATE_DEVICE()
+    CALL YOETHF_UPDATE_DEVICE()
+
+    NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1)
+
+    IF (NLEV_IN /= NLEV) THEN
+      CALL ABOR1('ERROR: Vertical dimension NLEV does not equal parametrised constant NLEV=137')
+    END IF
+
+    ALLOCATE( PT_d(NPROMA, NLEV, NGPBLKS),PQ_d(NPROMA, NLEV, NGPBLKS),BUFFER_CML_d(NPROMA,NLEV,3+NCLV,NGPBLKS))
+    ALLOCATE( BUFFER_TMP_d(NPROMA,NLEV,3+NCLV,NGPBLKS), BUFFER_LOC_d(NPROMA,NLEV,3+NCLV,NGPBLKS))
+    ALLOCATE( PVFA_d(NPROMA, NLEV, NGPBLKS), PVFL_d(NPROMA, NLEV, NGPBLKS), PVFI_d(NPROMA, NLEV, NGPBLKS) )
+    ALLOCATE( PDYNA_d(NPROMA, NLEV, NGPBLKS), PDYNL_d(NPROMA, NLEV, NGPBLKS), PDYNI_d(NPROMA, NLEV, NGPBLKS) )
+    ALLOCATE( PHRSW_d(NPROMA, NLEV, NGPBLKS), PHRLW_d(NPROMA, NLEV, NGPBLKS), PVERVEL_d(NPROMA, NLEV, NGPBLKS) )
+    ALLOCATE( PAP_d(NPROMA, NLEV, NGPBLKS), PAPH_d(NPROMA, NLEV+1, NGPBLKS), PLSM_d(NPROMA, NGPBLKS) )
+    ALLOCATE( LDCUM_d(NPROMA, NGPBLKS), KTYPE_d(NPROMA, NGPBLKS), PLU_d(NPROMA, NLEV, NGPBLKS) )
+    ALLOCATE( PLUDE_d(NPROMA, NLEV, NGPBLKS), PSNDE_d(NPROMA, NLEV, NGPBLKS), PMFU_d(NPROMA, NLEV, NGPBLKS) )
+    ALLOCATE( PMFD_d(NPROMA, NLEV, NGPBLKS), PA_d(NPROMA, NLEV, NGPBLKS), PCLV_d(NPROMA, NLEV, NCLV, NGPBLKS) )
+    ALLOCATE( PSUPSAT_d(NPROMA, NLEV, NGPBLKS), PLCRIT_AER_d(NPROMA, NLEV, NGPBLKS), PICRIT_AER_d(NPROMA, NLEV, NGPBLKS) )
+    ALLOCATE( PRE_ICE_d(NPROMA, NLEV, NGPBLKS), PCCN_d(NPROMA, NLEV, NGPBLKS),  PNICE_d(NPROMA, NLEV, NGPBLKS) )
+
+    ALLOCATE(PCOVPTOT_d(NPROMA, NLEV, NGPBLKS), PRAINFRAC_TOPRFZ_d(NPROMA, NGPBLKS) )
+    ALLOCATE(PFSQLF_d(NPROMA, NLEV+1, NGPBLKS), PFSQIF_d(NPROMA, NLEV+1, NGPBLKS), PFCQLNG_d(NPROMA, NLEV+1, NGPBLKS) )
+    ALLOCATE(PFCQNNG_d(NPROMA, NLEV+1, NGPBLKS), PFSQRF_d(NPROMA, NLEV+1, NGPBLKS), PFSQSF_d(NPROMA, NLEV+1, NGPBLKS) )
+    ALLOCATE(PFCQRNG_d(NPROMA, NLEV+1, NGPBLKS), PFCQSNG_d(NPROMA, NLEV+1, NGPBLKS) ,PFSQLTUR_d(NPROMA, NLEV+1, NGPBLKS) )
+    ALLOCATE( PFSQITUR_d(NPROMA, NLEV+1, NGPBLKS))
+
+    ALLOCATE(PFPLSL_d(NPROMA, NLEV+1, NGPBLKS), PFPLSN_d(NPROMA, NLEV+1, NGPBLKS))
+    ALLOCATE(PFHPSL_d(NPROMA, NLEV+1, NGPBLKS), PFHPSN_d(NPROMA, NLEV+1, NGPBLKS) )
+
+
+1003 format(5x,'NUMPROC=',i0,', NUMOMP=',i0,', NGPTOTG=',i0,', NPROMA=',i0,', NGPBLKS=',i0)
+    if (irank == 0) then
+      write(0,1003) NUMPROC,NUMOMP,NGPTOTG,NPROMA,NGPBLKS
+    end if
+
+    ! Global timer for the parallel region
+    CALL TIMER%START(NUMOMP)
+
+    ! Copy the cloud parameter struct to its device-resident counterpart:
+    ! LOCAL_YRECLDP is declared DEVICE, so this assignment performs the
+    ! host-to-device transfer that the kernel argument relies on
+    LOCAL_YRECLDP = YRECLDP
+    ! Host-to-device copies of the kernel arguments (plain assignments to DEVICE arrays)
+    pt_d=pt; pq_d=pq; buffer_tmp_d=buffer_tmp; buffer_loc_d = buffer_loc; pvfa_d=pvfa; pvfl_d=pvfl; pvfi_d=pvfi; pdyna_d=pdyna;
+    pdynl_d=pdynl; pdyni_d=pdyni; phrsw_d=phrsw; phrlw_d=phrlw; pvervel_d=pvervel; pap_d=pap; paph_d=paph;
+    plsm_d=plsm; ldcum_d=ldcum; ktype_d=ktype; plu_d=plu; plude_d=plude; psnde_d=psnde; pmfu_d=pmfu; pmfd_d=pmfd;
+    pa_d=pa; pclv_d=pclv; psupsat_d=psupsat; plcrit_aer_d=plcrit_aer; picrit_aer_d=picrit_aer; pre_ice_d=pre_ice;
+    pccn_d=pccn; pnice_d=pnice; pcovptot_d=pcovptot; prainfrac_toprfz_d=prainfrac_toprfz; pfsqlf_d=pfsqlf;
+    pfsqif_d=pfsqif; pfcqnng_d=pfcqnng; pfcqlng_d=pfcqlng; pfsqrf_d=pfsqrf; pfsqsf_d=pfsqsf; pfcqrng_d=pfcqrng; pfcqsng_d=pfcqsng
+    pfsqltur_d=pfsqltur; pfsqitur_d=pfsqitur; pfplsl_d=pfplsl; pfplsn_d=pfplsn; pfhpsl_d=pfhpsl; pfhpsn_d=pfhpsn
+
+    ! Local timer for each thread
+    TID = GET_THREAD_NUM()
+    CALL TIMER%THREAD_START(TID)
+
+    ! Column bounds are shared by all blocks. NOTE(review): confirm the kernel masks a partial last block
+    ICSTART=1
+    ICEND=MIN(NPROMA,NGPTOT)
+    ! Launch geometry: NGPBLKS blocks along grid z, NPROMA threads along block x
+    GRIDDIM = DIM3(1,1,NGPBLKS)
+    BLOCKDIM = DIM3(NPROMA,1,1)
+
+    CALL CLOUDSC_SCC_CUF<<<GRIDDIM,BLOCKDIM >>> &
+        & (ICSTART, ICEND, NPROMA, NGPBLKS, PTSPHY,&
+        & PT_D,PQ_D, &
+        & BUFFER_TMP_D, &
+        & BUFFER_LOC_D, &
+        & PVFA_D, PVFL_D, PVFI_D, PDYNA_D, PDYNL_D, PDYNI_D, &
+        & PHRSW_D, PHRLW_D, &
+        & PVERVEL_D, PAP_D, PAPH_D, &
+        & PLSM_D, LDCUM_D, KTYPE_D, &
+        & PLU_D, PLUDE_D, PSNDE_D, PMFU_D, PMFD_D, &
+        !---prognostic fields
+        & PA_D, PCLV_D, PSUPSAT_D, &
+        !-- arrays for aerosol-cloud interactions
+        & PLCRIT_AER_D, PICRIT_AER_D, &
+        & PRE_ICE_D, &
+        & PCCN_D, PNICE_D, &
+        !---diagnostic output
+        & PCOVPTOT_D, PRAINFRAC_TOPRFZ_D, &
+        !---resulting fluxes
+        & PFSQLF_D, PFSQIF_D, PFCQNNG_D, PFCQLNG_D, &
+        & PFSQRF_D, PFSQSF_D, PFCQRNG_D, PFCQSNG_D, &
+        & PFSQLTUR_D, PFSQITUR_D, &
+        & PFPLSL_D, PFPLSN_D, PFHPSL_D, PFHPSN_D, &
+        & LOCAL_YRECLDP )
+    ! Wait for the kernel to finish and abort if the device reports an error
+    ISTAT = cudaDeviceSynchronize()
+    IF (ISTAT /= cudaSuccess) CALL ABOR1('ERROR: CLOUDSC_SCC_CUF kernel execution failed')
+    CALL TIMER%THREAD_END(TID)
+
+    buffer_tmp=buffer_tmp_d; buffer_loc = buffer_loc_d; 
+    plude=plude_d;
+    pcovptot=pcovptot_d; prainfrac_toprfz=prainfrac_toprfz_d;
+    pfsqlf=pfsqlf_d; pfsqif=pfsqif_d; pfcqnng=pfcqnng_d; pfcqlng=pfcqlng_d; pfsqrf=pfsqrf_d; pfsqsf=pfsqsf_d; pfcqrng=pfcqrng_d; 
+    pfcqsng=pfcqsng_d; pfsqltur=pfsqltur_d; pfsqitur=pfsqitur_d; pfplsl=pfplsl_d; pfplsn=pfplsn_d; pfhpsl=pfhpsl_d; pfhpsn=pfhpsn_d;
+
+    CALL TIMER%END()
+
+    ! On GPUs, adding block-level column totals is cumbersome and
+    ! error prone, and of little value due to the large number of
+    ! processing "thread teams". Instead we register the total here.
+    CALL TIMER%THREAD_LOG(TID=TID, IGPC=NGPTOT)
+
+    CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, NGPTOT)
+
+  END SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_CUF
+
+END MODULE CLOUDSC_DRIVER_GPU_SCC_CUF_MOD
diff --git a/src/cloudsc_gpu/cloudsc_gpu_omp_scc_hoist_mod.F90 b/src/cloudsc_gpu/cloudsc_gpu_omp_scc_hoist_mod.F90
new file mode 100644
index 00000000..706642c5
--- /dev/null
+++ b/src/cloudsc_gpu/cloudsc_gpu_omp_scc_hoist_mod.F90
@@ -0,0 +1,2658 @@
+! (C) Copyright 1988- ECMWF.
+!
+! This software is licensed under the terms of the Apache Licence Version 2.0
+! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+!
+! In applying this licence, ECMWF does not waive the privileges and immunities
+! granted to it by virtue of its status as an intergovernmental organisation
+! nor does it submit to any jurisdiction.
+
+MODULE CLOUDSC_GPU_OMP_SCC_HOIST_MOD
+
+!$omp declare target(cloudsc_scc_hoist)
+
+CONTAINS
+
+  SUBROUTINE CLOUDSC_SCC_HOIST (KIDIA, KFDIA, KLON, KLEV, PTSPHY, PT, PQ, TENDENCY_TMP_T, TENDENCY_TMP_Q, TENDENCY_TMP_A,  &
+  & TENDENCY_TMP_CLD, TENDENCY_LOC_T, TENDENCY_LOC_Q, TENDENCY_LOC_A, TENDENCY_LOC_CLD, PVFA, PVFL, PVFI, PDYNA, PDYNL, PDYNI,  &
+  & PHRSW, PHRLW, PVERVEL, PAP, PAPH, PLSM, LDCUM, KTYPE, PLU, PLUDE, PSNDE, PMFU, PMFD, PA, PCLV, PSUPSAT, PLCRIT_AER,  &
+  & PICRIT_AER, PRE_ICE, PCCN, PNICE, PCOVPTOT, PRAINFRAC_TOPRFZ, PFSQLF, PFSQIF, PFCQNNG, PFCQLNG, PFSQRF, PFSQSF, PFCQRNG,  &
+  & PFCQSNG, PFSQLTUR, PFSQITUR, PFPLSL, PFPLSN, PFHPSL, PFHPSN, YRECLDP, ZFOEALFA, ZTP1, ZLI, ZA, ZAORIG, ZLIQFRAC, ZICEFRAC,  &
+  & ZQX, ZQX0, ZPFPLSX, ZLNEG, ZQXN2D, ZQSMIX, ZQSLIQ, ZQSICE, ZFOEEWMT, ZFOEEW, ZFOEELIQT, JL)
+    !---input
+    !---prognostic fields
+    !-- arrays for aerosol-cloud interactions
+    !!! & PQAER,    KAER, &
+    !---diagnostic output
+    !---resulting fluxes
+
+    !===============================================================================
+    !**** *CLOUDSC* -  ROUTINE FOR PARAMETERIZATION OF CLOUD PROCESSES
+    !                  FOR PROGNOSTIC CLOUD SCHEME
+    !!
+    !     M.Tiedtke, C.Jakob, A.Tompkins, R.Forbes     (E.C.M.W.F.)
+    !!
+    !     PURPOSE
+    !     -------
+    !          THIS ROUTINE UPDATES THE CONV/STRAT CLOUD FIELDS.
+    !          THE FOLLOWING PROCESSES ARE CONSIDERED:
+    !        - Detrainment of cloud water from convective updrafts
+    !        - Evaporation/condensation of cloud water in connection
+    !           with heating/cooling such as by subsidence/ascent
+    !        - Erosion of clouds by turbulent mixing of cloud air
+    !           with unsaturated environmental air
+    !        - Deposition onto ice when liquid water present (Bergeron-Findeisen)
+    !        - Conversion of cloud water into rain (collision-coalescence)
+    !        - Conversion of cloud ice to snow (aggregation)
+    !        - Sedimentation of rain, snow and ice
+    !        - Evaporation of rain and snow
+    !        - Melting of snow and ice
+    !        - Freezing of liquid and rain
+    !        Note: Turbulent transports of s,q,u,v at cloud tops due to
+    !           buoyancy fluxes and lw radiative cooling are treated in
+    !           the VDF scheme
+    !!
+    !     INTERFACE.
+    !     ----------
+    !          *CLOUDSC* IS CALLED FROM *CALLPAR*
+    !     THE ROUTINE TAKES ITS INPUT FROM THE LONG-TERM STORAGE:
+    !     T,Q,L,PHI AND DETRAINMENT OF CLOUD WATER FROM THE
+    !     CONVECTIVE CLOUDS (MASSFLUX CONVECTION SCHEME), BOUNDARY
+    !     LAYER TURBULENT FLUXES OF HEAT AND MOISTURE, RADIATIVE FLUXES,
+    !     OMEGA.
+    !     IT RETURNS ITS OUTPUT TO:
+    !      1.MODIFIED TENDENCIES OF MODEL VARIABLES T AND Q
+    !        AS WELL AS CLOUD VARIABLES L AND C
+    !      2.GENERATES PRECIPITATION FLUXES FROM STRATIFORM CLOUDS
+    !!
+    !     EXTERNALS.
+    !     ----------
+    !          NONE
+    !!
+    !     MODIFICATIONS.
+    !     -------------
+    !      M. TIEDTKE    E.C.M.W.F.     8/1988, 2/1990
+    !     CH. JAKOB      E.C.M.W.F.     2/1994 IMPLEMENTATION INTO IFS
+    !     A.TOMPKINS     E.C.M.W.F.     2002   NEW NUMERICS
+    !        01-05-22 : D.Salmond   Safety modifications
+    !        02-05-29 : D.Salmond   Optimisation
+    !        03-01-13 : J.Hague     MASS Vector Functions  J.Hague
+    !        03-10-01 : M.Hamrud    Cleaning
+    !        04-12-14 : A.Tompkins  New implicit solver and physics changes
+    !        04-12-03 : A.Tompkins & M.Ko"hler  moist PBL
+    !     G.Mozdzynski  09-Jan-2006  EXP security fix
+    !        19-01-09 : P.Bechtold  Changed increased RCLDIFF value for KTYPE=2
+    !        07-07-10 : A.Tompkins/R.Forbes  4-Phase flexible microphysics
+    !        01-03-11 : R.Forbes    Mixed phase changes and tidy up
+    !        01-10-11 : R.Forbes    Melt ice to rain, allow rain to freeze
+    !        01-10-11 : R.Forbes    Limit supersat to avoid excessive values
+    !        31-10-11 : M.Ahlgrimm  Add rain, snow and PEXTRA to DDH output
+    !        17-02-12 : F.Vana      Simplified/optimized LU factorization
+    !        18-05-12 : F.Vana      Cleaning + better support of sequential physics
+    !        N.Semane+P.Bechtold     04-10-2012 Add RVRFACTOR factor for small planet
+    !        01-02-13 : R.Forbes    New params of autoconv/acc,rain evap,snow riming
+    !        15-03-13 : F. Vana     New dataflow + more tendencies from the first call
+    !        K. Yessad (July 2014): Move some variables.
+    !        F. Vana  05-Mar-2015  Support for single precision
+    !        15-01-15 : R.Forbes    Added new options for snow evap & ice deposition
+    !        10-01-15 : R.Forbes    New physics for rain freezing
+    !        23-10-14 : P. Bechtold remove zeroing of convection arrays
+    !
+    !     SWITCHES.
+    !     --------
+    !!
+    !     MODEL PARAMETERS
+    !     ----------------
+    !     RCLDIFF:    PARAMETER FOR EROSION OF CLOUDS
+    !     RCLCRIT_SEA:  THRESHOLD VALUE FOR RAIN AUTOCONVERSION OVER SEA
+    !     RCLCRIT_LAND: THRESHOLD VALUE FOR RAIN AUTOCONVERSION OVER LAND
+    !     RLCRITSNOW: THRESHOLD VALUE FOR SNOW AUTOCONVERSION
+    !     RKCONV:     PARAMETER FOR AUTOCONVERSION OF CLOUDS (KESSLER)
+    !     RCLDMAX:    MAXIMUM POSSIBLE CLW CONTENT (MASON,1971)
+    !!
+    !     REFERENCES.
+    !     ----------
+    !     TIEDTKE MWR 1993
+    !     JAKOB PhD 2000
+    !     GREGORY ET AL. QJRMS 2000
+    !     TOMPKINS ET AL. QJRMS 2007
+    !!
+    !===============================================================================
+
+    USE PARKIND1, ONLY: JPIM, JPRB
+    USE YOMPHYDER, ONLY: state_type
+    USE YOMCST, ONLY: RG, RD, RCPD, RETV, RLVTT, RLSTT, RLMLT, RTT, RV
+    USE YOETHF, ONLY: R2ES, R3LES, R3IES, R4LES, R4IES, R5LES, R5IES, R5ALVCP, R5ALSCP, RALVDCP, RALSDCP, RALFDCP, RTWAT, RTICE,  &
+    & RTICECU, RTWAT_RTICE_R, RTWAT_RTICECU_R, RKOOP1, RKOOP2
+    USE YOECLDP, ONLY: TECLDP, NCLDQV, NCLDQL, NCLDQR, NCLDQI, NCLDQS, NCLV
+
+
+
+
+
+    IMPLICIT NONE
+
+    !-------------------------------------------------------------------------------
+    !                 Declare input/output arguments
+    !-------------------------------------------------------------------------------
+
+    ! PLCRIT_AER : critical liquid mmr for rain autoconversion process
+    ! PICRIT_AER : critical liquid mmr for snow autoconversion process
+    ! PRE_LIQ : liq Re
+    ! PRE_ICE : ice Re
+    ! PCCN    : liquid cloud condensation nuclei
+    ! PNICE   : ice number concentration (cf. CCN)
+
+    REAL(KIND=JPRB), INTENT(IN) :: PLCRIT_AER(KLON, KLEV)
+    REAL(KIND=JPRB), INTENT(IN) :: PICRIT_AER(KLON, KLEV)
+    REAL(KIND=JPRB), INTENT(IN) :: PRE_ICE(KLON, KLEV)
+    REAL(KIND=JPRB), INTENT(IN) :: PCCN(KLON, KLEV)    ! liquid cloud condensation nuclei
+    REAL(KIND=JPRB), INTENT(IN) :: PNICE(KLON, KLEV)
+    ! ice number concentration (cf. CCN)
+
+    INTEGER(KIND=JPIM), INTENT(IN) :: KLON    ! Number of grid points
+    INTEGER(KIND=JPIM), INTENT(IN) :: KLEV    ! Number of levels
+    INTEGER(KIND=JPIM), INTENT(IN) :: KIDIA
+    INTEGER(KIND=JPIM), INTENT(IN) :: KFDIA
+    REAL(KIND=JPRB), INTENT(IN) :: PTSPHY    ! Physics timestep
+    REAL(KIND=JPRB), INTENT(IN) :: PT(KLON, KLEV)    ! T at start of callpar
+    REAL(KIND=JPRB), INTENT(IN) :: PQ(KLON, KLEV)    ! Q at start of callpar
+    REAL(KIND=JPRB), INTENT(IN) :: TENDENCY_TMP_T(KLON, KLEV)
+    REAL(KIND=JPRB), INTENT(IN) :: TENDENCY_TMP_Q(KLON, KLEV)
+    REAL(KIND=JPRB), INTENT(IN) :: TENDENCY_TMP_A(KLON, KLEV)
+    REAL(KIND=JPRB), INTENT(IN) :: TENDENCY_TMP_CLD(KLON, KLEV, NCLV)
+    REAL(KIND=JPRB), INTENT(INOUT) :: TENDENCY_LOC_T(KLON, KLEV)
+    REAL(KIND=JPRB), INTENT(INOUT) :: TENDENCY_LOC_Q(KLON, KLEV)
+    REAL(KIND=JPRB), INTENT(INOUT) :: TENDENCY_LOC_A(KLON, KLEV)
+    REAL(KIND=JPRB), INTENT(INOUT) :: TENDENCY_LOC_CLD(KLON, KLEV, NCLV)
+    REAL(KIND=JPRB), INTENT(IN) :: PVFA(KLON, KLEV)    ! CC from VDF scheme
+    REAL(KIND=JPRB), INTENT(IN) :: PVFL(KLON, KLEV)    ! Liq from VDF scheme
+    REAL(KIND=JPRB), INTENT(IN) :: PVFI(KLON, KLEV)    ! Ice from VDF scheme
+    REAL(KIND=JPRB), INTENT(IN) :: PDYNA(KLON, KLEV)    ! CC from Dynamics
+    REAL(KIND=JPRB), INTENT(IN) :: PDYNL(KLON, KLEV)    ! Liq from Dynamics
+    REAL(KIND=JPRB), INTENT(IN) :: PDYNI(KLON, KLEV)    ! Liq from Dynamics
+    REAL(KIND=JPRB), INTENT(IN) :: PHRSW(KLON, KLEV)    ! Short-wave heating rate
+    REAL(KIND=JPRB), INTENT(IN) :: PHRLW(KLON, KLEV)    ! Long-wave heating rate
+    REAL(KIND=JPRB), INTENT(IN) :: PVERVEL(KLON, KLEV)    !Vertical velocity
+    REAL(KIND=JPRB), INTENT(IN) :: PAP(KLON, KLEV)    ! Pressure on full levels
+    REAL(KIND=JPRB), INTENT(IN) :: PAPH(KLON, KLEV + 1)    ! Pressure on half levels
+    REAL(KIND=JPRB), INTENT(IN) :: PLSM(KLON)    ! Land fraction (0-1)
+    LOGICAL, INTENT(IN) :: LDCUM(KLON)    ! Convection active
+    INTEGER(KIND=JPIM), INTENT(IN) :: KTYPE(KLON)    ! Convection type 0,1,2
+    REAL(KIND=JPRB), INTENT(IN) :: PLU(KLON, KLEV)    ! Conv. condensate
+    REAL(KIND=JPRB), INTENT(INOUT) :: PLUDE(KLON, KLEV)    ! Conv. detrained water
+    REAL(KIND=JPRB), INTENT(IN) :: PSNDE(KLON, KLEV)    ! Conv. detrained snow
+    REAL(KIND=JPRB), INTENT(IN) :: PMFU(KLON, KLEV)    ! Conv. mass flux up
+    REAL(KIND=JPRB), INTENT(IN) :: PMFD(KLON, KLEV)    ! Conv. mass flux down
+    REAL(KIND=JPRB), INTENT(IN) :: PA(KLON, KLEV)
+    ! Original Cloud fraction (t)
+
+    REAL(KIND=JPRB), INTENT(IN) :: PCLV(KLON, KLEV, NCLV)
+
+    ! Supersat clipped at previous time level in SLTEND
+    REAL(KIND=JPRB), INTENT(IN) :: PSUPSAT(KLON, KLEV)
+    REAL(KIND=JPRB), INTENT(OUT) :: PCOVPTOT(KLON, KLEV)    ! Precip fraction
+    REAL(KIND=JPRB), INTENT(OUT) :: PRAINFRAC_TOPRFZ(KLON)
+    ! Flux diagnostics for DDH budget
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQLF(KLON, KLEV + 1)    ! Flux of liquid
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQIF(KLON, KLEV + 1)    ! Flux of ice
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQLNG(KLON, KLEV + 1)    ! -ve corr for liq
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQNNG(KLON, KLEV + 1)    ! -ve corr for ice
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQRF(KLON, KLEV + 1)    ! Flux diagnostics
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQSF(KLON, KLEV + 1)    !    for DDH, generic
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQRNG(KLON, KLEV + 1)    ! rain
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQSNG(KLON, KLEV + 1)    ! snow
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQLTUR(KLON, KLEV + 1)    ! liquid flux due to VDF
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQITUR(KLON, KLEV + 1)    ! ice flux due to VDF
+    REAL(KIND=JPRB), INTENT(OUT) :: PFPLSL(KLON, KLEV + 1)    ! liq+rain sedim flux
+    REAL(KIND=JPRB), INTENT(OUT) :: PFPLSN(KLON, KLEV + 1)    ! ice+snow sedim flux
+    REAL(KIND=JPRB), INTENT(OUT) :: PFHPSL(KLON, KLEV + 1)    ! Enthalpy flux for liq
+    REAL(KIND=JPRB), INTENT(OUT) :: PFHPSN(KLON, KLEV + 1)
+    ! Enthalpy flux for ice
+
+    TYPE(tecldp), INTENT(INOUT) :: YRECLDP
+
+    !-------------------------------------------------------------------------------
+    !                       Declare local variables
+    !-------------------------------------------------------------------------------
+
+    REAL(KIND=JPRB) :: ZLCOND1, ZLCOND2, ZLEVAP, ZLEROS, ZLEVAPL, ZLEVAPI, ZRAINAUT, ZSNOWAUT, ZLIQCLD, ZICECLD
+    !  condensation and evaporation terms
+    ! autoconversion terms
+    REAL(KIND=JPRB) :: ZFOKOOP
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZFOEALFA(KLON, KLEV + 1)
+    REAL(KIND=JPRB) :: ZICENUCLEI
+    ! number concentration of ice nuclei
+
+    REAL(KIND=JPRB) :: ZLICLD
+    REAL(KIND=JPRB) :: ZACOND
+    REAL(KIND=JPRB) :: ZAEROS
+    REAL(KIND=JPRB) :: ZLFINALSUM
+    REAL(KIND=JPRB) :: ZDQS
+    REAL(KIND=JPRB) :: ZTOLD
+    REAL(KIND=JPRB) :: ZQOLD
+    REAL(KIND=JPRB) :: ZDTGDP
+    REAL(KIND=JPRB) :: ZRDTGDP
+    REAL(KIND=JPRB) :: ZTRPAUS
+    REAL(KIND=JPRB) :: ZCOVPCLR
+    REAL(KIND=JPRB) :: ZPRECLR
+    REAL(KIND=JPRB) :: ZCOVPTOT
+    REAL(KIND=JPRB) :: ZCOVPMAX
+    REAL(KIND=JPRB) :: ZQPRETOT
+    REAL(KIND=JPRB) :: ZDPEVAP
+    REAL(KIND=JPRB) :: ZDTFORC
+    REAL(KIND=JPRB) :: ZDTDIAB
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZTP1(KLON, KLEV)
+    REAL(KIND=JPRB) :: ZLDEFR
+    REAL(KIND=JPRB) :: ZLDIFDT
+    REAL(KIND=JPRB) :: ZDTGDPF
+    REAL(KIND=JPRB) :: ZLCUST(NCLV)
+    REAL(KIND=JPRB) :: ZACUST
+    REAL(KIND=JPRB) :: ZMF
+
+    REAL(KIND=JPRB) :: ZRHO
+    REAL(KIND=JPRB) :: ZTMP1, ZTMP2, ZTMP3
+    REAL(KIND=JPRB) :: ZTMP4, ZTMP5, ZTMP6, ZTMP7
+    REAL(KIND=JPRB) :: ZALFAWM
+
+    ! Accumulators of A,B,and C factors for cloud equations
+    REAL(KIND=JPRB) :: ZSOLAB    ! -ve implicit CC
+    REAL(KIND=JPRB) :: ZSOLAC    ! linear CC
+    REAL(KIND=JPRB) :: ZANEW
+    REAL(KIND=JPRB) :: ZANEWM1
+
+    REAL(KIND=JPRB) :: ZGDP
+
+    !---for flux calculation
+    REAL(KIND=JPRB) :: ZDA
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZLI(KLON, KLEV), ZA(KLON, KLEV)
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZAORIG(KLON, KLEV)
+    ! start of scheme value for CC
+
+    LOGICAL :: LLFLAG
+    LOGICAL :: LLO1
+
+    INTEGER(KIND=JPIM) :: ICALL, IK, JK, JL, JM, JN, JO, JLEN, IS
+
+    REAL(KIND=JPRB) :: ZDP, ZPAPHD
+
+    REAL(KIND=JPRB) :: ZALFA
+    ! & ZALFACU, ZALFALS
+    REAL(KIND=JPRB) :: ZALFAW
+    REAL(KIND=JPRB) :: ZBETA, ZBETA1
+    !REAL(KIND=JPRB) :: ZBOTT
+    REAL(KIND=JPRB) :: ZCFPR
+    REAL(KIND=JPRB) :: ZCOR
+    REAL(KIND=JPRB) :: ZCDMAX
+    REAL(KIND=JPRB) :: ZMIN
+    REAL(KIND=JPRB) :: ZLCONDLIM
+    REAL(KIND=JPRB) :: ZDENOM
+    REAL(KIND=JPRB) :: ZDPMXDT
+    REAL(KIND=JPRB) :: ZDPR
+    REAL(KIND=JPRB) :: ZDTDP
+    REAL(KIND=JPRB) :: ZE
+    REAL(KIND=JPRB) :: ZEPSEC
+    REAL(KIND=JPRB) :: ZFAC, ZFACI, ZFACW
+    REAL(KIND=JPRB) :: ZGDCP
+    REAL(KIND=JPRB) :: ZINEW
+    REAL(KIND=JPRB) :: ZLCRIT
+    REAL(KIND=JPRB) :: ZMFDN
+    REAL(KIND=JPRB) :: ZPRECIP
+    REAL(KIND=JPRB) :: ZQE
+    REAL(KIND=JPRB) :: ZQSAT, ZQTMST, ZRDCP
+    REAL(KIND=JPRB) :: ZRHC, ZSIG, ZSIGK
+    REAL(KIND=JPRB) :: ZWTOT
+    REAL(KIND=JPRB) :: ZZCO, ZZDL, ZZRH, ZZZDT, ZQADJ
+    REAL(KIND=JPRB) :: ZQNEW, ZTNEW
+    REAL(KIND=JPRB) :: ZRG_R, ZGDPH_R, ZCONS1, ZCOND, ZCONS1A
+    REAL(KIND=JPRB) :: ZLFINAL
+    REAL(KIND=JPRB) :: ZMELT
+    REAL(KIND=JPRB) :: ZEVAP
+    REAL(KIND=JPRB) :: ZFRZ
+    REAL(KIND=JPRB) :: ZVPLIQ, ZVPICE
+    REAL(KIND=JPRB) :: ZADD, ZBDD, ZCVDS, ZICE0, ZDEPOS
+    REAL(KIND=JPRB) :: ZSUPSAT
+    REAL(KIND=JPRB) :: ZFALL
+    REAL(KIND=JPRB) :: ZRE_ICE
+    REAL(KIND=JPRB) :: ZRLDCP
+    REAL(KIND=JPRB) :: ZQP1ENV
+
+    !----------------------------
+    ! Arrays for new microphysics
+    !----------------------------
+    INTEGER(KIND=JPIM) :: IPHASE(NCLV)
+    ! marker for water phase of each species
+    ! 0=vapour, 1=liquid, 2=ice
+
+    INTEGER(KIND=JPIM) :: IMELT(NCLV)
+    ! marks melting linkage for ice categories
+    ! ice->liquid, snow->rain
+
+    LOGICAL :: LLFALL(NCLV)
+    ! marks falling species
+    ! LLFALL=0, cloud cover must > 0 for zqx > 0
+    ! LLFALL=1, no cloud needed, zqx can evaporate
+
+    LOGICAL :: LLINDEX1(NCLV)    ! index variable
+    LOGICAL :: LLINDEX3(NCLV, NCLV)    ! index variable
+    REAL(KIND=JPRB) :: ZMAX
+    REAL(KIND=JPRB) :: ZRAT
+    INTEGER(KIND=JPIM) :: IORDER(NCLV)
+    ! array for sorting explicit terms
+
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZLIQFRAC(KLON, KLEV)    ! cloud liquid water fraction: ql/(ql+qi)
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZICEFRAC(KLON, KLEV)    ! cloud ice water fraction: qi/(ql+qi)
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZQX(KLON, KLEV, NCLV)    ! water variables
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZQX0(KLON, KLEV, NCLV)    ! water variables at start of scheme
+    REAL(KIND=JPRB) :: ZQXN(NCLV)    ! new values for zqx at time+1
+    REAL(KIND=JPRB) :: ZQXFG(NCLV)    ! first guess values including precip
+    REAL(KIND=JPRB) :: ZQXNM1(NCLV)    ! new values for zqx at time+1 at level above
+    REAL(KIND=JPRB) :: ZFLUXQ(NCLV)
+    ! fluxes convergence of species (needed?)
+    ! Keep the following for possible future total water variance scheme?
+    !REAL(KIND=JPRB) :: ZTL(KLON,KLEV)       ! liquid water temperature
+    !REAL(KIND=JPRB) :: ZABETA(KLON,KLEV)    ! cloud fraction
+    !REAL(KIND=JPRB) :: ZVAR(KLON,KLEV)      ! temporary variance
+    !REAL(KIND=JPRB) :: ZQTMIN(KLON,KLEV)
+    !REAL(KIND=JPRB) :: ZQTMAX(KLON,KLEV)
+
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZPFPLSX(KLON, KLEV + 1, NCLV)    ! generalized precipitation flux
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZLNEG(KLON, KLEV, NCLV)    ! for negative correction diagnostics
+    REAL(KIND=JPRB) :: ZMELTMAX
+    REAL(KIND=JPRB) :: ZFRZMAX
+    REAL(KIND=JPRB) :: ZICETOT
+
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZQXN2D(KLON, KLEV, NCLV)
+    ! water variables store
+
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZQSMIX(KLON, KLEV)
+    ! diagnostic mixed phase saturation
+    !REAL(KIND=JPRB) :: ZQSBIN(KLON,KLEV) ! binary switched ice/liq saturation
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZQSLIQ(KLON, KLEV)    ! liquid water saturation
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZQSICE(KLON, KLEV)
+    ! ice water saturation
+
+    !REAL(KIND=JPRB) :: ZRHM(KLON,KLEV) ! diagnostic mixed phase RH
+    !REAL(KIND=JPRB) :: ZRHL(KLON,KLEV) ! RH wrt liq
+    !REAL(KIND=JPRB) :: ZRHI(KLON,KLEV) ! RH wrt ice
+
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZFOEEWMT(KLON, KLEV)
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZFOEEW(KLON, KLEV)
+    REAL(KIND=JPRB), INTENT(INOUT) :: ZFOEELIQT(KLON, KLEV)
+    !REAL(KIND=JPRB) :: ZFOEEICET(KLON,KLEV)
+
+    REAL(KIND=JPRB) :: ZDQSLIQDT, ZDQSICEDT, ZDQSMIXDT
+    REAL(KIND=JPRB) :: ZCORQSLIQ
+    REAL(KIND=JPRB) :: ZCORQSICE
+    !REAL(KIND=JPRB) :: ZCORQSBIN(KLON)
+    REAL(KIND=JPRB) :: ZCORQSMIX
+    REAL(KIND=JPRB) :: ZEVAPLIMLIQ, ZEVAPLIMICE, ZEVAPLIMMIX
+
+    !-------------------------------------------------------
+    ! SOURCE/SINK array for implicit and explicit terms
+    !-------------------------------------------------------
+    ! a POSITIVE value entered into the arrays is a...
+    !            Source of this variable
+    !            |
+    !            |   Sink of this variable
+    !            |   |
+    !            V   V
+    ! ZSOLQA(JL,IQa,IQb)  = explicit terms
+    ! ZSOLQB(JL,IQa,IQb)  = implicit terms
+    ! Thus if ZSOLAB(JL,NCLDQL,IQV)=K where K>0 then this is
+    ! a source of NCLDQL and a sink of IQV
+    ! put 'magic' source terms such as PLUDE from
+    ! detrainment into explicit source/sink array diagonal
+    ! ZSOLQA(NCLDQL,NCLDQL)= -PLUDE
+    ! i.e. A positive value is a sink!????? weird...
+    !-------------------------------------------------------
+
+    REAL(KIND=JPRB) :: ZSOLQA(NCLV, NCLV)    ! explicit sources and sinks
+    REAL(KIND=JPRB) :: ZSOLQB(NCLV, NCLV)
+    ! implicit sources and sinks
+    ! e.g. microphysical pathways between ice variables.
+    REAL(KIND=JPRB) :: ZQLHS(NCLV, NCLV)    ! n x n matrix storing the LHS of implicit solver
+    REAL(KIND=JPRB) :: ZVQX(NCLV)    ! fall speeds of three categories
+    REAL(KIND=JPRB) :: ZEXPLICIT, ZRATIO(NCLV), ZSINKSUM(NCLV)
+
+    ! for sedimentation source/sink terms
+    REAL(KIND=JPRB) :: ZFALLSINK(NCLV)
+    REAL(KIND=JPRB) :: ZFALLSRCE(NCLV)
+
+    ! for convection detrainment source and subsidence source/sink terms
+    REAL(KIND=JPRB) :: ZCONVSRCE(NCLV)
+    REAL(KIND=JPRB) :: ZCONVSINK(NCLV)
+
+    ! for supersaturation source term from previous timestep
+    REAL(KIND=JPRB) :: ZPSUPSATSRCE(NCLV)
+
+    ! Numerical fit to wet bulb temperature
+    REAL(KIND=JPRB), PARAMETER :: ZTW1 = 1329.31_JPRB
+    REAL(KIND=JPRB), PARAMETER :: ZTW2 = 0.0074615_JPRB
+    REAL(KIND=JPRB), PARAMETER :: ZTW3 = 0.85E5_JPRB
+    REAL(KIND=JPRB), PARAMETER :: ZTW4 = 40.637_JPRB
+    REAL(KIND=JPRB), PARAMETER :: ZTW5 = 275.0_JPRB
+
+    REAL(KIND=JPRB) :: ZSUBSAT    ! Subsaturation for snow melting term
+    REAL(KIND=JPRB) :: ZTDMTW0
+    ! Diff between dry-bulb temperature and
+    ! temperature when wet-bulb = 0degC
+
+    ! Variables for deposition term
+    REAL(KIND=JPRB) :: ZTCG    ! Temperature dependent function for ice PSD
+    REAL(KIND=JPRB) :: ZFACX1I, ZFACX1S    ! PSD correction factor
+    REAL(KIND=JPRB) :: ZAPLUSB, ZCORRFAC, ZCORRFAC2, ZPR02, ZTERM1, ZTERM2    ! for ice dep
+    REAL(KIND=JPRB) :: ZCLDTOPDIST    ! Distance from cloud top
+    REAL(KIND=JPRB) :: ZINFACTOR
+    ! No. of ice nuclei factor for deposition
+
+    ! Autoconversion/accretion/riming/evaporation
+    INTEGER(KIND=JPIM) :: IWARMRAIN
+    INTEGER(KIND=JPIM) :: IEVAPRAIN
+    INTEGER(KIND=JPIM) :: IEVAPSNOW
+    INTEGER(KIND=JPIM) :: IDEPICE
+    REAL(KIND=JPRB) :: ZRAINACC
+    REAL(KIND=JPRB) :: ZRAINCLD
+    REAL(KIND=JPRB) :: ZSNOWRIME
+    REAL(KIND=JPRB) :: ZSNOWCLD
+    REAL(KIND=JPRB) :: ZESATLIQ
+    REAL(KIND=JPRB) :: ZFALLCORR
+    REAL(KIND=JPRB) :: ZLAMBDA
+    REAL(KIND=JPRB) :: ZEVAP_DENOM
+    REAL(KIND=JPRB) :: ZCORR2
+    REAL(KIND=JPRB) :: ZKA
+    REAL(KIND=JPRB) :: ZCONST
+    REAL(KIND=JPRB) :: ZTEMP
+
+    ! Rain freezing
+    LOGICAL :: LLRAINLIQ
+    ! True if majority of raindrops are liquid (no ice core)
+
+    !----------------------------
+    ! End: new microphysics
+    !----------------------------
+
+    !----------------------
+    ! SCM budget statistics
+    !----------------------
+    REAL(KIND=JPRB) :: ZRAIN
+
+    REAL(KIND=JPRB) :: ZHOOK_HANDLE
+    REAL(KIND=JPRB) :: ZTMPL, ZTMPI, ZTMPA
+
+    REAL(KIND=JPRB) :: ZMM, ZRR
+    REAL(KIND=JPRB) :: ZRG
+
+    REAL(KIND=JPRB) :: ZZSUM, ZZRATIO
+    REAL(KIND=JPRB) :: ZEPSILON
+
+    REAL(KIND=JPRB) :: ZCOND1, ZQP
+
+    REAL(KIND=JPRB) :: PSUM_SOLQA
+
+
+#include "fcttre.func.h"
+#include "fccld.func.h"
+!$acc routine seq
+
+
+    !===============================================================================
+    !IF (LHOOK) CALL DR_HOOK('CLOUDSC',0,ZHOOK_HANDLE)
+
+    !===============================================================================
+    !  0.0     Beginning of timestep book-keeping
+    !----------------------------------------------------------------------
+
+
+    !######################################################################
+    !             0.  *** SET UP CONSTANTS ***
+    !######################################################################
+
+    ZEPSILON = 100._JPRB*EPSILON(ZEPSILON)
+
+    ! ---------------------------------------------------------------------
+    ! Set version of warm-rain autoconversion/accretion
+    ! IWARMRAIN = 1 ! Sundquist
+    ! IWARMRAIN = 2 ! Khairoutdinov and Kogan (2000)
+    ! ---------------------------------------------------------------------
+    IWARMRAIN = 2
+    ! ---------------------------------------------------------------------
+    ! Set version of rain evaporation
+    ! IEVAPRAIN = 1 ! Sundquist
+    ! IEVAPRAIN = 2 ! Abel and Boutle (2013)
+    ! ---------------------------------------------------------------------
+    IEVAPRAIN = 2
+    ! ---------------------------------------------------------------------
+    ! Set version of snow evaporation
+    ! IEVAPSNOW = 1 ! Sundquist
+    ! IEVAPSNOW = 2 ! New
+    ! ---------------------------------------------------------------------
+    IEVAPSNOW = 1
+    ! ---------------------------------------------------------------------
+    ! Set version of ice deposition
+    ! IDEPICE = 1 ! Rotstayn (2001)
+    ! IDEPICE = 2 ! New
+    ! ---------------------------------------------------------------------
+    IDEPICE = 1
+
+    ! ---------------------
+    ! Some simple constants
+    ! ---------------------
+    ZQTMST = 1.0_JPRB / PTSPHY
+    ZGDCP = RG / RCPD
+    ZRDCP = RD / RCPD
+    ZCONS1A = RCPD / ((RLMLT*RG*YRECLDP%RTAUMEL))
+    ZEPSEC = 1.E-14_JPRB
+    ZRG_R = 1.0_JPRB / RG
+    ZRLDCP = 1.0_JPRB / (RALSDCP - RALVDCP)
+
+    ! Note: Defined in module/yoecldp.F90
+    ! NCLDQL=1    ! liquid cloud water
+    ! NCLDQI=2    ! ice cloud water
+    ! NCLDQR=3    ! rain water
+    ! NCLDQS=4    ! snow
+    ! NCLDQV=5    ! vapour
+
+    ! -----------------------------------------------
+    ! Define species phase, 0=vapour, 1=liquid, 2=ice
+    ! -----------------------------------------------
+    IPHASE(NCLDQV) = 0
+    IPHASE(NCLDQL) = 1
+    IPHASE(NCLDQR) = 1
+    IPHASE(NCLDQI) = 2
+    IPHASE(NCLDQS) = 2
+
+    ! ---------------------------------------------------
+    ! Set up melting/freezing index,
+    ! if an ice category melts/freezes, where does it go?
+    ! ---------------------------------------------------
+    IMELT(NCLDQV) = -99
+    IMELT(NCLDQL) = NCLDQI
+    IMELT(NCLDQR) = NCLDQS
+    IMELT(NCLDQI) = NCLDQR
+    IMELT(NCLDQS) = NCLDQR
+
+    ! -----------------------------------------------
+    ! INITIALIZATION OF OUTPUT TENDENCIES
+    ! -----------------------------------------------
+!$acc loop seq
+    DO JK=1,KLEV
+      TENDENCY_LOC_T(JL, JK) = 0.0_JPRB
+      TENDENCY_LOC_Q(JL, JK) = 0.0_JPRB
+      TENDENCY_LOC_A(JL, JK) = 0.0_JPRB
+    END DO
+!$acc loop seq
+    DO JM=1,NCLV - 1
+      DO JK=1,KLEV
+        TENDENCY_LOC_CLD(JL, JK, JM) = 0.0_JPRB
+      END DO
+    END DO
+
+    !-- These were uninitialized : meaningful only when we compare error differences
+!$acc loop seq
+    DO JK=1,KLEV
+      PCOVPTOT(JL, JK) = 0.0_JPRB
+      TENDENCY_LOC_CLD(JL, JK, NCLV) = 0.0_JPRB
+    END DO
+
+    ! -------------------------
+    ! set up fall speeds in m/s
+    ! -------------------------
+    ZVQX(NCLDQV) = 0.0_JPRB
+    ZVQX(NCLDQL) = 0.0_JPRB
+    ZVQX(NCLDQI) = YRECLDP%RVICE
+    ZVQX(NCLDQR) = YRECLDP%RVRAIN
+    ZVQX(NCLDQS) = YRECLDP%RVSNOW
+    LLFALL(:) = .false.
+!$acc loop seq
+    DO JM=1,NCLV
+      IF (ZVQX(JM) > 0.0_JPRB)       LLFALL(JM) = .true.
+      ! falling species
+    END DO
+    ! Set LLFALL to false for ice (but ice still sediments!)
+    ! Need to rationalise this at some point
+    LLFALL(NCLDQI) = .false.
+
+
+    !######################################################################
+    !             1.  *** INITIAL VALUES FOR VARIABLES ***
+    !######################################################################
+
+
+    ! ----------------------
+    ! non CLV initialization
+    ! ----------------------
+!$acc loop seq
+    DO JK=1,KLEV
+      ZTP1(JL, JK) = PT(JL, JK) + PTSPHY*TENDENCY_TMP_T(JL, JK)
+      ZQX(JL, JK, NCLDQV) = PQ(JL, JK) + PTSPHY*TENDENCY_TMP_Q(JL, JK)
+      ZQX0(JL, JK, NCLDQV) = PQ(JL, JK) + PTSPHY*TENDENCY_TMP_Q(JL, JK)
+      ZA(JL, JK) = PA(JL, JK) + PTSPHY*TENDENCY_TMP_A(JL, JK)
+      ZAORIG(JL, JK) = PA(JL, JK) + PTSPHY*TENDENCY_TMP_A(JL, JK)
+    END DO
+
+    ! -------------------------------------
+    ! initialization for CLV family
+    ! -------------------------------------
+!$acc loop seq
+    DO JM=1,NCLV - 1
+      DO JK=1,KLEV
+        ZQX(JL, JK, JM) = PCLV(JL, JK, JM) + PTSPHY*TENDENCY_TMP_CLD(JL, JK, JM)
+        ZQX0(JL, JK, JM) = PCLV(JL, JK, JM) + PTSPHY*TENDENCY_TMP_CLD(JL, JK, JM)
+      END DO
+    END DO
+
+    !-------------
+    ! zero arrays
+    !-------------
+!$acc loop seq
+    DO JM=1,NCLV
+      DO JK=1,KLEV + 1
+        ZPFPLSX(JL, JK, JM) = 0.0_JPRB          ! precip fluxes
+      END DO
+    END DO
+
+!$acc loop seq
+    DO JM=1,NCLV
+      DO JK=1,KLEV
+        ZQXN2D(JL, JK, JM) = 0.0_JPRB          ! end of timestep values in 2D
+        ZLNEG(JL, JK, JM) = 0.0_JPRB          ! negative input check
+      END DO
+    END DO
+
+    PRAINFRAC_TOPRFZ(JL) = 0.0_JPRB      ! rain fraction at top of refreezing layer
+    LLRAINLIQ = .true.      ! Assume all raindrops are liquid initially
+
+    ! ----------------------------------------------------
+    ! Tidy up very small cloud cover or total cloud water
+    ! ----------------------------------------------------
+!$acc loop seq
+    DO JK=1,KLEV
+      IF (ZQX(JL, JK, NCLDQL) + ZQX(JL, JK, NCLDQI) < YRECLDP%RLMIN .or. ZA(JL, JK) < YRECLDP%RAMIN) THEN
+
+        ! Evaporate small cloud liquid water amounts
+        ZLNEG(JL, JK, NCLDQL) = ZLNEG(JL, JK, NCLDQL) + ZQX(JL, JK, NCLDQL)
+        ZQADJ = ZQX(JL, JK, NCLDQL)*ZQTMST
+        TENDENCY_LOC_Q(JL, JK) = TENDENCY_LOC_Q(JL, JK) + ZQADJ
+        TENDENCY_LOC_T(JL, JK) = TENDENCY_LOC_T(JL, JK) - RALVDCP*ZQADJ
+        ZQX(JL, JK, NCLDQV) = ZQX(JL, JK, NCLDQV) + ZQX(JL, JK, NCLDQL)
+        ZQX(JL, JK, NCLDQL) = 0.0_JPRB
+
+        ! Evaporate small cloud ice water amounts
+        ZLNEG(JL, JK, NCLDQI) = ZLNEG(JL, JK, NCLDQI) + ZQX(JL, JK, NCLDQI)
+        ZQADJ = ZQX(JL, JK, NCLDQI)*ZQTMST
+        TENDENCY_LOC_Q(JL, JK) = TENDENCY_LOC_Q(JL, JK) + ZQADJ
+        TENDENCY_LOC_T(JL, JK) = TENDENCY_LOC_T(JL, JK) - RALSDCP*ZQADJ
+        ZQX(JL, JK, NCLDQV) = ZQX(JL, JK, NCLDQV) + ZQX(JL, JK, NCLDQI)
+        ZQX(JL, JK, NCLDQI) = 0.0_JPRB
+
+        ! Set cloud cover to zero
+        ZA(JL, JK) = 0.0_JPRB
+
+      END IF
+    END DO
+
+    ! ---------------------------------
+    ! Tidy up small CLV variables
+    ! ---------------------------------
+    !DIR$ IVDEP
+!$acc loop seq
+    DO JM=1,NCLV - 1
+      !DIR$ IVDEP
+      DO JK=1,KLEV
+        !DIR$ IVDEP
+        IF (ZQX(JL, JK, JM) < YRECLDP%RLMIN) THEN
+          ZLNEG(JL, JK, JM) = ZLNEG(JL, JK, JM) + ZQX(JL, JK, JM)
+          ZQADJ = ZQX(JL, JK, JM)*ZQTMST
+          TENDENCY_LOC_Q(JL, JK) = TENDENCY_LOC_Q(JL, JK) + ZQADJ
+          IF (IPHASE(JM) == 1)           TENDENCY_LOC_T(JL, JK) = TENDENCY_LOC_T(JL, JK) - RALVDCP*ZQADJ
+          IF (IPHASE(JM) == 2)           TENDENCY_LOC_T(JL, JK) = TENDENCY_LOC_T(JL, JK) - RALSDCP*ZQADJ
+          ZQX(JL, JK, NCLDQV) = ZQX(JL, JK, NCLDQV) + ZQX(JL, JK, JM)
+          ZQX(JL, JK, JM) = 0.0_JPRB
+        END IF
+      END DO
+    END DO
+
+
+    ! ------------------------------
+    ! Define saturation values
+    ! ------------------------------
+!$acc loop seq
+    DO JK=1,KLEV
+      !----------------------------------------
+      ! old *diagnostic* mixed phase saturation
+      !----------------------------------------
+      ZFOEALFA(JL, JK) = FOEALFA(ZTP1(JL, JK))
+      ZFOEEWMT(JL, JK) = MIN(FOEEWM(ZTP1(JL, JK)) / PAP(JL, JK), 0.5_JPRB)
+      ZQSMIX(JL, JK) = ZFOEEWMT(JL, JK)
+      ZQSMIX(JL, JK) = ZQSMIX(JL, JK) / (1.0_JPRB - RETV*ZQSMIX(JL, JK))
+
+      !---------------------------------------------
+      ! ice saturation T<273K
+      ! liquid water saturation for T>273K
+      !---------------------------------------------
+      ZALFA = FOEDELTA(ZTP1(JL, JK))
+      ZFOEEW(JL, JK) = MIN((ZALFA*FOEELIQ(ZTP1(JL, JK)) + (1.0_JPRB - ZALFA)*FOEEICE(ZTP1(JL, JK))) / PAP(JL, JK), 0.5_JPRB)
+      ZFOEEW(JL, JK) = MIN(0.5_JPRB, ZFOEEW(JL, JK))
+      ZQSICE(JL, JK) = ZFOEEW(JL, JK) / (1.0_JPRB - RETV*ZFOEEW(JL, JK))
+
+      !----------------------------------
+      ! liquid water saturation
+      !----------------------------------
+      ZFOEELIQT(JL, JK) = MIN(FOEELIQ(ZTP1(JL, JK)) / PAP(JL, JK), 0.5_JPRB)
+      ZQSLIQ(JL, JK) = ZFOEELIQT(JL, JK)
+      ZQSLIQ(JL, JK) = ZQSLIQ(JL, JK) / (1.0_JPRB - RETV*ZQSLIQ(JL, JK))
+
+      !   !----------------------------------
+      !   ! ice water saturation
+      !   !----------------------------------
+      !   ZFOEEICET(JL,JK)=MIN(FOEEICE(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB)
+      !   ZQSICE(JL,JK)=ZFOEEICET(JL,JK)
+      !   ZQSICE(JL,JK)=ZQSICE(JL,JK)/(1.0_JPRB-RETV*ZQSICE(JL,JK))
+
+    END DO
+
+!$acc loop seq
+    DO JK=1,KLEV
+
+
+      !------------------------------------------
+      ! Ensure cloud fraction is between 0 and 1
+      !------------------------------------------
+      ZA(JL, JK) = MAX(0.0_JPRB, MIN(1.0_JPRB, ZA(JL, JK)))
+
+      !-------------------------------------------------------------------
+      ! Calculate liq/ice fractions (no longer a diagnostic relationship)
+      !-------------------------------------------------------------------
+      ZLI(JL, JK) = ZQX(JL, JK, NCLDQL) + ZQX(JL, JK, NCLDQI)
+      IF (ZLI(JL, JK) > YRECLDP%RLMIN) THEN
+        ZLIQFRAC(JL, JK) = ZQX(JL, JK, NCLDQL) / ZLI(JL, JK)
+        ZICEFRAC(JL, JK) = 1.0_JPRB - ZLIQFRAC(JL, JK)
+      ELSE
+        ZLIQFRAC(JL, JK) = 0.0_JPRB
+        ZICEFRAC(JL, JK) = 0.0_JPRB
+      END IF
+
+    END DO
+
+    !######################################################################
+    !        2.       *** CONSTANTS AND PARAMETERS ***
+    !######################################################################
+    !  Calculate L in updrafts of bl-clouds
+    !  Specify QS, P/PS for tropopause (for c2)
+    !  And initialize variables
+    !------------------------------------------
+
+    !---------------------------------
+    ! Find tropopause level (ZTRPAUS)
+    !---------------------------------
+    ZTRPAUS = 0.1_JPRB
+    ZPAPHD = 1.0_JPRB / PAPH(JL, KLEV + 1)
+!$acc loop seq
+    DO JK=1,KLEV - 1
+      ZSIG = PAP(JL, JK)*ZPAPHD
+      IF (ZSIG > 0.1_JPRB .and. ZSIG < 0.4_JPRB .and. ZTP1(JL, JK) > ZTP1(JL, JK + 1)) THEN
+        ZTRPAUS = ZSIG
+      END IF
+    END DO
+
+    !-----------------------------
+    ! Reset single level variables
+    !-----------------------------
+
+    ZANEWM1 = 0.0_JPRB
+    ZDA = 0.0_JPRB
+    ZCOVPCLR = 0.0_JPRB
+    ZCOVPMAX = 0.0_JPRB
+    ZCOVPTOT = 0.0_JPRB
+    ZCLDTOPDIST = 0.0_JPRB
+
+    !######################################################################
+    !           3.       *** PHYSICS ***
+    !######################################################################
+
+
+    !----------------------------------------------------------------------
+    !                       START OF VERTICAL LOOP
+    !----------------------------------------------------------------------
+
+!$acc loop seq
+    DO JK=YRECLDP%NCLDTOP,KLEV
+
+      !----------------------------------------------------------------------
+      ! 3.0 INITIALIZE VARIABLES
+      !----------------------------------------------------------------------
+
+      !---------------------------------
+      ! First guess microphysics
+      !---------------------------------
+      DO JM=1,NCLV
+        ZQXFG(JM) = ZQX(JL, JK, JM)
+      END DO
+
+      !---------------------------------
+      ! Set KLON arrays to zero
+      !---------------------------------
+
+      ZLICLD = 0.0_JPRB
+      ZRAINAUT = 0.0_JPRB        ! currently needed for diags
+      ZRAINACC = 0.0_JPRB        ! currently needed for diags
+      ZSNOWAUT = 0.0_JPRB        ! needed
+      ZLDEFR = 0.0_JPRB
+      ZACUST = 0.0_JPRB        ! set later when needed
+      ZQPRETOT = 0.0_JPRB
+      ZLFINALSUM = 0.0_JPRB
+
+      ! Required for first guess call
+      ZLCOND1 = 0.0_JPRB
+      ZLCOND2 = 0.0_JPRB
+      ZSUPSAT = 0.0_JPRB
+      ZLEVAPL = 0.0_JPRB
+      ZLEVAPI = 0.0_JPRB
+
+      !-------------------------------------
+      ! solvers for cloud fraction
+      !-------------------------------------
+      ZSOLAB = 0.0_JPRB
+      ZSOLAC = 0.0_JPRB
+
+      ZICETOT = 0.0_JPRB
+
+      !------------------------------------------
+      ! reset matrix so missing pathways are set
+      !------------------------------------------
+      DO JM=1,NCLV
+        DO JN=1,NCLV
+          ZSOLQB(JN, JM) = 0.0_JPRB
+          ZSOLQA(JN, JM) = 0.0_JPRB
+        END DO
+      END DO
+
+      !----------------------------------
+      ! reset new microphysics variables
+      !----------------------------------
+      DO JM=1,NCLV
+        ZFALLSRCE(JM) = 0.0_JPRB
+        ZFALLSINK(JM) = 0.0_JPRB
+        ZCONVSRCE(JM) = 0.0_JPRB
+        ZCONVSINK(JM) = 0.0_JPRB
+        ZPSUPSATSRCE(JM) = 0.0_JPRB
+        ZRATIO(JM) = 0.0_JPRB
+      END DO
+
+
+      !-------------------------
+      ! derived variables needed
+      !-------------------------
+
+      ZDP = PAPH(JL, JK + 1) - PAPH(JL, JK)        ! dp
+      ZGDP = RG / ZDP        ! g/dp
+      ZRHO = PAP(JL, JK) / ((RD*ZTP1(JL, JK)))        ! p/RT air density
+
+      ZDTGDP = PTSPHY*ZGDP        ! dt g/dp
+      ZRDTGDP = ZDP*(1.0_JPRB / ((PTSPHY*RG)))        ! 1/(dt g/dp)
+
+      IF (JK > 1)       ZDTGDPF = (PTSPHY*RG) / (PAP(JL, JK) - PAP(JL, JK - 1))
+
+      !------------------------------------
+      ! Calculate dqs/dT correction factor
+      !------------------------------------
+      ! Reminder: RETV=RV/RD-1
+
+      ! liquid
+      ZFACW = R5LES / ((ZTP1(JL, JK) - R4LES)**2)
+      ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZFOEELIQT(JL, JK))
+      ZDQSLIQDT = ZFACW*ZCOR*ZQSLIQ(JL, JK)
+      ZCORQSLIQ = 1.0_JPRB + RALVDCP*ZDQSLIQDT
+
+      ! ice
+      ZFACI = R5IES / ((ZTP1(JL, JK) - R4IES)**2)
+      ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZFOEEW(JL, JK))
+      ZDQSICEDT = ZFACI*ZCOR*ZQSICE(JL, JK)
+      ZCORQSICE = 1.0_JPRB + RALSDCP*ZDQSICEDT
+
+      ! diagnostic mixed
+      ZALFAW = ZFOEALFA(JL, JK)
+      ZALFAWM = ZALFAW
+      ZFAC = ZALFAW*ZFACW + (1.0_JPRB - ZALFAW)*ZFACI
+      ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZFOEEWMT(JL, JK))
+      ZDQSMIXDT = ZFAC*ZCOR*ZQSMIX(JL, JK)
+      ZCORQSMIX = 1.0_JPRB + FOELDCPM(ZTP1(JL, JK))*ZDQSMIXDT
+
+      ! evaporation/sublimation limits
+      ZEVAPLIMMIX = MAX((ZQSMIX(JL, JK) - ZQX(JL, JK, NCLDQV)) / ZCORQSMIX, 0.0_JPRB)
+      ZEVAPLIMLIQ = MAX((ZQSLIQ(JL, JK) - ZQX(JL, JK, NCLDQV)) / ZCORQSLIQ, 0.0_JPRB)
+      ZEVAPLIMICE = MAX((ZQSICE(JL, JK) - ZQX(JL, JK, NCLDQV)) / ZCORQSICE, 0.0_JPRB)
+
+      !--------------------------------
+      ! in-cloud condensate amount
+      !--------------------------------
+      ZTMPA = 1.0_JPRB / MAX(ZA(JL, JK), ZEPSEC)
+      ZLIQCLD = ZQX(JL, JK, NCLDQL)*ZTMPA
+      ZICECLD = ZQX(JL, JK, NCLDQI)*ZTMPA
+      ZLICLD = ZLIQCLD + ZICECLD
+
+
+      !------------------------------------------------
+      ! Evaporate very small amounts of liquid and ice
+      !------------------------------------------------
+
+      IF (ZQX(JL, JK, NCLDQL) < YRECLDP%RLMIN) THEN
+        ZSOLQA(NCLDQV, NCLDQL) = ZQX(JL, JK, NCLDQL)
+        ZSOLQA(NCLDQL, NCLDQV) = -ZQX(JL, JK, NCLDQL)
+      END IF
+
+      IF (ZQX(JL, JK, NCLDQI) < YRECLDP%RLMIN) THEN
+        ZSOLQA(NCLDQV, NCLDQI) = ZQX(JL, JK, NCLDQI)
+        ZSOLQA(NCLDQI, NCLDQV) = -ZQX(JL, JK, NCLDQI)
+      END IF
+
+
+      !---------------------------------------------------------------------
+      !  3.1  ICE SUPERSATURATION ADJUSTMENT
+      !---------------------------------------------------------------------
+      ! Note that the supersaturation adjustment is made with respect to
+      ! liquid saturation:  when T>0C
+      ! ice saturation:     when T<0C
+      !                     with an adjustment made to allow for ice
+      !                     supersaturation in the clear sky
+      ! Note also that the KOOP factor automatically clips the supersaturation
+      ! to a maximum set by the liquid water saturation mixing ratio
+      ! important for temperatures near to but below 0C
+      !-----------------------------------------------------------------------
+
+      !DIR$ NOFUSION
+
+      !-----------------------------------
+      ! 3.1.1 Supersaturation limit (from Koop)
+      !-----------------------------------
+      ! Needs to be set for all temperatures
+      ZFOKOOP = FOKOOP(ZTP1(JL, JK))
+
+      IF (ZTP1(JL, JK) >= RTT .or. YRECLDP%NSSOPT == 0) THEN
+        ZFAC = 1.0_JPRB
+        ZFACI = 1.0_JPRB
+      ELSE
+        ZFAC = ZA(JL, JK) + ZFOKOOP*(1.0_JPRB - ZA(JL, JK))
+        ZFACI = PTSPHY / YRECLDP%RKOOPTAU
+      END IF
+
+      !-------------------------------------------------------------------
+      ! 3.1.2 Calculate supersaturation wrt Koop including dqs/dT
+      !       correction factor
+      ! [#Note: QSICE or QSLIQ]
+      !-------------------------------------------------------------------
+
+      ! Calculate supersaturation to add to cloud
+      IF (ZA(JL, JK) > 1.0_JPRB - YRECLDP%RAMIN) THEN
+        ZSUPSAT = MAX((ZQX(JL, JK, NCLDQV) - ZFAC*ZQSICE(JL, JK)) / ZCORQSICE, 0.0_JPRB)
+      ELSE
+        ! Calculate environmental humidity supersaturation
+        ZQP1ENV = (ZQX(JL, JK, NCLDQV) - ZA(JL, JK)*ZQSICE(JL, JK)) / MAX(1.0_JPRB - ZA(JL, JK), ZEPSILON)
+        !& SIGN(MAX(ABS(1.0_JPRB-ZA(JL,JK)),ZEPSILON),1.0_JPRB-ZA(JL,JK))
+        ZSUPSAT = MAX(((1.0_JPRB - ZA(JL, JK))*(ZQP1ENV - ZFAC*ZQSICE(JL, JK))) / ZCORQSICE, 0.0_JPRB)
+      END IF
+
+      !-------------------------------------------------------------------
+      ! Here the supersaturation is turned into liquid water
+      ! However, if the temperature is below the threshold for homogeneous
+      ! freezing then the supersaturation is turned instantly to ice.
+      !--------------------------------------------------------------------
+
+      IF (ZSUPSAT > ZEPSEC) THEN
+
+        IF (ZTP1(JL, JK) > YRECLDP%RTHOMO) THEN
+          ! Turn supersaturation into liquid water
+          ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) + ZSUPSAT
+          ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) - ZSUPSAT
+          ! Include liquid in first guess
+          ZQXFG(NCLDQL) = ZQXFG(NCLDQL) + ZSUPSAT
+        ELSE
+          ! Turn supersaturation into ice water
+          ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) + ZSUPSAT
+          ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) - ZSUPSAT
+          ! Add ice to first guess for deposition term
+          ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZSUPSAT
+        END IF
+
+        ! Increase cloud amount using RKOOPTAU timescale
+        ZSOLAC = (1.0_JPRB - ZA(JL, JK))*ZFACI
+
+      END IF
+
+      !-------------------------------------------------------
+      ! 3.1.3 Include supersaturation from previous timestep
+      ! (Calculated in sltENDIF semi-lagrangian LDSLPHY=T)
+      !-------------------------------------------------------
+      IF (PSUPSAT(JL, JK) > ZEPSEC) THEN
+        IF (ZTP1(JL, JK) > YRECLDP%RTHOMO) THEN
+          ! Turn supersaturation into liquid water
+          ZSOLQA(NCLDQL, NCLDQL) = ZSOLQA(NCLDQL, NCLDQL) + PSUPSAT(JL, JK)
+          ZPSUPSATSRCE(NCLDQL) = PSUPSAT(JL, JK)
+          ! Add liquid to first guess for deposition term
+          ZQXFG(NCLDQL) = ZQXFG(NCLDQL) + PSUPSAT(JL, JK)
+          ! Store cloud budget diagnostics if required
+        ELSE
+          ! Turn supersaturation into ice water
+          ZSOLQA(NCLDQI, NCLDQI) = ZSOLQA(NCLDQI, NCLDQI) + PSUPSAT(JL, JK)
+          ZPSUPSATSRCE(NCLDQI) = PSUPSAT(JL, JK)
+          ! Add ice to first guess for deposition term
+          ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + PSUPSAT(JL, JK)
+          ! Store cloud budget diagnostics if required
+        END IF
+
+        ! Increase cloud amount using RKOOPTAU timescale
+        ZSOLAC = (1.0_JPRB - ZA(JL, JK))*ZFACI
+        ! Store cloud budget diagnostics if required
+      END IF
+
+      ! on JL
+
+      !---------------------------------------------------------------------
+      !  3.2  DETRAINMENT FROM CONVECTION
+      !---------------------------------------------------------------------
+      ! * Diagnostic T-ice/liq split retained for convection
+      !    Note: This link is now flexible and a future convection
+      !    scheme can detrain explicit separate budgets of:
+      !    cloud water, ice, rain and snow
+      ! * There is no (1-ZA) multiplier term on the cloud detrainment
+      !    term, since is now written in mass-flux terms
+      ! [#Note: Should use ZFOEALFACU used in convection rather than ZFOEALFA]
+      !---------------------------------------------------------------------
+      IF (JK < KLEV .and. JK >= YRECLDP%NCLDTOP) THEN
+
+
+        PLUDE(JL, JK) = PLUDE(JL, JK)*ZDTGDP
+
+        IF (LDCUM(JL) .and. PLUDE(JL, JK) > YRECLDP%RLMIN .and. PLU(JL, JK + 1) > ZEPSEC) THEN
+
+          ZSOLAC = ZSOLAC + PLUDE(JL, JK) / PLU(JL, JK + 1)
+          ! *diagnostic temperature split*
+          ZALFAW = ZFOEALFA(JL, JK)
+          ZCONVSRCE(NCLDQL) = ZALFAW*PLUDE(JL, JK)
+          ZCONVSRCE(NCLDQI) = (1.0_JPRB - ZALFAW)*PLUDE(JL, JK)
+          ZSOLQA(NCLDQL, NCLDQL) = ZSOLQA(NCLDQL, NCLDQL) + ZCONVSRCE(NCLDQL)
+          ZSOLQA(NCLDQI, NCLDQI) = ZSOLQA(NCLDQI, NCLDQI) + ZCONVSRCE(NCLDQI)
+
+        ELSE
+
+          PLUDE(JL, JK) = 0.0_JPRB
+
+        END IF
+        ! *convective snow detrainment source
+        IF (LDCUM(JL))         ZSOLQA(NCLDQS, NCLDQS) = ZSOLQA(NCLDQS, NCLDQS) + PSNDE(JL, JK)*ZDTGDP
+
+
+      END IF
+      ! JK<KLEV
+
+      !---------------------------------------------------------------------
+      !  3.3  SUBSIDENCE COMPENSATING CONVECTIVE UPDRAUGHTS
+      !---------------------------------------------------------------------
+      ! Three terms:
+      ! * Convective subsidence source of cloud from layer above
+      ! * Evaporation of cloud within the layer
+      ! * Subsidence sink of cloud to the layer below (Implicit solution)
+      !---------------------------------------------------------------------
+
+      !-----------------------------------------------
+      ! Subsidence source from layer above
+      !               and
+      ! Evaporation of cloud within the layer
+      !-----------------------------------------------
+      IF (JK > YRECLDP%NCLDTOP) THEN
+
+        ZMF = MAX(0.0_JPRB, (PMFU(JL, JK) + PMFD(JL, JK))*ZDTGDP)
+        ZACUST = ZMF*ZANEWM1
+
+        DO JM=1,NCLV
+          IF (.not.LLFALL(JM) .and. IPHASE(JM) > 0) THEN
+            ZLCUST(JM) = ZMF*ZQXNM1(JM)
+            ! record total flux for enthalpy budget:
+            ZCONVSRCE(JM) = ZCONVSRCE(JM) + ZLCUST(JM)
+          END IF
+        END DO
+
+        ! Now have to work out how much liquid evaporates at arrival point
+        ! since there is no prognostic memory for in-cloud humidity, i.e.
+        ! we always assume cloud is saturated.
+
+        ZDTDP = (ZRDCP*0.5_JPRB*(ZTP1(JL, JK - 1) + ZTP1(JL, JK))) / PAPH(JL, JK)
+        ZDTFORC = ZDTDP*(PAP(JL, JK) - PAP(JL, JK - 1))
+        ![#Note: Diagnostic mixed phase should be replaced below]
+        ZDQS = ZANEWM1*ZDTFORC*ZDQSMIXDT
+
+        DO JM=1,NCLV
+          IF (.not.LLFALL(JM) .and. IPHASE(JM) > 0) THEN
+            ZLFINAL = MAX(0.0_JPRB, ZLCUST(JM) - ZDQS)              !lim to zero
+            ! no supersaturation allowed incloud ---V
+            ZEVAP = MIN((ZLCUST(JM) - ZLFINAL), ZEVAPLIMMIX)
+            !          ZEVAP=0.0_JPRB
+            ZLFINAL = ZLCUST(JM) - ZEVAP
+            ZLFINALSUM = ZLFINALSUM + ZLFINAL              ! sum
+
+            ZSOLQA(JM, JM) = ZSOLQA(JM, JM) + ZLCUST(JM)              ! whole sum
+            ZSOLQA(NCLDQV, JM) = ZSOLQA(NCLDQV, JM) + ZEVAP
+            ZSOLQA(JM, NCLDQV) = ZSOLQA(JM, NCLDQV) - ZEVAP
+          END IF
+        END DO
+
+        !  Reset the cloud contribution if no cloud water survives to this level:
+        IF (ZLFINALSUM < ZEPSEC)         ZACUST = 0.0_JPRB
+        ZSOLAC = ZSOLAC + ZACUST
+
+      END IF
+      ! on  JK>NCLDTOP
+
+      !---------------------------------------------------------------------
+      ! Subsidence sink of cloud to the layer below
+      ! (Implicit - re. CFL limit on convective mass flux)
+      !---------------------------------------------------------------------
+
+
+      IF (JK < KLEV) THEN
+
+        ZMFDN = MAX(0.0_JPRB, (PMFU(JL, JK + 1) + PMFD(JL, JK + 1))*ZDTGDP)
+
+        ZSOLAB = ZSOLAB + ZMFDN
+        ZSOLQB(NCLDQL, NCLDQL) = ZSOLQB(NCLDQL, NCLDQL) + ZMFDN
+        ZSOLQB(NCLDQI, NCLDQI) = ZSOLQB(NCLDQI, NCLDQI) + ZMFDN
+
+        ! Record sink for cloud budget and enthalpy budget diagnostics
+        ZCONVSINK(NCLDQL) = ZMFDN
+        ZCONVSINK(NCLDQI) = ZMFDN
+
+      END IF
+
+
+      !----------------------------------------------------------------------
+      ! 3.4  EROSION OF CLOUDS BY TURBULENT MIXING
+      !----------------------------------------------------------------------
+      ! NOTE: In default tiedtke scheme this process decreases the cloud
+      !       area but leaves the specific cloud water content
+      !       within clouds unchanged
+      !----------------------------------------------------------------------
+
+      ! ------------------------------
+      ! Define turbulent erosion rate
+      ! ------------------------------
+      ZLDIFDT = YRECLDP%RCLDIFF*PTSPHY        !original version
+      !Increase by factor of 5 for convective points
+      IF (KTYPE(JL) > 0 .and. PLUDE(JL, JK) > ZEPSEC)       ZLDIFDT = YRECLDP%RCLDIFF_CONVI*ZLDIFDT
+
+      ! At the moment, works on mixed RH profile and partitioned ice/liq fraction
+      ! so that it is similar to previous scheme
+      ! Should apply RHw for liquid cloud and RHi for ice cloud separately
+      IF (ZLI(JL, JK) > ZEPSEC) THEN
+        ! Calculate environmental humidity
+        !      ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/&
+        !    &      MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK))
+        !      ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQE,0.0_JPRB)
+        ZE = ZLDIFDT*MAX(ZQSMIX(JL, JK) - ZQX(JL, JK, NCLDQV), 0.0_JPRB)
+        ZLEROS = ZA(JL, JK)*ZE
+        ZLEROS = MIN(ZLEROS, ZEVAPLIMMIX)
+        ZLEROS = MIN(ZLEROS, ZLI(JL, JK))
+        ZAEROS = ZLEROS / ZLICLD          !if linear term
+
+        ! Erosion is -ve LINEAR in L,A
+        ZSOLAC = ZSOLAC - ZAEROS          !linear
+
+        ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) + ZLIQFRAC(JL, JK)*ZLEROS
+        ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) - ZLIQFRAC(JL, JK)*ZLEROS
+        ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) + ZICEFRAC(JL, JK)*ZLEROS
+        ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) - ZICEFRAC(JL, JK)*ZLEROS
+
+      END IF
+
+      !----------------------------------------------------------------------
+      ! 3.4  CONDENSATION/EVAPORATION DUE TO DQSAT/DT
+      !----------------------------------------------------------------------
+      !  calculate dqs/dt
+      !  Note: For the separate prognostic Qi and Ql, one would ideally use
+      !  Qsat/DT wrt liquid/Koop here, since the physics is that new clouds
+      !  form by liquid droplets [liq] or when aqueous aerosols [Koop] form.
+      !  These would then instantaneously freeze if T<-38C or lead to ice growth
+      !  by deposition in warmer mixed phase clouds.  However, since we do
+      !  not have a separate prognostic equation for in-cloud humidity or a
+      !  statistical scheme approach in place, the depositional growth of ice
+      !  in the mixed phase can not be modelled and we resort to supersaturation
+      !  wrt ice instantaneously converting to ice over one timestep
+      !  (see Tompkins et al. QJRMS 2007 for details)
+      !  Thus for the initial implementation the diagnostic mixed phase is
+      !  retained for the moment, and the level of approximation noted.
+      !----------------------------------------------------------------------
+
+      ZDTDP = (ZRDCP*ZTP1(JL, JK)) / PAP(JL, JK)
+      ZDPMXDT = ZDP*ZQTMST
+      ZMFDN = 0.0_JPRB
+      IF (JK < KLEV)       ZMFDN = PMFU(JL, JK + 1) + PMFD(JL, JK + 1)
+      ZWTOT = PVERVEL(JL, JK) + 0.5_JPRB*RG*(PMFU(JL, JK) + PMFD(JL, JK) + ZMFDN)
+      ZWTOT = MIN(ZDPMXDT, MAX(-ZDPMXDT, ZWTOT))
+      ZZZDT = PHRSW(JL, JK) + PHRLW(JL, JK)
+      ZDTDIAB = MIN(ZDPMXDT*ZDTDP, MAX(-ZDPMXDT*ZDTDP, ZZZDT))*PTSPHY + RALFDCP*ZLDEFR
+      ! Note: ZLDEFR should be set to the difference between the mixed phase functions
+      ! in the convection and cloud scheme, but this is not calculated, so is zero and
+      ! the functions must be the same
+      ZDTFORC = ZDTDP*ZWTOT*PTSPHY + ZDTDIAB
+      ZQOLD = ZQSMIX(JL, JK)
+      ZTOLD = ZTP1(JL, JK)
+      ZTP1(JL, JK) = ZTP1(JL, JK) + ZDTFORC
+      ZTP1(JL, JK) = MAX(ZTP1(JL, JK), 160.0_JPRB)
+      LLFLAG = .true.
+
+      ! Formerly a call to CUADJTQ(..., ICALL=5)
+      ZQP = 1.0_JPRB / PAP(JL, JK)
+      ZQSAT = FOEEWM(ZTP1(JL, JK))*ZQP
+      ZQSAT = MIN(0.5_JPRB, ZQSAT)
+      ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZQSAT)
+      ZQSAT = ZQSAT*ZCOR
+      ZCOND = (ZQSMIX(JL, JK) - ZQSAT) / (1.0_JPRB + ZQSAT*ZCOR*FOEDEM(ZTP1(JL, JK)))
+      ZTP1(JL, JK) = ZTP1(JL, JK) + FOELDCPM(ZTP1(JL, JK))*ZCOND
+      ZQSMIX(JL, JK) = ZQSMIX(JL, JK) - ZCOND
+      ZQSAT = FOEEWM(ZTP1(JL, JK))*ZQP
+      ZQSAT = MIN(0.5_JPRB, ZQSAT)
+      ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZQSAT)
+      ZQSAT = ZQSAT*ZCOR
+      ZCOND1 = (ZQSMIX(JL, JK) - ZQSAT) / (1.0_JPRB + ZQSAT*ZCOR*FOEDEM(ZTP1(JL, JK)))
+      ZTP1(JL, JK) = ZTP1(JL, JK) + FOELDCPM(ZTP1(JL, JK))*ZCOND1
+      ZQSMIX(JL, JK) = ZQSMIX(JL, JK) - ZCOND1
+
+      ZDQS = ZQSMIX(JL, JK) - ZQOLD
+      ZQSMIX(JL, JK) = ZQOLD
+      ZTP1(JL, JK) = ZTOLD
+
+      !----------------------------------------------------------------------
+      ! 3.4a  ZDQS(JL) > 0:  EVAPORATION OF CLOUDS
+      ! ----------------------------------------------------------------------
+      ! Erosion term is LINEAR in L
+      ! Changed to be uniform distribution in cloud region
+
+
+      ! Previous function based on DELTA DISTRIBUTION in cloud:
+      IF (ZDQS > 0.0_JPRB) THEN
+        !    If subsidence evaporation term is turned off, then need to use updated
+        !    liquid and cloud here?
+        !    ZLEVAP = MAX(ZA(JL,JK)+ZACUST(JL),1.0_JPRB)*MIN(ZDQS(JL),ZLICLD(JL)+ZLFINALSUM(JL))
+        ZLEVAP = ZA(JL, JK)*MIN(ZDQS, ZLICLD)
+        ZLEVAP = MIN(ZLEVAP, ZEVAPLIMMIX)
+        ZLEVAP = MIN(ZLEVAP, MAX(ZQSMIX(JL, JK) - ZQX(JL, JK, NCLDQV), 0.0_JPRB))
+
+        ! For first guess call
+        ZLEVAPL = ZLIQFRAC(JL, JK)*ZLEVAP
+        ZLEVAPI = ZICEFRAC(JL, JK)*ZLEVAP
+
+        ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) + ZLIQFRAC(JL, JK)*ZLEVAP
+        ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) - ZLIQFRAC(JL, JK)*ZLEVAP
+
+        ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) + ZICEFRAC(JL, JK)*ZLEVAP
+        ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) - ZICEFRAC(JL, JK)*ZLEVAP
+
+      END IF
+
+
+      !----------------------------------------------------------------------
+      ! 3.4b ZDQS(JL) < 0: FORMATION OF CLOUDS
+      !----------------------------------------------------------------------
+      ! (1) Increase of cloud water in existing clouds
+      IF (ZA(JL, JK) > ZEPSEC .and. ZDQS <= -YRECLDP%RLMIN) THEN
+
+        ZLCOND1 = MAX(-ZDQS, 0.0_JPRB)          !new limiter
+
+        !old limiter (significantly improves upper tropospheric humidity rms)
+        IF (ZA(JL, JK) > 0.99_JPRB) THEN
+          ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZQSMIX(JL, JK))
+          ZCDMAX = (ZQX(JL, JK, NCLDQV) - ZQSMIX(JL, JK)) / (1.0_JPRB + ZCOR*ZQSMIX(JL, JK)*FOEDEM(ZTP1(JL, JK)))
+        ELSE
+          ZCDMAX = (ZQX(JL, JK, NCLDQV) - ZA(JL, JK)*ZQSMIX(JL, JK)) / ZA(JL, JK)
+        END IF
+        ZLCOND1 = MAX(MIN(ZLCOND1, ZCDMAX), 0.0_JPRB)
+        ! end old limiter
+
+        ZLCOND1 = ZA(JL, JK)*ZLCOND1
+        IF (ZLCOND1 < YRECLDP%RLMIN)         ZLCOND1 = 0.0_JPRB
+
+        !-------------------------------------------------------------------------
+        ! All increase goes into liquid unless so cold cloud homogeneously freezes
+        ! Include new liquid formation in first guess value, otherwise liquid
+        ! remains at cold temperatures until next timestep.
+        !-------------------------------------------------------------------------
+        IF (ZTP1(JL, JK) > YRECLDP%RTHOMO) THEN
+          ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) + ZLCOND1
+          ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) - ZLCOND1
+          ZQXFG(NCLDQL) = ZQXFG(NCLDQL) + ZLCOND1
+        ELSE
+          ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) + ZLCOND1
+          ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) - ZLCOND1
+          ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZLCOND1
+        END IF
+      END IF
+
+      ! (2) Generation of new clouds (da/dt>0)
+
+
+      IF (ZDQS <= -YRECLDP%RLMIN .and. ZA(JL, JK) < 1.0_JPRB - ZEPSEC) THEN
+
+        !---------------------------
+        ! Critical relative humidity
+        !---------------------------
+        ZRHC = YRECLDP%RAMID
+        ZSIGK = PAP(JL, JK) / PAPH(JL, KLEV + 1)
+        ! Increase RHcrit to 1.0 towards the surface (eta>0.8)
+        IF (ZSIGK > 0.8_JPRB) THEN
+          ZRHC = YRECLDP%RAMID + (1.0_JPRB - YRECLDP%RAMID)*((ZSIGK - 0.8_JPRB) / 0.2_JPRB)**2
+        END IF
+
+        ! Commented out for CY37R1 to reduce humidity in high trop and strat
+        !      ! Increase RHcrit to 1.0 towards the tropopause (trop-0.2) and above
+        !      ZBOTT=ZTRPAUS(JL)+0.2_JPRB
+        !      IF(ZSIGK < ZBOTT) THEN
+        !        ZRHC=RAMID+(1.0_JPRB-RAMID)*MIN(((ZBOTT-ZSIGK)/0.2_JPRB)**2,1.0_JPRB)
+        !      ENDIF
+
+        !---------------------------
+        ! Supersaturation options
+        !---------------------------
+        IF (YRECLDP%NSSOPT == 0) THEN
+          ! No scheme
+          ZQE = (ZQX(JL, JK, NCLDQV) - ZA(JL, JK)*ZQSICE(JL, JK)) / MAX(ZEPSEC, 1.0_JPRB - ZA(JL, JK))
+          ZQE = MAX(0.0_JPRB, ZQE)
+        ELSE IF (YRECLDP%NSSOPT == 1) THEN
+          ! Tompkins
+          ZQE = (ZQX(JL, JK, NCLDQV) - ZA(JL, JK)*ZQSICE(JL, JK)) / MAX(ZEPSEC, 1.0_JPRB - ZA(JL, JK))
+          ZQE = MAX(0.0_JPRB, ZQE)
+        ELSE IF (YRECLDP%NSSOPT == 2) THEN
+          ! Lohmann and Karcher
+          ZQE = ZQX(JL, JK, NCLDQV)
+        ELSE IF (YRECLDP%NSSOPT == 3) THEN
+          ! Gierens
+          ZQE = ZQX(JL, JK, NCLDQV) + ZLI(JL, JK)
+        END IF
+
+        IF (ZTP1(JL, JK) >= RTT .or. YRECLDP%NSSOPT == 0) THEN
+          ! No ice supersaturation allowed
+          ZFAC = 1.0_JPRB
+        ELSE
+          ! Ice supersaturation
+          ZFAC = ZFOKOOP
+        END IF
+
+        IF (ZQE >= ZRHC*ZQSICE(JL, JK)*ZFAC .and. ZQE < ZQSICE(JL, JK)*ZFAC) THEN
+          ! note: not **2 on 1-a term if ZQE is used.
+          ! Added correction term ZFAC to numerator 15/03/2010
+          ZACOND = -((1.0_JPRB - ZA(JL, JK))*ZFAC*ZDQS) / MAX(2.0_JPRB*(ZFAC*ZQSICE(JL, JK) - ZQE), ZEPSEC)
+
+          ZACOND = MIN(ZACOND, 1.0_JPRB - ZA(JL, JK))            !PUT THE LIMITER BACK
+
+          ! Linear term:
+          ! Added correction term ZFAC 15/03/2010
+          ZLCOND2 = -ZFAC*ZDQS*0.5_JPRB*ZACOND            !mine linear
+
+          ! new limiter formulation
+          ZZDL = (2.0_JPRB*(ZFAC*ZQSICE(JL, JK) - ZQE)) / MAX(ZEPSEC, 1.0_JPRB - ZA(JL, JK))
+          ! Added correction term ZFAC 15/03/2010
+          IF (ZFAC*ZDQS < -ZZDL) THEN
+            ! ZLCONDLIM=(ZA(JL,JK)-1.0_JPRB)*ZDQS(JL)-ZQSICE(JL,JK)+ZQX(JL,JK,NCLDQV)
+            ZLCONDLIM = (ZA(JL, JK) - 1.0_JPRB)*ZFAC*ZDQS - ZFAC*ZQSICE(JL, JK) + ZQX(JL, JK, NCLDQV)
+            ZLCOND2 = MIN(ZLCOND2, ZLCONDLIM)
+          END IF
+          ZLCOND2 = MAX(ZLCOND2, 0.0_JPRB)
+
+          IF (ZLCOND2 < YRECLDP%RLMIN .or. (1.0_JPRB - ZA(JL, JK)) < ZEPSEC) THEN
+            ZLCOND2 = 0.0_JPRB
+            ZACOND = 0.0_JPRB
+          END IF
+          IF (ZLCOND2 == 0.0_JPRB)           ZACOND = 0.0_JPRB
+
+          ! Large-scale generation is LINEAR in A and LINEAR in L
+          ZSOLAC = ZSOLAC + ZACOND            !linear
+
+          !------------------------------------------------------------------------
+          ! All increase goes into liquid unless so cold cloud homogeneously freezes
+          ! Include new liquid formation in first guess value, otherwise liquid
+          ! remains at cold temperatures until next timestep.
+          !------------------------------------------------------------------------
+          IF (ZTP1(JL, JK) > YRECLDP%RTHOMO) THEN
+            ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) + ZLCOND2
+            ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) - ZLCOND2
+            ZQXFG(NCLDQL) = ZQXFG(NCLDQL) + ZLCOND2
+          ELSE
+            ! homogeneous freezing
+            ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) + ZLCOND2
+            ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) - ZLCOND2
+            ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZLCOND2
+          END IF
+
+        END IF
+      END IF
+
+      !----------------------------------------------------------------------
+      ! 3.7 Growth of ice by vapour deposition
+      !----------------------------------------------------------------------
+      ! Following Rotstayn et al. 2001:
+      ! does not use the ice nuclei number from cloudaer.F90
+      ! but rather a simple Meyers et al. 1992 form based on the
+      ! supersaturation and assuming clouds are saturated with
+      ! respect to liquid water (well mixed), (or Koop adjustment)
+      ! Growth considered as sink of liquid water if present so
+      ! Bergeron-Findeisen adjustment in autoconversion term no longer needed
+      !----------------------------------------------------------------------
+
+      !--------------------------------------------------------
+      !-
+      !- Ice deposition following Rotstayn et al. (2001)
+      !-  (monodisperse ice particle size distribution)
+      !-
+      !--------------------------------------------------------
+      IF (IDEPICE == 1) THEN
+
+
+        !--------------------------------------------------------------
+        ! Calculate distance from cloud top
+        ! defined by cloudy layer below a layer with cloud frac <0.01
+        ! ZDZ = ZDP(JL)/(ZRHO(JL)*RG)
+        !--------------------------------------------------------------
+
+        IF (ZA(JL, JK - 1) < YRECLDP%RCLDTOPCF .and. ZA(JL, JK) >= YRECLDP%RCLDTOPCF) THEN
+          ZCLDTOPDIST = 0.0_JPRB
+        ELSE
+          ZCLDTOPDIST = ZCLDTOPDIST + ZDP / ((ZRHO*RG))
+        END IF
+
+        !--------------------------------------------------------------
+        ! Only treat depositional growth if liquid is present, since ice
+        ! growth from vapour can not be modelled without an additional
+        ! in-cloud water vapour variable
+        !--------------------------------------------------------------
+        IF (ZTP1(JL, JK) < RTT .and. ZQXFG(NCLDQL) > YRECLDP%RLMIN) THEN
+          ! T<273K
+
+          ZVPICE = (FOEEICE(ZTP1(JL, JK))*RV) / RD
+          ZVPLIQ = ZVPICE*ZFOKOOP
+          ZICENUCLEI = 1000.0_JPRB*EXP((12.96_JPRB*(ZVPLIQ - ZVPICE)) / ZVPLIQ - 0.639_JPRB)
+
+          !------------------------------------------------
+          !   2.4e-2 is conductivity of air
+          !   8.87 ~ 700**(1/3) = density of ice to the power one third
+          !------------------------------------------------
+          ZADD = (RLSTT*(RLSTT / ((RV*ZTP1(JL, JK))) - 1.0_JPRB)) / ((2.4E-2_JPRB*ZTP1(JL, JK)))
+          ZBDD = (RV*ZTP1(JL, JK)*PAP(JL, JK)) / ((2.21_JPRB*ZVPICE))
+          ZCVDS = (7.8_JPRB*(ZICENUCLEI / ZRHO)**0.666_JPRB*(ZVPLIQ - ZVPICE)) / ((8.87_JPRB*(ZADD + ZBDD)*ZVPICE))
+
+          !-----------------------------------------------------
+          ! RICEINIT=1.E-12_JPRB is initial mass of ice particle
+          !-----------------------------------------------------
+          ZICE0 = MAX(ZICECLD, (ZICENUCLEI*YRECLDP%RICEINIT) / ZRHO)
+
+          !------------------
+          ! new value of ice:
+          !------------------
+          ZINEW = (0.666_JPRB*ZCVDS*PTSPHY + ZICE0**0.666_JPRB)**1.5_JPRB
+
+          !---------------------------
+          ! grid-mean deposition rate:
+          !---------------------------
+          ZDEPOS = MAX(ZA(JL, JK)*(ZINEW - ZICE0), 0.0_JPRB)
+
+          !--------------------------------------------------------------------
+          ! Limit deposition to liquid water amount
+          ! If liquid is all frozen, ice would use up reservoir of water
+          ! vapour in excess of ice saturation mixing ratio - However this
+          ! can not be represented without an in-cloud humidity variable. Using
+          ! the grid-mean humidity would imply a large artificial horizontal
+          ! flux from the clear sky to the cloudy area. We thus rely on the
+          ! supersaturation check to clean up any remaining supersaturation
+          !--------------------------------------------------------------------
+          ZDEPOS = MIN(ZDEPOS, ZQXFG(NCLDQL))            ! limit to liquid water amount
+
+          !--------------------------------------------------------------------
+          ! At top of cloud, reduce deposition rate near cloud top to account for
+          ! small scale turbulent processes, limited ice nucleation and ice fallout
+          !--------------------------------------------------------------------
+          !      ZDEPOS = ZDEPOS*MIN(RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH,1.0_JPRB)
+          ! Change to include dependence on ice nuclei concentration
+          ! to increase deposition rate with decreasing temperatures
+          ZINFACTOR = MIN(ZICENUCLEI / 15000._JPRB, 1.0_JPRB)
+          ZDEPOS = ZDEPOS*MIN(ZINFACTOR + (1.0_JPRB - ZINFACTOR)*(YRECLDP%RDEPLIQREFRATE + ZCLDTOPDIST /  &
+          & YRECLDP%RDEPLIQREFDEPTH), 1.0_JPRB)
+
+          !--------------
+          ! add to matrix
+          !--------------
+          ZSOLQA(NCLDQI, NCLDQL) = ZSOLQA(NCLDQI, NCLDQL) + ZDEPOS
+          ZSOLQA(NCLDQL, NCLDQI) = ZSOLQA(NCLDQL, NCLDQI) - ZDEPOS
+          ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZDEPOS
+          ZQXFG(NCLDQL) = ZQXFG(NCLDQL) - ZDEPOS
+
+        END IF
+
+        !--------------------------------------------------------
+        !-
+        !- Ice deposition assuming ice PSD
+        !-
+        !--------------------------------------------------------
+      ELSE IF (IDEPICE == 2) THEN
+
+
+        !--------------------------------------------------------------
+        ! Calculate distance from cloud top
+        ! defined by cloudy layer below a layer with cloud frac <0.01
+        ! ZDZ = ZDP(JL)/(ZRHO(JL)*RG)
+        !--------------------------------------------------------------
+
+        IF (ZA(JL, JK - 1) < YRECLDP%RCLDTOPCF .and. ZA(JL, JK) >= YRECLDP%RCLDTOPCF) THEN
+          ZCLDTOPDIST = 0.0_JPRB
+        ELSE
+          ZCLDTOPDIST = ZCLDTOPDIST + ZDP / ((ZRHO*RG))
+        END IF
+
+        !--------------------------------------------------------------
+        ! Only treat depositional growth if liquid is present, since ice
+        ! growth from vapour can not be modelled without an additional
+        ! in-cloud water vapour variable
+        !--------------------------------------------------------------
+        IF (ZTP1(JL, JK) < RTT .and. ZQXFG(NCLDQL) > YRECLDP%RLMIN) THEN
+          ! T<273K
+
+          ZVPICE = (FOEEICE(ZTP1(JL, JK))*RV) / RD
+          ZVPLIQ = ZVPICE*ZFOKOOP
+          ZICENUCLEI = 1000.0_JPRB*EXP((12.96_JPRB*(ZVPLIQ - ZVPICE)) / ZVPLIQ - 0.639_JPRB)
+
+          !-----------------------------------------------------
+          ! RICEINIT=1.E-12_JPRB is initial mass of ice particle
+          !-----------------------------------------------------
+          ZICE0 = MAX(ZICECLD, (ZICENUCLEI*YRECLDP%RICEINIT) / ZRHO)
+
+          ! Particle size distribution
+          ZTCG = 1.0_JPRB
+          ZFACX1I = 1.0_JPRB
+
+          ZAPLUSB =  &
+          & YRECLDP%RCL_APB1*ZVPICE - YRECLDP%RCL_APB2*ZVPICE*ZTP1(JL, JK) + PAP(JL, JK)*YRECLDP%RCL_APB3*ZTP1(JL, JK)**3._JPRB
+          ZCORRFAC = (1.0_JPRB / ZRHO)**0.5_JPRB
+          ZCORRFAC2 = ((ZTP1(JL, JK) / 273.0_JPRB)**1.5_JPRB)*(393.0_JPRB / (ZTP1(JL, JK) + 120.0_JPRB))
+
+          ZPR02 = (ZRHO*ZICE0*YRECLDP%RCL_CONST1I) / ((ZTCG*ZFACX1I))
+
+          ZTERM1 = ((ZVPLIQ - ZVPICE)*ZTP1(JL, JK)**2.0_JPRB*ZVPICE*ZCORRFAC2*ZTCG*YRECLDP%RCL_CONST2I*ZFACX1I) /  &
+          & ((ZRHO*ZAPLUSB*ZVPICE))
+          ZTERM2 = 0.65_JPRB*YRECLDP%RCL_CONST6I*ZPR02**YRECLDP%RCL_CONST4I +  &
+          & (YRECLDP%RCL_CONST3I*ZCORRFAC**0.5_JPRB*ZRHO**0.5_JPRB*ZPR02**YRECLDP%RCL_CONST5I) / ZCORRFAC2**0.5_JPRB
+
+          ZDEPOS = MAX(ZA(JL, JK)*ZTERM1*ZTERM2*PTSPHY, 0.0_JPRB)
+
+          !--------------------------------------------------------------------
+          ! Limit deposition to liquid water amount
+          ! If liquid is all frozen, ice would use up reservoir of water
+          ! vapour in excess of ice saturation mixing ratio - However this
+          ! can not be represented without an in-cloud humidity variable. Using
+          ! the grid-mean humidity would imply a large artificial horizontal
+          ! flux from the clear sky to the cloudy area. We thus rely on the
+          ! supersaturation check to clean up any remaining supersaturation
+          !--------------------------------------------------------------------
+          ZDEPOS = MIN(ZDEPOS, ZQXFG(NCLDQL))            ! limit to liquid water amount
+
+          !--------------------------------------------------------------------
+          ! At top of cloud, reduce deposition rate near cloud top to account for
+          ! small scale turbulent processes, limited ice nucleation and ice fallout
+          !--------------------------------------------------------------------
+          ! Change to include dependence on ice nuclei concentration
+          ! to increase deposition rate with decreasing temperatures
+          ZINFACTOR = MIN(ZICENUCLEI / 15000._JPRB, 1.0_JPRB)
+          ZDEPOS = ZDEPOS*MIN(ZINFACTOR + (1.0_JPRB - ZINFACTOR)*(YRECLDP%RDEPLIQREFRATE + ZCLDTOPDIST /  &
+          & YRECLDP%RDEPLIQREFDEPTH), 1.0_JPRB)
+
+          !--------------
+          ! add to matrix
+          !--------------
+          ZSOLQA(NCLDQI, NCLDQL) = ZSOLQA(NCLDQI, NCLDQL) + ZDEPOS
+          ZSOLQA(NCLDQL, NCLDQI) = ZSOLQA(NCLDQL, NCLDQI) - ZDEPOS
+          ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZDEPOS
+          ZQXFG(NCLDQL) = ZQXFG(NCLDQL) - ZDEPOS
+        END IF
+
+      END IF
+      ! on IDEPICE
+
+      !######################################################################
+      !              4  *** PRECIPITATION PROCESSES ***
+      !######################################################################
+
+      !----------------------------------
+      ! revise in-cloud condensate amount
+      !----------------------------------
+      ZTMPA = 1.0_JPRB / MAX(ZA(JL, JK), ZEPSEC)
+      ZLIQCLD = ZQXFG(NCLDQL)*ZTMPA
+      ZICECLD = ZQXFG(NCLDQI)*ZTMPA
+      ZLICLD = ZLIQCLD + ZICECLD
+
+      !----------------------------------------------------------------------
+      ! 4.2 SEDIMENTATION/FALLING OF *ALL* MICROPHYSICAL SPECIES
+      !     now that rain, snow, graupel species are prognostic
+      !     the precipitation flux can be defined directly level by level
+      !     There is no vertical memory required from the flux variable
+      !----------------------------------------------------------------------
+
+      DO JM=1,NCLV
+        IF (LLFALL(JM) .or. JM == NCLDQI) THEN
+          !------------------------
+          ! source from layer above
+          !------------------------
+          IF (JK > YRECLDP%NCLDTOP) THEN
+            ZFALLSRCE(JM) = ZPFPLSX(JL, JK, JM)*ZDTGDP
+            ZSOLQA(JM, JM) = ZSOLQA(JM, JM) + ZFALLSRCE(JM)
+            ZQXFG(JM) = ZQXFG(JM) + ZFALLSRCE(JM)
+            ! use first guess precip----------V
+            ZQPRETOT = ZQPRETOT + ZQXFG(JM)
+          END IF
+          !-------------------------------------------------
+          ! sink to next layer, constant fall speed
+          !-------------------------------------------------
+          ! if aerosol effect then override
+          !  note that for T>233K this is the same as above.
+          IF (YRECLDP%LAERICESED .and. JM == NCLDQI) THEN
+            ZRE_ICE = PRE_ICE(JL, JK)
+            ! The exponent value is from
+            ! Morrison et al. JAS 2005 Appendix
+            ZVQX(NCLDQI) = 0.002_JPRB*ZRE_ICE**1.0_JPRB
+          END IF
+          ZFALL = ZVQX(JM)*ZRHO
+          !-------------------------------------------------
+          ! modified by Heymsfield and Iaquinta JAS 2000
+          !-------------------------------------------------
+          ! ZFALL = ZFALL*((PAP(JL,JK)*RICEHI1)**(-0.178_JPRB)) &
+          !            &*((ZTP1(JL,JK)*RICEHI2)**(-0.394_JPRB))
+
+          ZFALLSINK(JM) = ZDTGDP*ZFALL
+          ! Cloud budget diagnostic stored at end as implicit
+          ! jl
+        END IF
+        ! LLFALL
+      END DO
+      ! jm
+
+      !---------------------------------------------------------------
+      ! Precip cover overlap using MAX-RAN Overlap
+      ! Since precipitation is now prognostic we must
+      !   1) apply an arbitrary minimum coverage (0.3) if precip>0
+      !   2) abandon the 2-flux clr/cld treatment
+      !   3) Thus, since we have no memory of the clear sky precip
+      !      fraction, we mimic the previous method by reducing
+      !      ZCOVPTOT(JL), which has the memory, proportionally with
+      !      the precip evaporation rate, taking cloud fraction
+      !      into account
+      !   #3 above leads to much smoother vertical profiles of
+      !   precipitation fraction than the Klein-Jakob scheme which
+      !   monotonically increases precip fraction and then resets
+      !   it to zero in a step function once clear-sky precip reaches
+      !   zero.
+      !---------------------------------------------------------------
+      IF (ZQPRETOT > ZEPSEC) THEN
+        ZCOVPTOT = 1.0_JPRB - ((1.0_JPRB - ZCOVPTOT)*(1.0_JPRB - MAX(ZA(JL, JK), ZA(JL, JK - 1)))) / (1.0_JPRB - MIN(ZA(JL, JK -  &
+        & 1), 1.0_JPRB - 1.E-06_JPRB))
+        ZCOVPTOT = MAX(ZCOVPTOT, YRECLDP%RCOVPMIN)
+        ZCOVPCLR = MAX(0.0_JPRB, ZCOVPTOT - ZA(JL, JK))          ! clear sky proportion
+        ZRAINCLD = ZQXFG(NCLDQR) / ZCOVPTOT
+        ZSNOWCLD = ZQXFG(NCLDQS) / ZCOVPTOT
+        ZCOVPMAX = MAX(ZCOVPTOT, ZCOVPMAX)
+      ELSE
+        ZRAINCLD = 0.0_JPRB
+        ZSNOWCLD = 0.0_JPRB
+        ZCOVPTOT = 0.0_JPRB          ! no flux - reset cover
+        ZCOVPCLR = 0.0_JPRB          ! reset clear sky proportion
+        ZCOVPMAX = 0.0_JPRB          ! reset max cover for ZZRH calc
+      END IF
+
+      !----------------------------------------------------------------------
+      ! 4.3a AUTOCONVERSION TO SNOW
+      !----------------------------------------------------------------------
+
+      IF (ZTP1(JL, JK) <= RTT) THEN
+        !-----------------------------------------------------
+        !     Snow Autoconversion rate follows Lin et al. 1983
+        !-----------------------------------------------------
+        IF (ZICECLD > ZEPSEC) THEN
+
+          ZZCO = PTSPHY*YRECLDP%RSNOWLIN1*EXP(YRECLDP%RSNOWLIN2*(ZTP1(JL, JK) - RTT))
+
+          IF (YRECLDP%LAERICEAUTO) THEN
+            ZLCRIT = PICRIT_AER(JL, JK)
+            ! 0.3 = N**0.333 with N=0.027
+            ZZCO = ZZCO*(YRECLDP%RNICE / PNICE(JL, JK))**0.333_JPRB
+          ELSE
+            ZLCRIT = YRECLDP%RLCRITSNOW
+          END IF
+
+          ZSNOWAUT = ZZCO*(1.0_JPRB - EXP(-(ZICECLD / ZLCRIT)**2))
+          ZSOLQB(NCLDQS, NCLDQI) = ZSOLQB(NCLDQS, NCLDQI) + ZSNOWAUT
+
+        END IF
+      END IF
+
+      !----------------------------------------------------------------------
+      ! 4.3b AUTOCONVERSION WARM CLOUDS
+      !   Collection and accretion will require separate treatment
+      !   but for now we keep this simple treatment
+      !----------------------------------------------------------------------
+
+      IF (ZLIQCLD > ZEPSEC) THEN
+
+        !--------------------------------------------------------
+        !-
+        !- Warm-rain process follows Sundqvist (1989)
+        !-
+        !--------------------------------------------------------
+        IF (IWARMRAIN == 1) THEN
+
+          ZZCO = YRECLDP%RKCONV*PTSPHY
+
+          IF (YRECLDP%LAERLIQAUTOLSP) THEN
+            ZLCRIT = PLCRIT_AER(JL, JK)
+            ! 5.0 = N**0.333 with N=125 cm-3
+            ZZCO = ZZCO*(YRECLDP%RCCN / PCCN(JL, JK))**0.333_JPRB
+          ELSE
+            ! Modify autoconversion threshold dependent on:
+            !  land (polluted, high CCN, smaller droplets, higher threshold)
+            !  sea  (clean, low CCN, larger droplets, lower threshold)
+            IF (PLSM(JL) > 0.5_JPRB) THEN
+              ZLCRIT = YRECLDP%RCLCRIT_LAND                ! land
+            ELSE
+              ZLCRIT = YRECLDP%RCLCRIT_SEA                ! ocean
+            END IF
+          END IF
+
+          !------------------------------------------------------------------
+          ! Parameters for cloud collection by rain and snow.
+          ! Note that with new prognostic variable it is now possible
+          ! to REPLACE this with an explicit collection parametrization
+          !------------------------------------------------------------------
+          ZPRECIP = (ZPFPLSX(JL, JK, NCLDQS) + ZPFPLSX(JL, JK, NCLDQR)) / MAX(ZEPSEC, ZCOVPTOT)
+          ZCFPR = 1.0_JPRB + YRECLDP%RPRC1*SQRT(MAX(ZPRECIP, 0.0_JPRB))
+          !      ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB))*&
+          !       &ZCOVPTOT(JL)/(MAX(ZA(JL,JK),ZEPSEC))
+
+          IF (YRECLDP%LAERLIQCOLL) THEN
+            ! 5.0 = N**0.333 with N=125 cm-3
+            ZCFPR = ZCFPR*(YRECLDP%RCCN / PCCN(JL, JK))**0.333_JPRB
+          END IF
+
+          ZZCO = ZZCO*ZCFPR
+          ZLCRIT = ZLCRIT / MAX(ZCFPR, ZEPSEC)
+
+          IF (ZLIQCLD / ZLCRIT < 20.0_JPRB) THEN
+            ! Security for exp for some compilers
+            ZRAINAUT = ZZCO*(1.0_JPRB - EXP(-(ZLIQCLD / ZLCRIT)**2))
+          ELSE
+            ZRAINAUT = ZZCO
+          END IF
+
+          ! rain freezes instantly
+          IF (ZTP1(JL, JK) <= RTT) THEN
+            ZSOLQB(NCLDQS, NCLDQL) = ZSOLQB(NCLDQS, NCLDQL) + ZRAINAUT
+          ELSE
+            ZSOLQB(NCLDQR, NCLDQL) = ZSOLQB(NCLDQR, NCLDQL) + ZRAINAUT
+          END IF
+
+          !--------------------------------------------------------
+          !-
+          !- Warm-rain process follows Khairoutdinov and Kogan (2000)
+          !-
+          !--------------------------------------------------------
+        ELSE IF (IWARMRAIN == 2) THEN
+
+          IF (PLSM(JL) > 0.5_JPRB) THEN
+            ! land
+            ZCONST = YRECLDP%RCL_KK_CLOUD_NUM_LAND
+            ZLCRIT = YRECLDP%RCLCRIT_LAND
+          ELSE
+            ! ocean
+            ZCONST = YRECLDP%RCL_KK_CLOUD_NUM_SEA
+            ZLCRIT = YRECLDP%RCLCRIT_SEA
+          END IF
+
+          IF (ZLIQCLD > ZLCRIT) THEN
+
+            ZRAINAUT = 1.5_JPRB*ZA(JL, JK)*PTSPHY*YRECLDP%RCL_KKAAU*ZLIQCLD**YRECLDP%RCL_KKBAUQ*ZCONST**YRECLDP%RCL_KKBAUN
+
+            ZRAINAUT = MIN(ZRAINAUT, ZQXFG(NCLDQL))
+            IF (ZRAINAUT < ZEPSEC)             ZRAINAUT = 0.0_JPRB
+
+            ZRAINACC = 2.0_JPRB*ZA(JL, JK)*PTSPHY*YRECLDP%RCL_KKAAC*(ZLIQCLD*ZRAINCLD)**YRECLDP%RCL_KKBAC
+
+            ZRAINACC = MIN(ZRAINACC, ZQXFG(NCLDQL))
+            IF (ZRAINACC < ZEPSEC)             ZRAINACC = 0.0_JPRB
+
+          ELSE
+            ZRAINAUT = 0.0_JPRB
+            ZRAINACC = 0.0_JPRB
+          END IF
+
+          ! If temperature < 0, then autoconversion produces snow rather than rain
+          ! Explicit
+          IF (ZTP1(JL, JK) <= RTT) THEN
+            ZSOLQA(NCLDQS, NCLDQL) = ZSOLQA(NCLDQS, NCLDQL) + ZRAINAUT
+            ZSOLQA(NCLDQS, NCLDQL) = ZSOLQA(NCLDQS, NCLDQL) + ZRAINACC
+            ZSOLQA(NCLDQL, NCLDQS) = ZSOLQA(NCLDQL, NCLDQS) - ZRAINAUT
+            ZSOLQA(NCLDQL, NCLDQS) = ZSOLQA(NCLDQL, NCLDQS) - ZRAINACC
+          ELSE
+            ZSOLQA(NCLDQR, NCLDQL) = ZSOLQA(NCLDQR, NCLDQL) + ZRAINAUT
+            ZSOLQA(NCLDQR, NCLDQL) = ZSOLQA(NCLDQR, NCLDQL) + ZRAINACC
+            ZSOLQA(NCLDQL, NCLDQR) = ZSOLQA(NCLDQL, NCLDQR) - ZRAINAUT
+            ZSOLQA(NCLDQL, NCLDQR) = ZSOLQA(NCLDQL, NCLDQR) - ZRAINACC
+          END IF
+
+        END IF
+        ! on IWARMRAIN
+
+      END IF
+      ! on ZLIQCLD > ZEPSEC
+
+
+      !----------------------------------------------------------------------
+      ! RIMING - COLLECTION OF CLOUD LIQUID DROPS BY SNOW AND ICE
+      !      only active if T<0degC and supercooled liquid water is present
+      !      AND if not Sundqvist autoconversion (as this includes riming)
+      !----------------------------------------------------------------------
+      IF (IWARMRAIN > 1) THEN
+
+        IF (ZTP1(JL, JK) <= RTT .and. ZLIQCLD > ZEPSEC) THEN
+
+          ! Fallspeed air density correction
+          ZFALLCORR = (YRECLDP%RDENSREF / ZRHO)**0.4_JPRB
+
+          !------------------------------------------------------------------
+          ! Riming of snow by cloud water - implicit in lwc
+          !------------------------------------------------------------------
+          IF (ZSNOWCLD > ZEPSEC .and. ZCOVPTOT > 0.01_JPRB) THEN
+
+            ! Calculate riming term
+            ! Factor of liq water taken out because implicit
+            ZSNOWRIME =  &
+            & 0.3_JPRB*ZCOVPTOT*PTSPHY*YRECLDP%RCL_CONST7S*ZFALLCORR*(ZRHO*ZSNOWCLD*YRECLDP%RCL_CONST1S)**YRECLDP%RCL_CONST8S
+
+            ! Limit snow riming term
+            ZSNOWRIME = MIN(ZSNOWRIME, 1.0_JPRB)
+
+            ZSOLQB(NCLDQS, NCLDQL) = ZSOLQB(NCLDQS, NCLDQL) + ZSNOWRIME
+
+          END IF
+
+          !------------------------------------------------------------------
+          ! Riming of ice by cloud water - implicit in lwc
+          ! NOT YET ACTIVE
+          !------------------------------------------------------------------
+          !      IF (ZICECLD(JL)>ZEPSEC .AND. ZA(JL,JK)>0.01_JPRB) THEN
+          !
+          !        ! Calculate riming term
+          !        ! Factor of liq water taken out because implicit
+          !        ZSNOWRIME(JL) = ZA(JL,JK)*PTSPHY*RCL_CONST7S*ZFALLCORR &
+          !     &                  *(ZRHO(JL)*ZICECLD(JL)*RCL_CONST1S)**RCL_CONST8S
+          !
+          !        ! Limit ice riming term
+          !        ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB)
+          !
+          !        ZSOLQB(JL,NCLDQI,NCLDQL) = ZSOLQB(JL,NCLDQI,NCLDQL) + ZSNOWRIME(JL)
+          !
+          !      ENDIF
+        END IF
+
+      END IF
+      ! on IWARMRAIN > 1
+
+
+      !----------------------------------------------------------------------
+      ! 4.4a  MELTING OF SNOW and ICE
+      !       with new implicit solver this also has to treat snow or ice
+      !       precipitating from the level above... i.e. local ice AND flux.
+      !       in situ ice and snow: could arise from LS advection or warming
+      !       falling ice and snow: arrives by precipitation process
+      !----------------------------------------------------------------------
+
+      ZICETOT = ZQXFG(NCLDQI) + ZQXFG(NCLDQS)
+      ZMELTMAX = 0.0_JPRB
+
+      ! If there are frozen hydrometeors present and dry-bulb temperature > 0degC
+      IF (ZICETOT > ZEPSEC .and. ZTP1(JL, JK) > RTT) THEN
+
+        ! Calculate subsaturation
+        ZSUBSAT = MAX(ZQSICE(JL, JK) - ZQX(JL, JK, NCLDQV), 0.0_JPRB)
+
+        ! Calculate difference between dry-bulb (ZTP1) and the temperature
+        ! at which the wet-bulb=0degC (RTT-ZSUBSAT*....) using an approx.
+        ! Melting only occurs if the wet-bulb temperature >0
+        ! i.e. warming of ice particle due to melting > cooling
+        ! due to evaporation.
+        ZTDMTW0 = ZTP1(JL, JK) - RTT - ZSUBSAT*(ZTW1 + ZTW2*(PAP(JL, JK) - ZTW3) - ZTW4*(ZTP1(JL, JK) - ZTW5))
+        ! Not implicit yet...
+        ! Ensure ZCONS1 is positive so that ZMELTMAX=0 if ZTDMTW0<0
+        ZCONS1 = ABS((PTSPHY*(1.0_JPRB + 0.5_JPRB*ZTDMTW0)) / YRECLDP%RTAUMEL)
+        ZMELTMAX = MAX(ZTDMTW0*ZCONS1*ZRLDCP, 0.0_JPRB)
+      END IF
+
+      ! Loop over frozen hydrometeors (ice, snow)
+      DO JM=1,NCLV
+        IF (IPHASE(JM) == 2) THEN
+          JN = IMELT(JM)
+          IF (ZMELTMAX > ZEPSEC .and. ZICETOT > ZEPSEC) THEN
+            ! Apply melting in same proportion as frozen hydrometeor fractions
+            ZALFA = ZQXFG(JM) / ZICETOT
+            ZMELT = MIN(ZQXFG(JM), ZALFA*ZMELTMAX)
+            ! needed in first guess
+            ! This implies that zqpretot has to be recalculated below
+            ! since it is not conserved here if ice falls and liquid doesn't
+            ZQXFG(JM) = ZQXFG(JM) - ZMELT
+            ZQXFG(JN) = ZQXFG(JN) + ZMELT
+            ZSOLQA(JN, JM) = ZSOLQA(JN, JM) + ZMELT
+            ZSOLQA(JM, JN) = ZSOLQA(JM, JN) - ZMELT
+          END IF
+        END IF
+      END DO
+
+      !----------------------------------------------------------------------
+      ! 4.4b  FREEZING of RAIN
+      !----------------------------------------------------------------------
+
+      ! If rain present
+      IF (ZQX(JL, JK, NCLDQR) > ZEPSEC) THEN
+
+        IF (ZTP1(JL, JK) <= RTT .and. ZTP1(JL, JK - 1) > RTT) THEN
+          ! Base of melting layer/top of refreezing layer so
+          ! store rain/snow fraction for precip type diagnosis
+          ! If mostly rain, then supercooled rain slow to freeze
+          ! otherwise faster to freeze (snow or ice pellets)
+          ZQPRETOT = MAX(ZQX(JL, JK, NCLDQS) + ZQX(JL, JK, NCLDQR), ZEPSEC)
+          PRAINFRAC_TOPRFZ(JL) = ZQX(JL, JK, NCLDQR) / ZQPRETOT
+          IF (PRAINFRAC_TOPRFZ(JL) > 0.8) THEN
+            LLRAINLIQ = .true.
+          ELSE
+            LLRAINLIQ = .false.
+          END IF
+        END IF
+
+        ! If temperature less than zero
+        IF (ZTP1(JL, JK) < RTT) THEN
+
+          IF (PRAINFRAC_TOPRFZ(JL) > 0.8) THEN
+
+            ! Majority of raindrops completely melted
+            ! Refreezing is by slow heterogeneous freezing
+
+            ! Slope of rain particle size distribution
+            ZLAMBDA = (YRECLDP%RCL_FAC1 / ((ZRHO*ZQX(JL, JK, NCLDQR))))**YRECLDP%RCL_FAC2
+
+            ! Calculate freezing rate based on Bigg(1953) and Wisner(1972)
+            ZTEMP = YRECLDP%RCL_FZRAB*(ZTP1(JL, JK) - RTT)
+            ZFRZ = PTSPHY*(YRECLDP%RCL_CONST5R / ZRHO)*(EXP(ZTEMP) - 1._JPRB)*ZLAMBDA**YRECLDP%RCL_CONST6R
+            ZFRZMAX = MAX(ZFRZ, 0.0_JPRB)
+
+          ELSE
+
+            ! Majority of raindrops only partially melted
+            ! Refreeze with a shorter timescale (reverse of melting...for now)
+
+            ZCONS1 = ABS((PTSPHY*(1.0_JPRB + 0.5_JPRB*(RTT - ZTP1(JL, JK)))) / YRECLDP%RTAUMEL)
+            ZFRZMAX = MAX((RTT - ZTP1(JL, JK))*ZCONS1*ZRLDCP, 0.0_JPRB)
+
+          END IF
+
+          IF (ZFRZMAX > ZEPSEC) THEN
+            ZFRZ = MIN(ZQX(JL, JK, NCLDQR), ZFRZMAX)
+            ZSOLQA(NCLDQS, NCLDQR) = ZSOLQA(NCLDQS, NCLDQR) + ZFRZ
+            ZSOLQA(NCLDQR, NCLDQS) = ZSOLQA(NCLDQR, NCLDQS) - ZFRZ
+          END IF
+        END IF
+
+      END IF
+
+
+      !----------------------------------------------------------------------
+      ! 4.4c  FREEZING of LIQUID
+      !----------------------------------------------------------------------
+      ! not implicit yet...
+      ZFRZMAX = MAX((YRECLDP%RTHOMO - ZTP1(JL, JK))*ZRLDCP, 0.0_JPRB)
+
+      JM = NCLDQL
+      JN = IMELT(JM)
+      IF (ZFRZMAX > ZEPSEC .and. ZQXFG(JM) > ZEPSEC) THEN
+        ZFRZ = MIN(ZQXFG(JM), ZFRZMAX)
+        ZSOLQA(JN, JM) = ZSOLQA(JN, JM) + ZFRZ
+        ZSOLQA(JM, JN) = ZSOLQA(JM, JN) - ZFRZ
+      END IF
+
+      !----------------------------------------------------------------------
+      ! 4.5   EVAPORATION OF RAIN/SNOW
+      !----------------------------------------------------------------------
+
+      !----------------------------------------
+      ! Rain evaporation scheme from Sundqvist
+      !----------------------------------------
+      IF (IEVAPRAIN == 1) THEN
+
+        ! Rain
+
+
+        ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JL, JK))
+        ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB)
+
+        ZQE = (ZQX(JL, JK, NCLDQV) - ZA(JL, JK)*ZQSLIQ(JL, JK)) / MAX(ZEPSEC, 1.0_JPRB - ZA(JL, JK))
+        !---------------------------------------------
+        ! humidity in moistest ZCOVPCLR part of domain
+        !---------------------------------------------
+        ZQE = MAX(0.0_JPRB, MIN(ZQE, ZQSLIQ(JL, JK)))
+        LLO1 = ZCOVPCLR > ZEPSEC .and. ZQXFG(NCLDQR) > ZEPSEC .and. ZQE < ZZRH*ZQSLIQ(JL, JK)
+
+        IF (LLO1) THEN
+          ! note: zpreclr is a rain flux
+          ZPRECLR = (ZQXFG(NCLDQR)*ZCOVPCLR) / SIGN(MAX(ABS(ZCOVPTOT*ZDTGDP), ZEPSILON), ZCOVPTOT*ZDTGDP)
+
+          !--------------------------------------
+          ! actual microphysics formula in zbeta
+          !--------------------------------------
+
+          ZBETA1 = ((SQRT(PAP(JL, JK) / PAPH(JL, KLEV + 1)) / YRECLDP%RVRFACTOR)*ZPRECLR) / MAX(ZCOVPCLR, ZEPSEC)
+
+          ZBETA = RG*YRECLDP%RPECONS*0.5_JPRB*ZBETA1**0.5777_JPRB
+
+          ZDENOM = 1.0_JPRB + ZBETA*PTSPHY*ZCORQSLIQ
+          ZDPR = ((ZCOVPCLR*ZBETA*(ZQSLIQ(JL, JK) - ZQE)) / ZDENOM)*ZDP*ZRG_R
+          ZDPEVAP = ZDPR*ZDTGDP
+
+          !---------------------------------------------------------
+          ! add evaporation term to explicit sink.
+          ! this has to be explicit since if treated in the implicit
+          ! term evaporation can not reduce rain to zero and model
+          ! produces small amounts of rainfall everywhere.
+          !---------------------------------------------------------
+
+          ! Evaporate rain
+          ZEVAP = MIN(ZDPEVAP, ZQXFG(NCLDQR))
+
+          ZSOLQA(NCLDQV, NCLDQR) = ZSOLQA(NCLDQV, NCLDQR) + ZEVAP
+          ZSOLQA(NCLDQR, NCLDQV) = ZSOLQA(NCLDQR, NCLDQV) - ZEVAP
+
+          !-------------------------------------------------------------
+          ! Reduce the total precip coverage proportional to evaporation
+          ! to mimic the previous scheme which had a diagnostic
+          ! 2-flux treatment, abandoned due to the new prognostic precip
+          !-------------------------------------------------------------
+          ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JL, JK))*ZEVAP) / ZQXFG(NCLDQR)))
+
+          ! Update fg field
+          ZQXFG(NCLDQR) = ZQXFG(NCLDQR) - ZEVAP
+
+        END IF
+
+
+        !---------------------------------------------------------
+        ! Rain evaporation scheme based on Abel and Boutle (2012)
+        !---------------------------------------------------------
+      ELSE IF (IEVAPRAIN == 2) THEN
+
+
+        !-----------------------------------------------------------------------
+        ! Calculate relative humidity limit for rain evaporation
+        ! to avoid cloud formation and saturation of the grid box
+        !-----------------------------------------------------------------------
+        ! Limit RH for rain evaporation dependent on precipitation fraction
+        ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JL, JK))
+        ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB)
+
+        ! Critical relative humidity
+        !ZRHC=RAMID
+        !ZSIGK=PAP(JL,JK)/PAPH(JL,KLEV+1)
+        ! Increase RHcrit to 1.0 towards the surface (eta>0.8)
+        !IF(ZSIGK > 0.8_JPRB) THEN
+        !  ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2
+        !ENDIF
+        !ZZRH = MIN(ZRHC,ZZRH)
+
+        ! Further limit RH for rain evaporation to 80% (RHcrit in free troposphere)
+        ZZRH = MIN(0.8_JPRB, ZZRH)
+
+        ZQE = MAX(0.0_JPRB, MIN(ZQX(JL, JK, NCLDQV), ZQSLIQ(JL, JK)))
+
+        LLO1 = ZCOVPCLR > ZEPSEC .and. ZQXFG(NCLDQR) > ZEPSEC .and. ZQE < ZZRH*ZQSLIQ(JL, JK)
+
+        IF (LLO1) THEN
+
+          !-------------------------------------------
+          ! Abel and Boutle (2012) evaporation
+          !-------------------------------------------
+          ! Calculate local precipitation (kg/kg)
+          ZPRECLR = ZQXFG(NCLDQR) / ZCOVPTOT
+
+          ! Fallspeed air density correction
+          ZFALLCORR = (YRECLDP%RDENSREF / ZRHO)**0.4
+
+          ! Saturation vapour pressure with respect to liquid phase
+          ZESATLIQ = (RV / RD)*FOEELIQ(ZTP1(JL, JK))
+
+          ! Slope of particle size distribution
+          ZLAMBDA = (YRECLDP%RCL_FAC1 / ((ZRHO*ZPRECLR)))**YRECLDP%RCL_FAC2            ! ZPRECLR=kg/kg
+
+          ZEVAP_DENOM = YRECLDP%RCL_CDENOM1*ZESATLIQ - YRECLDP%RCL_CDENOM2*ZTP1(JL, JK)*ZESATLIQ + YRECLDP%RCL_CDENOM3*ZTP1(JL,  &
+          & JK)**3._JPRB*PAP(JL, JK)
+
+          ! Temperature dependent conductivity
+          ZCORR2 = ((ZTP1(JL, JK) / 273._JPRB)**1.5_JPRB*393._JPRB) / (ZTP1(JL, JK) + 120._JPRB)
+          ZKA = YRECLDP%RCL_KA273*ZCORR2
+
+          ZSUBSAT = MAX(ZZRH*ZQSLIQ(JL, JK) - ZQE, 0.0_JPRB)
+
+          ZBETA = (0.5_JPRB / ZQSLIQ(JL, JK))*ZTP1(JL, JK)**2._JPRB*ZESATLIQ*YRECLDP%RCL_CONST1R*(ZCORR2 /  &
+          & ZEVAP_DENOM)*(0.78_JPRB / (ZLAMBDA**YRECLDP%RCL_CONST4R) + (YRECLDP%RCL_CONST2R*(ZRHO*ZFALLCORR)**0.5_JPRB) /  &
+          & ((ZCORR2**0.5_JPRB*ZLAMBDA**YRECLDP%RCL_CONST3R)))
+
+          ZDENOM = 1.0_JPRB + ZBETA*PTSPHY            !*ZCORQSLIQ(JL)
+          ZDPEVAP = (ZCOVPCLR*ZBETA*PTSPHY*ZSUBSAT) / ZDENOM
+
+          !---------------------------------------------------------
+          ! Add evaporation term to explicit sink.
+          ! this has to be explicit since if treated in the implicit
+          ! term evaporation can not reduce rain to zero and model
+          ! produces small amounts of rainfall everywhere.
+          !---------------------------------------------------------
+
+          ! Limit rain evaporation
+          ZEVAP = MIN(ZDPEVAP, ZQXFG(NCLDQR))
+
+          ZSOLQA(NCLDQV, NCLDQR) = ZSOLQA(NCLDQV, NCLDQR) + ZEVAP
+          ZSOLQA(NCLDQR, NCLDQV) = ZSOLQA(NCLDQR, NCLDQV) - ZEVAP
+
+          !-------------------------------------------------------------
+          ! Reduce the total precip coverage proportional to evaporation
+          ! to mimic the previous scheme which had a diagnostic
+          ! 2-flux treatment, abandoned due to the new prognostic precip
+          !-------------------------------------------------------------
+          ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JL, JK))*ZEVAP) / ZQXFG(NCLDQR)))
+
+          ! Update fg field
+          ZQXFG(NCLDQR) = ZQXFG(NCLDQR) - ZEVAP
+
+        END IF
+
+      END IF
+      ! on IEVAPRAIN
+
+      !----------------------------------------------------------------------
+      ! 4.5   EVAPORATION OF SNOW
+      !----------------------------------------------------------------------
+      ! Snow
+      IF (IEVAPSNOW == 1) THEN
+
+        ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JL, JK))
+        ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB)
+        ZQE = (ZQX(JL, JK, NCLDQV) - ZA(JL, JK)*ZQSICE(JL, JK)) / MAX(ZEPSEC, 1.0_JPRB - ZA(JL, JK))
+
+        !---------------------------------------------
+        ! humidity in moistest ZCOVPCLR part of domain
+        !---------------------------------------------
+        ZQE = MAX(0.0_JPRB, MIN(ZQE, ZQSICE(JL, JK)))
+        LLO1 = ZCOVPCLR > ZEPSEC .and. ZQXFG(NCLDQS) > ZEPSEC .and. ZQE < ZZRH*ZQSICE(JL, JK)
+
+        IF (LLO1) THEN
+          ! note: zpreclr is a snow flux here
+          ZPRECLR = (ZQXFG(NCLDQS)*ZCOVPCLR) / SIGN(MAX(ABS(ZCOVPTOT*ZDTGDP), ZEPSILON), ZCOVPTOT*ZDTGDP)
+
+          !--------------------------------------
+          ! actual microphysics formula in zbeta
+          !--------------------------------------
+
+          ZBETA1 = ((SQRT(PAP(JL, JK) / PAPH(JL, KLEV + 1)) / YRECLDP%RVRFACTOR)*ZPRECLR) / MAX(ZCOVPCLR, ZEPSEC)
+
+          ZBETA = RG*YRECLDP%RPECONS*ZBETA1**0.5777_JPRB
+
+          ZDENOM = 1.0_JPRB + ZBETA*PTSPHY*ZCORQSICE
+          ZDPR = ((ZCOVPCLR*ZBETA*(ZQSICE(JL, JK) - ZQE)) / ZDENOM)*ZDP*ZRG_R
+          ZDPEVAP = ZDPR*ZDTGDP
+
+          !---------------------------------------------------------
+          ! add evaporation term to explicit sink.
+          ! this has to be explicit since if treated in the implicit
+          ! term evaporation can not reduce snow to zero and model
+          ! produces small amounts of snowfall everywhere.
+          !---------------------------------------------------------
+
+          ! Evaporate snow
+          ZEVAP = MIN(ZDPEVAP, ZQXFG(NCLDQS))
+
+          ZSOLQA(NCLDQV, NCLDQS) = ZSOLQA(NCLDQV, NCLDQS) + ZEVAP
+          ZSOLQA(NCLDQS, NCLDQV) = ZSOLQA(NCLDQS, NCLDQV) - ZEVAP
+
+          !-------------------------------------------------------------
+          ! Reduce the total precip coverage proportional to evaporation
+          ! to mimic the previous scheme which had a diagnostic
+          ! 2-flux treatment, abandoned due to the new prognostic precip
+          !-------------------------------------------------------------
+          ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JL, JK))*ZEVAP) / ZQXFG(NCLDQS)))
+
+          !Update first guess field
+          ZQXFG(NCLDQS) = ZQXFG(NCLDQS) - ZEVAP
+
+        END IF
+        !---------------------------------------------------------
+      ELSE IF (IEVAPSNOW == 2) THEN
+
+
+
+        !-----------------------------------------------------------------------
+        ! Calculate relative humidity limit for snow evaporation
+        !-----------------------------------------------------------------------
+        ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JL, JK))
+        ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB)
+        ZQE = (ZQX(JL, JK, NCLDQV) - ZA(JL, JK)*ZQSICE(JL, JK)) / MAX(ZEPSEC, 1.0_JPRB - ZA(JL, JK))
+
+        !---------------------------------------------
+        ! humidity in moistest ZCOVPCLR part of domain
+        !---------------------------------------------
+        ZQE = MAX(0.0_JPRB, MIN(ZQE, ZQSICE(JL, JK)))
+        LLO1 = ZCOVPCLR > ZEPSEC .and. ZQX(JL, JK, NCLDQS) > ZEPSEC .and. ZQE < ZZRH*ZQSICE(JL, JK)
+
+        IF (LLO1) THEN
+
+          ! Calculate local precipitation (kg/kg)
+          ZPRECLR = ZQX(JL, JK, NCLDQS) / ZCOVPTOT
+          ZVPICE = (FOEEICE(ZTP1(JL, JK))*RV) / RD
+
+          ! Particle size distribution
+          ! ZTCG increases Ni with colder temperatures - essentially a
+          ! Fletcher or Meyers scheme?
+          ZTCG = 1.0_JPRB            !v1 EXP(RCL_X3I*(273.15_JPRB-ZTP1(JL,JK))/8.18_JPRB)
+          ! ZFACX1I modification is based on Andrew Barrett's results
+          ZFACX1S = 1.0_JPRB            !v1 (ZICE0/1.E-5_JPRB)**0.627_JPRB
+
+          ZAPLUSB = YRECLDP%RCL_APB1*ZVPICE - YRECLDP%RCL_APB2*ZVPICE*ZTP1(JL, JK) + PAP(JL, JK)*YRECLDP%RCL_APB3*ZTP1(JL, JK)**3
+          ZCORRFAC = (1.0 / ZRHO)**0.5
+          ZCORRFAC2 = ((ZTP1(JL, JK) / 273.0)**1.5)*(393.0 / (ZTP1(JL, JK) + 120.0))
+
+          ZPR02 = (ZRHO*ZPRECLR*YRECLDP%RCL_CONST1S) / ((ZTCG*ZFACX1S))
+
+          ZTERM1 = ((ZQSICE(JL, JK) - ZQE)*ZTP1(JL, JK)**2*ZVPICE*ZCORRFAC2*ZTCG*YRECLDP%RCL_CONST2S*ZFACX1S) /  &
+          & ((ZRHO*ZAPLUSB*ZQSICE(JL, JK)))
+          ZTERM2 = 0.65*YRECLDP%RCL_CONST6S*ZPR02**YRECLDP%RCL_CONST4S +  &
+          & (YRECLDP%RCL_CONST3S*ZCORRFAC**0.5*ZRHO**0.5*ZPR02**YRECLDP%RCL_CONST5S) / ZCORRFAC2**0.5
+
+          ZDPEVAP = MAX(ZCOVPCLR*ZTERM1*ZTERM2*PTSPHY, 0.0_JPRB)
+
+          !--------------------------------------------------------------------
+          ! Limit evaporation to snow amount
+          !--------------------------------------------------------------------
+          ZEVAP = MIN(ZDPEVAP, ZEVAPLIMICE)
+          ZEVAP = MIN(ZEVAP, ZQX(JL, JK, NCLDQS))
+
+
+          ZSOLQA(NCLDQV, NCLDQS) = ZSOLQA(NCLDQV, NCLDQS) + ZEVAP
+          ZSOLQA(NCLDQS, NCLDQV) = ZSOLQA(NCLDQS, NCLDQV) - ZEVAP
+
+          !-------------------------------------------------------------
+          ! Reduce the total precip coverage proportional to evaporation
+          ! to mimic the previous scheme which had a diagnostic
+          ! 2-flux treatment, abandoned due to the new prognostic precip
+          !-------------------------------------------------------------
+          ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JL, JK))*ZEVAP) / ZQX(JL, JK, NCLDQS)))
+
+          !Update first guess field
+          ZQXFG(NCLDQS) = ZQXFG(NCLDQS) - ZEVAP
+
+        END IF
+
+      END IF
+      ! on IEVAPSNOW
+
+      !--------------------------------------
+      ! Evaporate small precipitation amounts
+      !--------------------------------------
+      DO JM=1,NCLV
+        IF (LLFALL(JM)) THEN
+          IF (ZQXFG(JM) < YRECLDP%RLMIN) THEN
+            ZSOLQA(NCLDQV, JM) = ZSOLQA(NCLDQV, JM) + ZQXFG(JM)
+            ZSOLQA(JM, NCLDQV) = ZSOLQA(JM, NCLDQV) - ZQXFG(JM)
+          END IF
+        END IF
+      END DO
+
+      !######################################################################
+      !            5.0  *** SOLVERS FOR A AND L ***
+      ! now use an implicit solution rather than exact solution
+      ! solver is forward in time, upstream difference for advection
+      !######################################################################
+
+      !---------------------------
+      ! 5.1 solver for cloud cover
+      !---------------------------
+      ZANEW = (ZA(JL, JK) + ZSOLAC) / (1.0_JPRB + ZSOLAB)
+      ZANEW = MIN(ZANEW, 1.0_JPRB)
+      IF (ZANEW < YRECLDP%RAMIN)       ZANEW = 0.0_JPRB
+      ZDA = ZANEW - ZAORIG(JL, JK)
+      !---------------------------------
+      ! variables needed for next level
+      !---------------------------------
+      ZANEWM1 = ZANEW
+
+      !--------------------------------
+      ! 5.2 solver for the microphysics
+      !--------------------------------
+
+      !--------------------------------------------------------------
+      ! Truncate explicit sinks to avoid negatives
+      ! Note: Species are treated in the order in which they run out
+      ! since the clipping will alter the balance for the other vars
+      !--------------------------------------------------------------
+
+      DO JM=1,NCLV
+!$claw nodep
+        DO JN=1,NCLV
+          LLINDEX3(JN, JM) = .false.
+        END DO
+        ZSINKSUM(JM) = 0.0_JPRB
+      END DO
+
+      !----------------------------
+      ! collect sink terms and mark
+      !----------------------------
+      DO JM=1,NCLV
+        DO JN=1,NCLV
+          ZSINKSUM(JM) = ZSINKSUM(JM) - ZSOLQA(JM, JN)            ! +ve total is bad
+        END DO
+      END DO
+
+      !---------------------------------------
+      ! calculate overshoot and scaling factor
+      !---------------------------------------
+      DO JM=1,NCLV
+        ZMAX = MAX(ZQX(JL, JK, JM), ZEPSEC)
+        ZRAT = MAX(ZSINKSUM(JM), ZMAX)
+        ZRATIO(JM) = ZMAX / ZRAT
+      END DO
+
+      !--------------------------------------------
+      ! scale the sink terms, in the correct order,
+      ! recalculating the scale factor each time
+      !--------------------------------------------
+      DO JM=1,NCLV
+        ZSINKSUM(JM) = 0.0_JPRB
+      END DO
+
+      !----------------
+      ! recalculate sum
+      !----------------
+      DO JM=1,NCLV
+        PSUM_SOLQA = 0.0
+        DO JN=1,NCLV
+          PSUM_SOLQA = PSUM_SOLQA + ZSOLQA(JM, JN)
+        END DO
+        ! ZSINKSUM(JL,JM)=ZSINKSUM(JL,JM)-SUM(ZSOLQA(JL,JM,1:NCLV))
+        ZSINKSUM(JM) = ZSINKSUM(JM) - PSUM_SOLQA
+        !---------------------------
+        ! recalculate scaling factor
+        !---------------------------
+        ZMM = MAX(ZQX(JL, JK, JM), ZEPSEC)
+        ZRR = MAX(ZSINKSUM(JM), ZMM)
+        ZRATIO(JM) = ZMM / ZRR
+        !------
+        ! scale
+        !------
+        ZZRATIO = ZRATIO(JM)
+        !DIR$ IVDEP
+        !DIR$ PREFERVECTOR
+        DO JN=1,NCLV
+          IF (ZSOLQA(JM, JN) < 0.0_JPRB) THEN
+            ZSOLQA(JM, JN) = ZSOLQA(JM, JN)*ZZRATIO
+            ZSOLQA(JN, JM) = ZSOLQA(JN, JM)*ZZRATIO
+          END IF
+        END DO
+      END DO
+
+      !--------------------------------------------------------------
+      ! 5.2.2 Solver
+      !------------------------
+
+      !------------------------
+      ! set the LHS of equation
+      !------------------------
+      DO JM=1,NCLV
+        DO JN=1,NCLV
+          !----------------------------------------------
+          ! diagonals: microphysical sink terms+transport
+          !----------------------------------------------
+          IF (JN == JM) THEN
+            ZQLHS(JN, JM) = 1.0_JPRB + ZFALLSINK(JM)
+            DO JO=1,NCLV
+              ZQLHS(JN, JM) = ZQLHS(JN, JM) + ZSOLQB(JO, JN)
+            END DO
+            !------------------------------------------
+            ! non-diagonals: microphysical source terms
+            !------------------------------------------
+          ELSE
+            ZQLHS(JN, JM) = -ZSOLQB(JN, JM)              ! here is the delta T - missing from doc.
+          END IF
+        END DO
+      END DO
+
+      !------------------------
+      ! set the RHS of equation
+      !------------------------
+      DO JM=1,NCLV
+        !---------------------------------
+        ! sum the explicit source and sink
+        !---------------------------------
+        ZEXPLICIT = 0.0_JPRB
+        DO JN=1,NCLV
+          ZEXPLICIT = ZEXPLICIT + ZSOLQA(JM, JN)            ! sum over middle index
+        END DO
+        ZQXN(JM) = ZQX(JL, JK, JM) + ZEXPLICIT
+      END DO
+
+      !-----------------------------------
+      ! *** solve by LU decomposition: ***
+      !-----------------------------------
+
+      ! Note: This fast way of solving NCLVxNCLV system
+      !       assumes a good behaviour (i.e. non-zero diagonal
+      !       terms with comparable orders) of the matrix stored
+      !       in ZQLHS. For the moment this is the case but
+      !       be aware to preserve it when doing eventual
+      !       modifications.
+
+      ! Non pivoting recursive factorization
+      DO JN=1,NCLV - 1
+        ! number of steps
+        DO JM=JN + 1,NCLV
+          ! row index
+          ZQLHS(JM, JN) = ZQLHS(JM, JN) / ZQLHS(JN, JN)
+          DO IK=JN + 1,NCLV
+            ! column index
+            ZQLHS(JM, IK) = ZQLHS(JM, IK) - ZQLHS(JM, JN)*ZQLHS(JN, IK)
+          END DO
+        END DO
+      END DO
+
+      ! Backsubstitution
+      !  step 1
+      DO JN=2,NCLV
+        DO JM=1,JN - 1
+          ZQXN(JN) = ZQXN(JN) - ZQLHS(JN, JM)*ZQXN(JM)
+        END DO
+      END DO
+      !  step 2
+      ZQXN(NCLV) = ZQXN(NCLV) / ZQLHS(NCLV, NCLV)
+      DO JN=NCLV - 1,1,-1
+        DO JM=JN + 1,NCLV
+          ZQXN(JN) = ZQXN(JN) - ZQLHS(JN, JM)*ZQXN(JM)
+        END DO
+        ZQXN(JN) = ZQXN(JN) / ZQLHS(JN, JN)
+      END DO
+
+      ! Ensure no small values (including negatives) remain in cloud variables nor
+      ! precipitation rates.
+      ! Evaporate l,i,r,s to water vapour. Latent heating taken into account below
+      DO JN=1,NCLV - 1
+        IF (ZQXN(JN) < ZEPSEC) THEN
+          ZQXN(NCLDQV) = ZQXN(NCLDQV) + ZQXN(JN)
+          ZQXN(JN) = 0.0_JPRB
+        END IF
+      END DO
+
+      !--------------------------------
+      ! variables needed for next level
+      !--------------------------------
+      DO JM=1,NCLV
+        ZQXNM1(JM) = ZQXN(JM)
+        ZQXN2D(JL, JK, JM) = ZQXN(JM)
+      END DO
+
+      !------------------------------------------------------------------------
+      ! 5.3 Precipitation/sedimentation fluxes to next level
+      !     diagnostic precipitation fluxes
+      !     It is this scaled flux that must be used for source to next layer
+      !------------------------------------------------------------------------
+
+      DO JM=1,NCLV
+        ZPFPLSX(JL, JK + 1, JM) = ZFALLSINK(JM)*ZQXN(JM)*ZRDTGDP
+      END DO
+
+      ! Ensure precipitation fraction is zero if no precipitation
+      ZQPRETOT = ZPFPLSX(JL, JK + 1, NCLDQS) + ZPFPLSX(JL, JK + 1, NCLDQR)
+      IF (ZQPRETOT < ZEPSEC) THEN
+        ZCOVPTOT = 0.0_JPRB
+      END IF
+
+      !######################################################################
+      !              6  *** UPDATE TENDENCIES ***
+      !######################################################################
+
+      !--------------------------------
+      ! 6.1 Temperature and CLV budgets
+      !--------------------------------
+
+      DO JM=1,NCLV - 1
+
+        ! calculate fluxes in and out of box for conservation of TL
+        ZFLUXQ(JM) = ZPSUPSATSRCE(JM) + ZCONVSRCE(JM) + ZFALLSRCE(JM) - (ZFALLSINK(JM) + ZCONVSINK(JM))*ZQXN(JM)
+
+        IF (IPHASE(JM) == 1) THEN
+          TENDENCY_LOC_T(JL, JK) = TENDENCY_LOC_T(JL, JK) + RALVDCP*(ZQXN(JM) - ZQX(JL, JK, JM) - ZFLUXQ(JM))*ZQTMST
+        END IF
+
+        IF (IPHASE(JM) == 2) THEN
+          TENDENCY_LOC_T(JL, JK) = TENDENCY_LOC_T(JL, JK) + RALSDCP*(ZQXN(JM) - ZQX(JL, JK, JM) - ZFLUXQ(JM))*ZQTMST
+        END IF
+
+        !----------------------------------------------------------------------
+        ! New prognostic tendencies - ice,liquid rain,snow
+        ! Note: CLV arrays use PCLV in calculation of tendency while humidity
+        !       uses ZQX. This is due to clipping at start of cloudsc which
+        !       include the tendency already in TENDENCY_LOC_T and TENDENCY_LOC_q. ZQX was reset
+        !----------------------------------------------------------------------
+        TENDENCY_LOC_CLD(JL, JK, JM) = TENDENCY_LOC_CLD(JL, JK, JM) + (ZQXN(JM) - ZQX0(JL, JK, JM))*ZQTMST
+
+      END DO
+
+      !----------------------
+      ! 6.2 Humidity budget
+      !----------------------
+      TENDENCY_LOC_q(JL, JK) = TENDENCY_LOC_Q(JL, JK) + (ZQXN(NCLDQV) - ZQX(JL, JK, NCLDQV))*ZQTMST
+
+      !-------------------
+      ! 6.3 cloud cover
+      !-----------------------
+      TENDENCY_LOC_a(JL, JK) = TENDENCY_LOC_A(JL, JK) + ZDA*ZQTMST
+
+      !--------------------------------------------------
+      ! Copy precipitation fraction into output variable
+      !-------------------------------------------------
+      PCOVPTOT(JL, JK) = ZCOVPTOT
+
+    END DO
+    ! on vertical level JK
+    !----------------------------------------------------------------------
+    !                       END OF VERTICAL LOOP
+    !----------------------------------------------------------------------
+
+    !######################################################################
+    !              8  *** FLUX/DIAGNOSTICS COMPUTATIONS ***
+    !######################################################################
+
+    !--------------------------------------------------------------------
+    ! Copy general precip arrays back into PFP arrays for GRIB archiving
+    ! Add rain and liquid fluxes, ice and snow fluxes
+    !--------------------------------------------------------------------
+!$acc loop seq
+    DO JK=1,KLEV + 1
+      PFPLSL(JL, JK) = ZPFPLSX(JL, JK, NCLDQR) + ZPFPLSX(JL, JK, NCLDQL)
+      PFPLSN(JL, JK) = ZPFPLSX(JL, JK, NCLDQS) + ZPFPLSX(JL, JK, NCLDQI)
+    END DO
+
+    !--------
+    ! Fluxes:
+    !--------
+    PFSQLF(JL, 1) = 0.0_JPRB
+    PFSQIF(JL, 1) = 0.0_JPRB
+    PFSQRF(JL, 1) = 0.0_JPRB
+    PFSQSF(JL, 1) = 0.0_JPRB
+    PFCQLNG(JL, 1) = 0.0_JPRB
+    PFCQNNG(JL, 1) = 0.0_JPRB
+    PFCQRNG(JL, 1) = 0.0_JPRB      !rain
+    PFCQSNG(JL, 1) = 0.0_JPRB      !snow
+    ! fluxes due to turbulence
+    PFSQLTUR(JL, 1) = 0.0_JPRB
+    PFSQITUR(JL, 1) = 0.0_JPRB
+
+!$acc loop seq
+    DO JK=1,KLEV
+
+      ZGDPH_R = -ZRG_R*(PAPH(JL, JK + 1) - PAPH(JL, JK))*ZQTMST
+      PFSQLF(JL, JK + 1) = PFSQLF(JL, JK)
+      PFSQIF(JL, JK + 1) = PFSQIF(JL, JK)
+      PFSQRF(JL, JK + 1) = PFSQLF(JL, JK)
+      PFSQSF(JL, JK + 1) = PFSQIF(JL, JK)
+      PFCQLNG(JL, JK + 1) = PFCQLNG(JL, JK)
+      PFCQNNG(JL, JK + 1) = PFCQNNG(JL, JK)
+      PFCQRNG(JL, JK + 1) = PFCQLNG(JL, JK)
+      PFCQSNG(JL, JK + 1) = PFCQNNG(JL, JK)
+      PFSQLTUR(JL, JK + 1) = PFSQLTUR(JL, JK)
+      PFSQITUR(JL, JK + 1) = PFSQITUR(JL, JK)
+
+      ZALFAW = ZFOEALFA(JL, JK)
+
+      ! Liquid , LS scheme minus detrainment
+      PFSQLF(JL, JK + 1) =  &
+      & PFSQLF(JL, JK + 1) + (ZQXN2D(JL, JK, NCLDQL) - ZQX0(JL, JK, NCLDQL) + PVFL(JL, JK)*PTSPHY - ZALFAW*PLUDE(JL, JK))*ZGDPH_R
+      ! liquid, negative numbers
+      PFCQLNG(JL, JK + 1) = PFCQLNG(JL, JK + 1) + ZLNEG(JL, JK, NCLDQL)*ZGDPH_R
+
+      ! liquid, vertical diffusion
+      PFSQLTUR(JL, JK + 1) = PFSQLTUR(JL, JK + 1) + PVFL(JL, JK)*PTSPHY*ZGDPH_R
+
+      ! Rain, LS scheme
+      PFSQRF(JL, JK + 1) = PFSQRF(JL, JK + 1) + (ZQXN2D(JL, JK, NCLDQR) - ZQX0(JL, JK, NCLDQR))*ZGDPH_R
+      ! rain, negative numbers
+      PFCQRNG(JL, JK + 1) = PFCQRNG(JL, JK + 1) + ZLNEG(JL, JK, NCLDQR)*ZGDPH_R
+
+      ! Ice , LS scheme minus detrainment
+      PFSQIF(JL, JK + 1) = PFSQIF(JL, JK + 1) + (ZQXN2D(JL, JK, NCLDQI) - ZQX0(JL, JK, NCLDQI) + PVFI(JL, JK)*PTSPHY - (1.0_JPRB  &
+      & - ZALFAW)*PLUDE(JL, JK))*ZGDPH_R
+      ! ice, negative numbers
+      PFCQNNG(JL, JK + 1) = PFCQNNG(JL, JK + 1) + ZLNEG(JL, JK, NCLDQI)*ZGDPH_R
+
+      ! ice, vertical diffusion
+      PFSQITUR(JL, JK + 1) = PFSQITUR(JL, JK + 1) + PVFI(JL, JK)*PTSPHY*ZGDPH_R
+
+      ! snow, LS scheme
+      PFSQSF(JL, JK + 1) = PFSQSF(JL, JK + 1) + (ZQXN2D(JL, JK, NCLDQS) - ZQX0(JL, JK, NCLDQS))*ZGDPH_R
+      ! snow, negative numbers
+      PFCQSNG(JL, JK + 1) = PFCQSNG(JL, JK + 1) + ZLNEG(JL, JK, NCLDQS)*ZGDPH_R
+    END DO
+
+    !-----------------------------------
+    ! enthalpy flux due to precipitation
+    !-----------------------------------
+!$acc loop seq
+    DO JK=1,KLEV + 1
+      PFHPSL(JL, JK) = -RLVTT*PFPLSL(JL, JK)
+      PFHPSN(JL, JK) = -RLSTT*PFPLSN(JL, JK)
+    END DO
+
+    !===============================================================================
+    !IF (LHOOK) CALL DR_HOOK('CLOUDSC',1,ZHOOK_HANDLE)
+  END SUBROUTINE CLOUDSC_SCC_HOIST
+END MODULE CLOUDSC_GPU_OMP_SCC_HOIST_MOD
diff --git a/src/cloudsc_gpu/cloudsc_gpu_scc_cuf_mod.F90 b/src/cloudsc_gpu/cloudsc_gpu_scc_cuf_mod.F90
new file mode 100644
index 00000000..8d1fe2e0
--- /dev/null
+++ b/src/cloudsc_gpu/cloudsc_gpu_scc_cuf_mod.F90
@@ -0,0 +1,2782 @@
+! (C) Copyright 1988- ECMWF.
+!
+! This software is licensed under the terms of the Apache Licence Version 2.0
+! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+!
+! In applying this licence, ECMWF does not waive the privileges and immunities
+! granted to it by virtue of its status as an intergovernmental organisation
+! nor does it submit to any jurisdiction.
+
+MODULE CLOUDSC_GPU_SCC_CUF_MOD
+  USE NLEV_MOD, ONLY : NLEV
+  
+CONTAINS
+ATTRIBUTES(GLOBAL)  SUBROUTINE CLOUDSC_SCC_CUF (KIDIA, KFDIA, KLON, KGPBLKS, PTSPHY, PT, PQ,  & !! 8
+  & TENDENCY_TMP, TENDENCY_LOC, PVFA, PVFL, PVFI, PDYNA, PDYNL, PDYNI,  & !! 16
+  & PHRSW, PHRLW, PVERVEL, PAP, PAPH, PLSM, LDCUM, KTYPE, PLU, PLUDE, PSNDE, PMFU, PMFD, PA, PCLV, PSUPSAT, PLCRIT_AER,  & !! 33
+  & PICRIT_AER, PRE_ICE, PCCN, PNICE, PCOVPTOT, PRAINFRAC_TOPRFZ, PFSQLF, PFSQIF, PFCQNNG, PFCQLNG, PFSQRF, PFSQSF, PFCQRNG,  & !! 46
+  & PFCQSNG, PFSQLTUR, PFSQITUR, PFPLSL, PFPLSN, PFHPSL, PFHPSN, YRECLDP )
+    !---input
+    !---prognostic fields
+    !-- arrays for aerosol-cloud interactions
+    !!! & PQAER,    KAER, &
+    !---diagnostic output
+    !---resulting fluxes
+    
+    !===============================================================================
+    !**** *CLOUDSC* -  ROUTINE FOR PARAMETERIZATION OF CLOUD PROCESSES
+    !                  FOR PROGNOSTIC CLOUD SCHEME
+    !!
+    !     M.Tiedtke, C.Jakob, A.Tompkins, R.Forbes     (E.C.M.W.F.)
+    !!
+    !     PURPOSE
+    !     -------
+    !          THIS ROUTINE UPDATES THE CONV/STRAT CLOUD FIELDS.
+    !          THE FOLLOWING PROCESSES ARE CONSIDERED:
+    !        - Detrainment of cloud water from convective updrafts
+    !        - Evaporation/condensation of cloud water in connection
+    !           with heating/cooling such as by subsidence/ascent
+    !        - Erosion of clouds by turbulent mixing of cloud air
+    !           with unsaturated environmental air
+    !        - Deposition onto ice when liquid water present (Bergeron-Findeisen)
+    !        - Conversion of cloud water into rain (collision-coalescence)
+    !        - Conversion of cloud ice to snow (aggregation)
+    !        - Sedimentation of rain, snow and ice
+    !        - Evaporation of rain and snow
+    !        - Melting of snow and ice
+    !        - Freezing of liquid and rain
+    !        Note: Turbulent transports of s,q,u,v at cloud tops due to
+    !           buoyancy fluxes and lw radiative cooling are treated in
+    !           the VDF scheme
+    !!
+    !     INTERFACE.
+    !     ----------
+    !          *CLOUDSC* IS CALLED FROM *CALLPAR*
+    !     THE ROUTINE TAKES ITS INPUT FROM THE LONG-TERM STORAGE:
+    !     T,Q,L,PHI AND DETRAINMENT OF CLOUD WATER FROM THE
+    !     CONVECTIVE CLOUDS (MASSFLUX CONVECTION SCHEME), BOUNDARY
+    !     LAYER TURBULENT FLUXES OF HEAT AND MOISTURE, RADIATIVE FLUXES,
+    !     OMEGA.
+    !     IT RETURNS ITS OUTPUT TO:
+    !      1.MODIFIED TENDENCIES OF MODEL VARIABLES T AND Q
+    !        AS WELL AS CLOUD VARIABLES L AND C
+    !      2.GENERATES PRECIPITATION FLUXES FROM STRATIFORM CLOUDS
+    !!
+    !     EXTERNALS.
+    !     ----------
+    !          NONE
+    !!
+    !     MODIFICATIONS.
+    !     -------------
+    !      M. TIEDTKE    E.C.M.W.F.     8/1988, 2/1990
+    !     CH. JAKOB      E.C.M.W.F.     2/1994 IMPLEMENTATION INTO IFS
+    !     A.TOMPKINS     E.C.M.W.F.     2002   NEW NUMERICS
+    !        01-05-22 : D.Salmond   Safety modifications
+    !        02-05-29 : D.Salmond   Optimisation
+    !        03-01-13 : J.Hague     MASS Vector Functions  J.Hague
+    !        03-10-01 : M.Hamrud    Cleaning
+    !        04-12-14 : A.Tompkins  New implicit solver and physics changes
+    !        04-12-03 : A.Tompkins & M.Ko"hler  moist PBL
+    !     G.Mozdzynski  09-Jan-2006  EXP security fix
+    !        19-01-09 : P.Bechtold  Changed increased RCLDIFF value for KTYPE=2
+    !        07-07-10 : A.Tompkins/R.Forbes  4-Phase flexible microphysics
+    !        01-03-11 : R.Forbes    Mixed phase changes and tidy up
+    !        01-10-11 : R.Forbes    Melt ice to rain, allow rain to freeze
+    !        01-10-11 : R.Forbes    Limit supersat to avoid excessive values
+    !        31-10-11 : M.Ahlgrimm  Add rain, snow and PEXTRA to DDH output
+    !        17-02-12 : F.Vana      Simplified/optimized LU factorization
+    !        18-05-12 : F.Vana      Cleaning + better support of sequential physics
+    !        N.Semane+P.Bechtold     04-10-2012 Add RVRFACTOR factor for small planet
+    !        01-02-13 : R.Forbes    New params of autoconv/acc,rain evap,snow riming
+    !        15-03-13 : F. Vana     New dataflow + more tendencies from the first call
+    !        K. Yessad (July 2014): Move some variables.
+    !        F. Vana  05-Mar-2015  Support for single precision
+    !        15-01-15 : R.Forbes    Added new options for snow evap & ice deposition
+    !        10-01-15 : R.Forbes    New physics for rain freezing
+    !        23-10-14 : P. Bechtold remove zeroing of convection arrays
+    !
+    !     SWITCHES.
+    !     --------
+    !!
+    !     MODEL PARAMETERS
+    !     ----------------
+    !     RCLDIFF:    PARAMETER FOR EROSION OF CLOUDS
+    !     RCLCRIT_SEA:  THRESHOLD VALUE FOR RAIN AUTOCONVERSION OVER SEA
+    !     RCLCRIT_LAND: THRESHOLD VALUE FOR RAIN AUTOCONVERSION OVER LAND
+    !     RLCRITSNOW: THRESHOLD VALUE FOR SNOW AUTOCONVERSION
+    !     RKCONV:     PARAMETER FOR AUTOCONVERSION OF CLOUDS (KESSLER)
+    !     RCLDMAX:    MAXIMUM POSSIBLE CLW CONTENT (MASON,1971)
+    !!
+    !     REFERENCES.
+    !     ----------
+    !     TIEDTKE MWR 1993
+    !     JAKOB PhD 2000
+    !     GREGORY ET AL. QJRMS 2000
+    !     TOMPKINS ET AL. QJRMS 2007
+    !!
+    !===============================================================================
+    
+    USE CUDAFOR
+
+    USE PARKIND1, ONLY: JPIM, JPRB
+    USE YOMPHYDER, ONLY: state_type
+
+    USE YOMCST_CUF, ONLY: RG=>RG_D, RD=>RD_D, RCPD=>RCPD_D, RETV=>RETV_D, &
+     & RLVTT=>RLVTT_D, RLSTT=>RLSTT_D, RLMLT=>RLMLT_D, RTT=>RTT_D, RV=>RV_D
+    USE YOETHF_CUF,  &
+     & R2ES=>R2ES_D, R3LES=>R3LES_D, R3IES=>R3IES_D, R4LES=>R4LES_D,  &
+     & R4IES=>R4IES_D, R5LES=>R5LES_D, R5IES=>R5IES_D,  &
+     & R5ALVCP=>R5ALVCP_D, R5ALSCP=>R5ALSCP_D, RALVDCP=>RALVDCP_D,  &
+     & RALSDCP=>RALSDCP_D, RALFDCP=>RALFDCP_D, RTWAT=>RTWAT_D, RTICE=>RTICE_D, RTICECU=>RTICECU_D,  &
+     & RTWAT_RTICE_R=>RTWAT_RTICE_R_D, RTWAT_RTICECU_R=>RTWAT_RTICECU_R_D, RKOOP1=>RKOOP1_D, RKOOP2=>RKOOP2
+    USE YOECLDP, ONLY : NCLV, NCLDQV, NCLDQL, NCLDQR, NCLDQI, NCLDQS, TECLDP
+    ! USE YOECLDP_CUF
+    
+    USE FCTTRE_CUF_MOD, ONLY: FOEDELTA, FOEALFA, FOEEWM, FOEEICE, FOEELIQ, FOELDCP, FOELDCPM, FOEDEM
+    USE FCCLD_CUF_MOD, ONLY : FOKOOP
+    
+    
+    
+    IMPLICIT NONE
+    
+    !-------------------------------------------------------------------------------
+    !                 Declare input/output arguments
+    !-------------------------------------------------------------------------------
+    
+    ! PLCRIT_AER : critical liquid mmr for rain autoconversion process
+    ! PICRIT_AER : critical liquid mmr for snow autoconversion process
+    ! PRE_LIQ : liq Re
+    ! PRE_ICE : ice Re
+    ! PCCN    : liquid cloud condensation nuclei
+    ! PNICE   : ice number concentration (cf. CCN)
+    
+    INTEGER(KIND=JPIM), INTENT(IN), VALUE :: KLON    ! Number of grid points
+    INTEGER(KIND=JPIM), INTENT(IN), VALUE :: KGPBLKS ! Number of blocks (last array dimension, indexed by JBLK)
+    INTEGER(KIND=JPIM), INTENT(IN), VALUE :: KIDIA
+    INTEGER(KIND=JPIM), INTENT(IN), VALUE :: KFDIA
+    REAL(KIND=JPRB), INTENT(IN), VALUE :: PTSPHY    ! Physics timestep
+
+    REAL(KIND=JPRB), INTENT(IN) :: PLCRIT_AER(KLON, NLEV,KGPBLKS)
+    REAL(KIND=JPRB), INTENT(IN) :: PICRIT_AER(KLON, NLEV,KGPBLKS)
+    REAL(KIND=JPRB), INTENT(IN) :: PRE_ICE(KLON, NLEV,KGPBLKS)
+    REAL(KIND=JPRB), INTENT(IN) :: PCCN(KLON, NLEV,KGPBLKS)    ! liquid cloud condensation nuclei
+    REAL(KIND=JPRB), INTENT(IN) :: PNICE(KLON, NLEV,KGPBLKS)
+    ! ice number concentration (cf. CCN)
+    
+    REAL(KIND=JPRB), INTENT(IN) :: PT(KLON, NLEV,KGPBLKS)    ! T at start of callpar
+    REAL(KIND=JPRB), INTENT(IN) :: PQ(KLON, NLEV,KGPBLKS)    ! Q at start of callpar
+    REAL(KIND=JPRB), INTENT(IN) :: TENDENCY_TMP(KLON, NLEV, NCLV+3,KGPBLKS)
+    REAL(KIND=JPRB), INTENT(INOUT) :: TENDENCY_LOC(KLON, NLEV, NCLV+3,KGPBLKS)
+    REAL(KIND=JPRB), INTENT(IN) :: PVFA(KLON, NLEV,KGPBLKS)    ! CC from VDF scheme
+    REAL(KIND=JPRB), INTENT(IN) :: PVFL(KLON, NLEV,KGPBLKS)    ! Liq from VDF scheme
+    REAL(KIND=JPRB), INTENT(IN) :: PVFI(KLON, NLEV,KGPBLKS)    ! Ice from VDF scheme
+    REAL(KIND=JPRB), INTENT(IN) :: PDYNA(KLON, NLEV,KGPBLKS)    ! CC from Dynamics
+    REAL(KIND=JPRB), INTENT(IN) :: PDYNL(KLON, NLEV,KGPBLKS)    ! Liq from Dynamics
+    REAL(KIND=JPRB), INTENT(IN) :: PDYNI(KLON, NLEV,KGPBLKS)    ! Ice from Dynamics
+    REAL(KIND=JPRB), INTENT(IN) :: PHRSW(KLON, NLEV,KGPBLKS)    ! Short-wave heating rate
+    REAL(KIND=JPRB), INTENT(IN) :: PHRLW(KLON, NLEV,KGPBLKS)    ! Long-wave heating rate
+    REAL(KIND=JPRB), INTENT(IN) :: PVERVEL(KLON, NLEV,KGPBLKS)    !Vertical velocity
+    REAL(KIND=JPRB), INTENT(IN) :: PAP(KLON, NLEV,KGPBLKS)    ! Pressure on full levels
+    REAL(KIND=JPRB), INTENT(IN) :: PAPH(KLON, NLEV + 1,KGPBLKS)    ! Pressure on half levels
+    REAL(KIND=JPRB), INTENT(IN) :: PLSM(KLON,KGPBLKS)    ! Land fraction (0-1)
+    LOGICAL, INTENT(IN) :: LDCUM(KLON,KGPBLKS)    ! Convection active
+    INTEGER(KIND=JPIM), INTENT(IN) :: KTYPE(KLON,KGPBLKS)    ! Convection type 0,1,2
+    REAL(KIND=JPRB), INTENT(IN) :: PLU(KLON, NLEV,KGPBLKS)    ! Conv. condensate
+    REAL(KIND=JPRB), INTENT(INOUT) :: PLUDE(KLON, NLEV,KGPBLKS)    ! Conv. detrained water
+    REAL(KIND=JPRB), INTENT(IN) :: PSNDE(KLON, NLEV,KGPBLKS)    ! Conv. detrained snow
+    REAL(KIND=JPRB), INTENT(IN) :: PMFU(KLON, NLEV,KGPBLKS)    ! Conv. mass flux up
+    REAL(KIND=JPRB), INTENT(IN) :: PMFD(KLON, NLEV,KGPBLKS)    ! Conv. mass flux down
+    REAL(KIND=JPRB), INTENT(IN) :: PA(KLON, NLEV,KGPBLKS)
+    ! Original Cloud fraction (t)
+    
+    REAL(KIND=JPRB), INTENT(IN) :: PCLV(KLON, NLEV, NCLV,KGPBLKS)
+    
+    ! Supersat clipped at previous time level in SLTEND
+    REAL(KIND=JPRB), INTENT(IN) :: PSUPSAT(KLON, NLEV,KGPBLKS)
+    REAL(KIND=JPRB), INTENT(OUT) :: PCOVPTOT(KLON, NLEV, KGPBLKS)    ! Precip fraction
+    REAL(KIND=JPRB), INTENT(OUT) :: PRAINFRAC_TOPRFZ(KLON, KGPBLKS)
+    ! Flux diagnostics for DDH budget
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQLF(KLON, NLEV + 1, KGPBLKS)    ! Flux of liquid
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQIF(KLON, NLEV + 1, KGPBLKS)    ! Flux of ice
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQLNG(KLON, NLEV + 1, KGPBLKS)    ! -ve corr for liq
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQNNG(KLON, NLEV + 1, KGPBLKS)    ! -ve corr for ice
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQRF(KLON, NLEV + 1, KGPBLKS)    ! Flux diagnostics
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQSF(KLON, NLEV + 1, KGPBLKS)    !    for DDH, generic
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQRNG(KLON, NLEV + 1, KGPBLKS)    ! rain
+    REAL(KIND=JPRB), INTENT(OUT) :: PFCQSNG(KLON, NLEV + 1, KGPBLKS)    ! snow
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQLTUR(KLON, NLEV + 1, KGPBLKS)    ! liquid flux due to VDF
+    REAL(KIND=JPRB), INTENT(OUT) :: PFSQITUR(KLON, NLEV + 1, KGPBLKS)    ! ice flux due to VDF
+    REAL(KIND=JPRB), INTENT(OUT) :: PFPLSL(KLON, NLEV + 1, KGPBLKS)    ! liq+rain sedim flux
+    REAL(KIND=JPRB), INTENT(OUT) :: PFPLSN(KLON, NLEV + 1, KGPBLKS)    ! ice+snow sedim flux
+    REAL(KIND=JPRB), INTENT(OUT) :: PFHPSL(KLON, NLEV + 1, KGPBLKS)    ! Enthalpy flux for liq
+    REAL(KIND=JPRB), INTENT(OUT) :: PFHPSN(KLON, NLEV + 1, KGPBLKS)    ! Enthalpy flux for ice
+
+    TYPE(TECLDP), INTENT(IN) :: YRECLDP
+
+    
+    
+    !-------------------------------------------------------------------------------
+    !                       Declare local variables
+    !-------------------------------------------------------------------------------
+    
+    REAL(KIND=JPRB) :: ZLCOND1, ZLCOND2, ZLEVAP, ZLEROS, ZLEVAPL, ZLEVAPI, ZRAINAUT, ZSNOWAUT, ZLIQCLD, ZICECLD
+    !  condensation and evaporation terms
+    ! autoconversion terms
+    REAL(KIND=JPRB) :: ZFOKOOP
+    REAL(KIND=JPRB), device :: ZFOEALFA(NLEV + 1)
+    REAL(KIND=JPRB) :: ZICENUCLEI
+    ! number concentration of ice nuclei
+    
+    REAL(KIND=JPRB) :: ZLICLD
+    REAL(KIND=JPRB) :: ZACOND
+    REAL(KIND=JPRB) :: ZAEROS
+    REAL(KIND=JPRB) :: ZLFINALSUM
+    REAL(KIND=JPRB) :: ZDQS
+    REAL(KIND=JPRB) :: ZTOLD
+    REAL(KIND=JPRB) :: ZQOLD
+    REAL(KIND=JPRB) :: ZDTGDP
+    REAL(KIND=JPRB) :: ZRDTGDP
+    REAL(KIND=JPRB) :: ZTRPAUS
+    REAL(KIND=JPRB) :: ZCOVPCLR
+    REAL(KIND=JPRB) :: ZPRECLR
+    REAL(KIND=JPRB) :: ZCOVPTOT
+    REAL(KIND=JPRB) :: ZCOVPMAX
+    REAL(KIND=JPRB) :: ZQPRETOT
+    REAL(KIND=JPRB) :: ZDPEVAP
+    REAL(KIND=JPRB) :: ZDTFORC
+    REAL(KIND=JPRB) :: ZDTDIAB
+    REAL(KIND=JPRB), device:: ZTP1(NLEV)
+    REAL(KIND=JPRB) :: ZLDEFR
+    REAL(KIND=JPRB) :: ZLDIFDT
+    REAL(KIND=JPRB) :: ZDTGDPF
+    REAL(KIND=JPRB), device :: ZLCUST(NCLV)
+    REAL(KIND=JPRB) :: ZACUST
+    REAL(KIND=JPRB) :: ZMF
+    
+    REAL(KIND=JPRB) :: ZRHO
+    REAL(KIND=JPRB) :: ZTMP1, ZTMP2, ZTMP3
+    REAL(KIND=JPRB) :: ZTMP4, ZTMP5, ZTMP6, ZTMP7
+    REAL(KIND=JPRB) :: ZALFAWM
+    
+    ! Accumulators of A,B,and C factors for cloud equations
+    REAL(KIND=JPRB) :: ZSOLAB    ! -ve implicit CC
+    REAL(KIND=JPRB) :: ZSOLAC    ! linear CC
+    REAL(KIND=JPRB) :: ZANEW
+    REAL(KIND=JPRB) :: ZANEWM1
+    
+    REAL(KIND=JPRB) :: ZGDP
+    
+    !---for flux calculation
+    REAL(KIND=JPRB) :: ZDA
+    REAL(KIND=JPRB), device :: ZLI(NLEV), ZA(NLEV)
+    REAL(KIND=JPRB), device :: ZAORIG(NLEV)
+    ! start of scheme value for CC
+    
+    LOGICAL :: LLFLAG
+    LOGICAL :: LLO1
+    
+    INTEGER(KIND=JPIM) :: ICALL, IK, JK, JL, JBLK, JM, JN, JO, JLEN, IS
+    
+    REAL(KIND=JPRB) :: ZDP, ZPAPHD
+    
+    REAL(KIND=JPRB) :: ZALFA
+    ! & ZALFACU, ZALFALS
+    REAL(KIND=JPRB) :: ZALFAW
+    REAL(KIND=JPRB) :: ZBETA, ZBETA1
+    !REAL(KIND=JPRB) :: ZBOTT
+    REAL(KIND=JPRB) :: ZCFPR
+    REAL(KIND=JPRB) :: ZCOR
+    REAL(KIND=JPRB) :: ZCDMAX
+    REAL(KIND=JPRB) :: ZMIN
+    REAL(KIND=JPRB) :: ZLCONDLIM
+    REAL(KIND=JPRB) :: ZDENOM
+    REAL(KIND=JPRB) :: ZDPMXDT
+    REAL(KIND=JPRB) :: ZDPR
+    REAL(KIND=JPRB) :: ZDTDP
+    REAL(KIND=JPRB) :: ZE
+    REAL(KIND=JPRB) :: ZEPSEC
+    REAL(KIND=JPRB) :: ZFAC, ZFACI, ZFACW
+    REAL(KIND=JPRB) :: ZGDCP
+    REAL(KIND=JPRB) :: ZINEW
+    REAL(KIND=JPRB) :: ZLCRIT
+    REAL(KIND=JPRB) :: ZMFDN
+    REAL(KIND=JPRB) :: ZPRECIP
+    REAL(KIND=JPRB) :: ZQE
+    REAL(KIND=JPRB) :: ZQSAT, ZQTMST, ZRDCP
+    REAL(KIND=JPRB) :: ZRHC, ZSIG, ZSIGK
+    REAL(KIND=JPRB) :: ZWTOT
+    REAL(KIND=JPRB) :: ZZCO, ZZDL, ZZRH, ZZZDT, ZQADJ
+    REAL(KIND=JPRB) :: ZQNEW, ZTNEW
+    REAL(KIND=JPRB) :: ZRG_R, ZGDPH_R, ZCONS1, ZCOND, ZCONS1A
+    REAL(KIND=JPRB) :: ZLFINAL
+    REAL(KIND=JPRB) :: ZMELT
+    REAL(KIND=JPRB) :: ZEVAP
+    REAL(KIND=JPRB) :: ZFRZ
+    REAL(KIND=JPRB) :: ZVPLIQ, ZVPICE
+    REAL(KIND=JPRB) :: ZADD, ZBDD, ZCVDS, ZICE0, ZDEPOS
+    REAL(KIND=JPRB) :: ZSUPSAT
+    REAL(KIND=JPRB) :: ZFALL
+    REAL(KIND=JPRB) :: ZRE_ICE
+    REAL(KIND=JPRB) :: ZRLDCP
+    REAL(KIND=JPRB) :: ZQP1ENV
+    
+    !----------------------------
+    ! Arrays for new microphysics
+    !----------------------------
+    INTEGER(KIND=JPIM), device :: IPHASE(NCLV)
+    ! marker for water phase of each species
+    ! 0=vapour, 1=liquid, 2=ice
+    
+    INTEGER(KIND=JPIM), device :: IMELT(NCLV)
+    ! marks melting linkage for ice categories
+    ! ice->liquid, snow->rain
+    
+    LOGICAL :: LLFALL(NCLV)
+    ! marks falling species
+    ! LLFALL=0, cloud cover must > 0 for zqx > 0
+    ! LLFALL=1, no cloud needed, zqx can evaporate
+    
+    LOGICAL, device :: LLINDEX1(NCLV)    ! index variable
+    LOGICAL, device :: LLINDEX3(NCLV, NCLV)    ! index variable
+    REAL(KIND=JPRB) :: ZMAX
+    REAL(KIND=JPRB) :: ZRAT
+    INTEGER(KIND=JPIM), device :: IORDER(NCLV)
+    ! array for sorting explicit terms
+    
+    REAL(KIND=JPRB), device :: ZLIQFRAC(NLEV)    ! cloud liquid water fraction: ql/(ql+qi)
+    REAL(KIND=JPRB), device :: ZICEFRAC(NLEV)    ! cloud ice water fraction: qi/(ql+qi)
+    REAL(KIND=JPRB), device:: ZQX(NLEV, NCLV)    ! water variables
+    REAL(KIND=JPRB), device:: ZQX0(NLEV, NCLV)    ! water variables at start of scheme
+    REAL(KIND=JPRB), device :: ZQXN(NCLV)    ! new values for zqx at time+1
+    REAL(KIND=JPRB), device :: ZQXFG(NCLV)    ! first guess values including precip
+    REAL(KIND=JPRB), device :: ZQXNM1(NCLV)    ! new values for zqx at time+1 at level above
+    REAL(KIND=JPRB), device :: ZFLUXQ(NCLV)
+    ! fluxes convergence of species (needed?)
+    ! Keep the following for possible future total water variance scheme?
+    !REAL(KIND=JPRB) :: ZTL(KLON,NLEV)       ! liquid water temperature
+    !REAL(KIND=JPRB) :: ZABETA(KLON,NLEV)    ! cloud fraction
+    !REAL(KIND=JPRB) :: ZVAR(KLON,NLEV)      ! temporary variance
+    !REAL(KIND=JPRB) :: ZQTMIN(KLON,NLEV)
+    !REAL(KIND=JPRB) :: ZQTMAX(KLON,NLEV)
+    
+    REAL(KIND=JPRB), device :: ZPFPLSX(NLEV + 1, NCLV)    ! generalized precipitation flux
+    REAL(KIND=JPRB), device :: ZLNEG(NLEV, NCLV)    ! for negative correction diagnostics
+    REAL(KIND=JPRB) :: ZMELTMAX
+    REAL(KIND=JPRB) :: ZFRZMAX
+    REAL(KIND=JPRB) :: ZICETOT
+    
+    REAL(KIND=JPRB), device :: ZQXN2D(NLEV, NCLV)
+    ! water variables store
+    
+    REAL(KIND=JPRB), device :: ZQSMIX(NLEV)
+    ! diagnostic mixed phase saturation
+    !REAL(KIND=JPRB) :: ZQSBIN(KLON,NLEV) ! binary switched ice/liq saturation
+    REAL(KIND=JPRB), device :: ZQSLIQ(NLEV)    ! liquid water saturation
+    REAL(KIND=JPRB), device :: ZQSICE(NLEV)
+    ! ice water saturation
+    
+    !REAL(KIND=JPRB) :: ZRHM(KLON,NLEV) ! diagnostic mixed phase RH
+    !REAL(KIND=JPRB) :: ZRHL(KLON,NLEV) ! RH wrt liq
+    !REAL(KIND=JPRB) :: ZRHI(KLON,NLEV) ! RH wrt ice
+    
+    REAL(KIND=JPRB), device :: ZFOEEWMT(NLEV)
+    REAL(KIND=JPRB), device :: ZFOEEW(NLEV)
+    REAL(KIND=JPRB), device :: ZFOEELIQT(NLEV)
+    !REAL(KIND=JPRB) :: ZFOEEICET(KLON,NLEV)
+    
+    REAL(KIND=JPRB) :: ZDQSLIQDT, ZDQSICEDT, ZDQSMIXDT
+    REAL(KIND=JPRB) :: ZCORQSLIQ
+    REAL(KIND=JPRB) :: ZCORQSICE
+    !REAL(KIND=JPRB) :: ZCORQSBIN(KLON)
+    REAL(KIND=JPRB) :: ZCORQSMIX
+    REAL(KIND=JPRB) :: ZEVAPLIMLIQ, ZEVAPLIMICE, ZEVAPLIMMIX
+    
+    !-------------------------------------------------------
+    ! SOURCE/SINK array for implicit and explicit terms
+    !-------------------------------------------------------
+    ! a POSITIVE value entered into the arrays is a...
+    !            Source of this variable
+    !            |
+    !            |   Sink of this variable
+    !            |   |
+    !            V   V
+    ! ZSOLQA(JL,IQa,IQb)  = explicit terms
+    ! ZSOLQB(JL,IQa,IQb)  = implicit terms
+    ! Thus if ZSOLAB(JL,NCLDQL,IQV)=K where K>0 then this is
+    ! a source of NCLDQL and a sink of IQV
+    ! put 'magic' source terms such as PLUDE from
+    ! detrainment into explicit source/sink array diagonal
+    ! ZSOLQA(NCLDQL,NCLDQL)= -PLUDE
+    ! i.e. A positive value is a sink!????? weird...
+    !-------------------------------------------------------
+    
+    REAL(KIND=JPRB), device :: ZSOLQA(NCLV, NCLV)    ! explicit sources and sinks
+    REAL(KIND=JPRB), device :: ZSOLQB(NCLV, NCLV)
+    ! implicit sources and sinks
+    ! e.g. microphysical pathways between ice variables.
+    REAL(KIND=JPRB), device :: ZQLHS(NCLV, NCLV)    ! n x n matrix storing the LHS of implicit solver
+    REAL(KIND=JPRB), device :: ZVQX(NCLV)    ! fall speeds of three categories
+    REAL(KIND=JPRB), device :: ZEXPLICIT, ZRATIO(NCLV), ZSINKSUM(NCLV)
+    
+    ! for sedimentation source/sink terms
+    REAL(KIND=JPRB), device :: ZFALLSINK(NCLV)
+    REAL(KIND=JPRB), device :: ZFALLSRCE(NCLV)
+    
+    ! for convection detrainment source and subsidence source/sink terms
+    REAL(KIND=JPRB), device :: ZCONVSRCE(NCLV)
+    REAL(KIND=JPRB), device :: ZCONVSINK(NCLV)
+    
+    ! for supersaturation source term from previous timestep
+    REAL(KIND=JPRB), device :: ZPSUPSATSRCE(NCLV)
+    
+    ! Numerical fit to wet bulb temperature
+    REAL(KIND=JPRB), PARAMETER :: ZTW1 = 1329.31_JPRB
+    REAL(KIND=JPRB), PARAMETER :: ZTW2 = 0.0074615_JPRB
+    REAL(KIND=JPRB), PARAMETER :: ZTW3 = 0.85E5_JPRB
+    REAL(KIND=JPRB), PARAMETER :: ZTW4 = 40.637_JPRB
+    REAL(KIND=JPRB), PARAMETER :: ZTW5 = 275.0_JPRB
+    
+    REAL(KIND=JPRB) :: ZSUBSAT    ! Subsaturation for snow melting term
+    REAL(KIND=JPRB) :: ZTDMTW0
+    ! Diff between dry-bulb temperature and
+    ! temperature when wet-bulb = 0degC
+    
+    ! Variables for deposition term
+    REAL(KIND=JPRB) :: ZTCG    ! Temperature dependent function for ice PSD
+    REAL(KIND=JPRB) :: ZFACX1I, ZFACX1S    ! PSD correction factor
+    REAL(KIND=JPRB) :: ZAPLUSB, ZCORRFAC, ZCORRFAC2, ZPR02, ZTERM1, ZTERM2    ! for ice dep
+    REAL(KIND=JPRB) :: ZCLDTOPDIST    ! Distance from cloud top
+    REAL(KIND=JPRB) :: ZINFACTOR
+    ! No. of ice nuclei factor for deposition
+    
+    ! Autoconversion/accretion/riming/evaporation
+    INTEGER(KIND=JPIM) :: IWARMRAIN
+    INTEGER(KIND=JPIM) :: IEVAPRAIN
+    INTEGER(KIND=JPIM) :: IEVAPSNOW
+    INTEGER(KIND=JPIM) :: IDEPICE
+    REAL(KIND=JPRB) :: ZRAINACC
+    REAL(KIND=JPRB) :: ZRAINCLD
+    REAL(KIND=JPRB) :: ZSNOWRIME
+    REAL(KIND=JPRB) :: ZSNOWCLD
+    REAL(KIND=JPRB) :: ZESATLIQ
+    REAL(KIND=JPRB) :: ZFALLCORR
+    REAL(KIND=JPRB) :: ZLAMBDA
+    REAL(KIND=JPRB) :: ZEVAP_DENOM
+    REAL(KIND=JPRB) :: ZCORR2
+    REAL(KIND=JPRB) :: ZKA
+    REAL(KIND=JPRB) :: ZCONST
+    REAL(KIND=JPRB) :: ZTEMP
+    
+    ! Rain freezing
+    LOGICAL :: LLRAINLIQ
+    ! True if majority of raindrops are liquid (no ice core)
+    
+    !----------------------------
+    ! End: new microphysics
+    !----------------------------
+    
+    !----------------------
+    ! SCM budget statistics
+    !----------------------
+    REAL(KIND=JPRB) :: ZRAIN
+    
+    REAL(KIND=JPRB) :: ZHOOK_HANDLE
+    REAL(KIND=JPRB) :: ZTMPL, ZTMPI, ZTMPA
+    
+    REAL(KIND=JPRB) :: ZMM, ZRR
+    REAL(KIND=JPRB) :: ZRG
+    
+    REAL(KIND=JPRB) :: ZZSUM, ZZRATIO
+    REAL(KIND=JPRB) :: ZEPSILON
+    
+    REAL(KIND=JPRB) :: ZCOND1, ZQP
+    
+    REAL(KIND=JPRB) :: PSUM_SOLQA
+    
+    
+    
+#ifdef CLOUDSC_STMT_FUNC
+#include "fcttre.func.h"
+#include "fccld.func.h"
+#endif
+    
+      JL=THREADIDX%X
+      JBLK=BLOCKIDX%Z
+      
+      
+      !===============================================================================
+      !IF (LHOOK) CALL DR_HOOK('CLOUDSC',0,ZHOOK_HANDLE)
+      
+      !===============================================================================
+      !  0.0     Beginning of timestep book-keeping
+      !----------------------------------------------------------------------
+      
+      
+      !######################################################################
+      !             0.  *** SET UP CONSTANTS ***
+      !######################################################################
+      
+      ZEPSILON = 100._JPRB*EPSILON(ZEPSILON)
+      
+      ! ---------------------------------------------------------------------
+      ! Set version of warm-rain autoconversion/accretion
+      ! IWARMRAIN = 1 ! Sundquist
+      ! IWARMRAIN = 2 ! Khairoutdinov and Kogan (2000)
+      ! ---------------------------------------------------------------------
+      IWARMRAIN = 2
+      ! ---------------------------------------------------------------------
+      ! Set version of rain evaporation
+      ! IEVAPRAIN = 1 ! Sundquist
+      ! IEVAPRAIN = 2 ! Abel and Boutle (2013)
+      ! ---------------------------------------------------------------------
+      IEVAPRAIN = 2
+      ! ---------------------------------------------------------------------
+      ! Set version of snow evaporation
+      ! IEVAPSNOW = 1 ! Sundquist
+      ! IEVAPSNOW = 2 ! New
+      ! ---------------------------------------------------------------------
+      IEVAPSNOW = 1
+      ! ---------------------------------------------------------------------
+      ! Set version of ice deposition
+      ! IDEPICE = 1 ! Rotstayn (2001)
+      ! IDEPICE = 2 ! New
+      ! ---------------------------------------------------------------------
+      IDEPICE = 1
+      
+      ! ---------------------
+      ! Some simple constants
+      ! ---------------------
+      ZQTMST = 1.0_JPRB / PTSPHY
+      ZGDCP = RG / RCPD
+      Zrdcp = RD / RCPD
+!!      ZCONS1A = RCPD / ((RLMLT*RG*YRECLDP%RTAUMEL))
+      ZCONS1A = RCPD / ((RLMLT*RG*YRECLDP%RTAUMEL))
+      ZEPSEC = 1.E-14_JPRB
+      ZRG_R = 1.0_JPRB / RG
+      ZRLDCP = 1.0_JPRB / (RALSDCP - RALVDCP)
+      
+      ! Note: Defined in module/yoecldp.F90
+      ! NCLDQL=1    ! liquid cloud water
+      ! NCLDQI=2    ! ice cloud water
+      ! NCLDQR=3    ! rain water
+      ! NCLDQS=4    ! snow
+      ! NCLDQV=5    ! vapour
+      
+      ! -----------------------------------------------
+      ! Define species phase, 0=vapour, 1=liquid, 2=ice
+      ! -----------------------------------------------
+      IPHASE(NCLDQV) = 0
+      IPHASE(NCLDQL) = 1
+      IPHASE(NCLDQR) = 1
+      IPHASE(NCLDQI) = 2
+      IPHASE(NCLDQS) = 2
+      
+      ! ---------------------------------------------------
+      ! Set up melting/freezing index,
+      ! if an ice category melts/freezes, where does it go?
+      ! ---------------------------------------------------
+      IMELT(NCLDQV) = -99
+      IMELT(NCLDQL) = NCLDQI
+      IMELT(NCLDQR) = NCLDQS
+      IMELT(NCLDQI) = NCLDQR
+      IMELT(NCLDQS) = NCLDQR
+      
+      ! -----------------------------------------------
+      ! INITIALIZATION OF OUTPUT TENDENCIES
+      ! -----------------------------------------------
+!! !$acc loop seq
+      DO JK=1,NLEV
+        TENDENCY_LOC(JL, JK, 1, JBLK) = 0.0_JPRB
+        TENDENCY_LOC(JL, JK, 2, JBLK) = 0.0_JPRB
+        TENDENCY_LOC(JL, JK, 3, JBLK) = 0.0_JPRB
+      END DO
+!! !$acc loop seq
+      DO JM=1,NCLV - 1
+        DO JK=1,NLEV
+          TENDENCY_LOC(JL, JK, 3+JM, JBLK) = 0.0_JPRB
+        END DO
+      END DO
+      
+      !-- These were uninitialized : meaningful only when we compare error differences
+!! !$acc loop seq
+      DO JK=1,NLEV
+        PCOVPTOT(JL, JK, JBLK) = 0.0_JPRB
+        TENDENCY_LOC(JL, JK, 3+NCLV, JBLK) = 0.0_JPRB
+      END DO
+      
+      ! -------------------------
+      ! set up fall speeds in m/s
+      ! -------------------------
+      ZVQX(NCLDQV) = 0.0_JPRB
+      ZVQX(NCLDQL) = 0.0_JPRB
+!!      ZVQX(NCLDQI) = YRECLDP%RVICE
+      ZVQX(NCLDQI) = YRECLDP%RVICE
+!!      ZVQX(NCLDQR) = YRECLDP%RVRAIN
+      ZVQX(NCLDQR) = YRECLDP%RVRAIN
+!!      ZVQX(NCLDQS) = YRECLDP%RVSNOW
+      ZVQX(NCLDQS) = YRECLDP%RVSNOW
+      LLFALL(:) = .false.
+!! !$acc loop seq
+      DO JM=1,NCLV
+        IF (ZVQX(JM) > 0.0_JPRB)         LLFALL(JM) = .true.
+        ! falling species
+      END DO
+      ! Set LLFALL to false for ice (but ice still sediments!)
+      ! Need to rationalise this at some point
+      LLFALL(NCLDQI) = .false.
+      
+      
+      !######################################################################
+      !             1.  *** INITIAL VALUES FOR VARIABLES ***
+      !######################################################################
+      
+      
+      ! ----------------------
+      ! non CLV initialization
+      ! ----------------------
+!! !$acc loop seq
+      DO JK=1,NLEV
+        ZTP1(JK) = PT(JL, JK, JBLK) + PTSPHY*TENDENCY_TMP(JL, JK, 1, JBLK)
+        ZQX(JK, NCLDQV) = PQ(JL, JK, JBLK) + PTSPHY*TENDENCY_TMP(JL, JK, 3, JBLK)
+        ZQX0(JK, NCLDQV) = PQ(JL, JK, JBLK) + PTSPHY*TENDENCY_TMP(JL, JK, 3, JBLK)
+        ZA(JK) = PA(JL, JK, JBLK) + PTSPHY*TENDENCY_TMP(JL, JK, 2, JBLK)
+        ZAORIG(JK) = PA(JL, JK, JBLK) + PTSPHY*TENDENCY_TMP(JL, JK, 2, JBLK)
+      END DO
+      
+      ! -------------------------------------
+      ! initialization for CLV family
+      ! -------------------------------------
+!! !$acc loop seq
+      DO JM=1,NCLV - 1
+        DO JK=1,NLEV
+          ZQX(JK, JM) = PCLV(JL, JK, JM, JBLK) + PTSPHY*TENDENCY_TMP(JL, JK, 3+JM, JBLK)
+          ZQX0(JK, JM) = PCLV(JL, JK, JM, JBLK) + PTSPHY*TENDENCY_TMP(JL, JK, 3+JM, JBLK)
+        END DO
+      END DO
+      
+      !-------------
+      ! zero arrays
+      !-------------
+!! !$acc loop seq
+      DO JM=1,NCLV
+        DO JK=1,NLEV + 1
+          ZPFPLSX(JK,JM) = 0.0_JPRB            ! precip fluxes
+        END DO
+      END DO
+      
+!! !$acc loop seq
+      DO JM=1,NCLV
+        DO JK=1,NLEV
+          ZQXN2D(JK, JM) = 0.0_JPRB            ! end of timestep values in 2D
+          ZLNEG(JK, JM) = 0.0_JPRB            ! negative input check
+        END DO
+      END DO
+      
+      PRAINFRAC_TOPRFZ(JL,JBLK) = 0.0_JPRB        ! rain fraction at top of refreezing layer
+      LLRAINLIQ = .true.        ! Assume all raindrops are liquid initially
+      
+      ! ----------------------------------------------------
+      ! Tidy up very small cloud cover or total cloud water
+      ! ----------------------------------------------------
+!! !$acc loop seq
+      DO JK=1,NLEV
+!!        IF (ZQX(JK, NCLDQL) + ZQX(JK, NCLDQI) < YRECLDP%RLMIN .or. ZA(JK) < YRECLDP%RAMIN) THEN
+        IF (ZQX(JK, NCLDQL) + ZQX(JK, NCLDQI) < YRECLDP%RLMIN .or. ZA(JK) < YRECLDP%RAMIN) THEN
+          
+          ! Evaporate small cloud liquid water amounts
+          ZLNEG(JK, NCLDQL) = ZLNEG(JK, NCLDQL) + ZQX(JK, NCLDQL)
+          ZQADJ = ZQX(JK, NCLDQL)*ZQTMST
+          TENDENCY_LOC(JL, JK, 3, JBLK) = TENDENCY_LOC(JL, JK, 3, JBLK) + ZQADJ
+          TENDENCY_LOC(JL, JK, 1, JBLK) = TENDENCY_LOC(JL, JK, 1, JBLK) - RALVDCP*ZQADJ
+          ZQX(JK, NCLDQV) = ZQX(JK, NCLDQV) + ZQX(JK, NCLDQL)
+          ZQX(JK, NCLDQL) = 0.0_JPRB
+          
+          ! Evaporate small cloud ice water amounts
+          ZLNEG(JK, NCLDQI) = ZLNEG(JK, NCLDQI) + ZQX(JK, NCLDQI)
+          ZQADJ = ZQX(JK, NCLDQI)*ZQTMST
+          TENDENCY_LOC(JL, JK, 3, JBLK) = TENDENCY_LOC(JL, JK, 3, JBLK) + ZQADJ
+          TENDENCY_LOC(JL, JK, 1, JBLK) = TENDENCY_LOC(JL, JK, 1, JBLK) - RALSDCP*ZQADJ
+          ZQX(JK, NCLDQV) = ZQX(JK, NCLDQV) + ZQX(JK, NCLDQI)
+          ZQX(JK, NCLDQI) = 0.0_JPRB
+          
+          ! Set cloud cover to zero
+          ZA(JK) = 0.0_JPRB
+          
+        END IF
+      END DO
+      
+      ! ---------------------------------
+      ! Tidy up small CLV variables
+      ! ---------------------------------
+      !DIR$ IVDEP
+!! !$acc loop seq
+      DO JM=1,NCLV - 1
+        !DIR$ IVDEP
+        DO JK=1,NLEV
+          !DIR$ IVDEP
+!!          IF (ZQX(JK, JM) < YRECLDP%RLMIN) THEN
+          IF (ZQX(JK, JM) < YRECLDP%RLMIN) THEN
+            ZLNEG(JK, JM) = ZLNEG(JK, JM) + ZQX(JK, JM)
+            ZQADJ = ZQX(JK, JM)*ZQTMST
+            TENDENCY_LOC(JL, JK, 3, JBLK) = TENDENCY_LOC(JL, JK, 3, JBLK) + ZQADJ
+            IF (IPHASE(JM) == 1)             TENDENCY_LOC(JL, JK, 1, JBLK) = TENDENCY_LOC(JL, JK, 1, JBLK) - RALVDCP*ZQADJ
+            IF (IPHASE(JM) == 2)             TENDENCY_LOC(JL, JK, 1, JBLK) = TENDENCY_LOC(JL, JK, 1, JBLK) - RALSDCP*ZQADJ
+            ZQX(JK, NCLDQV) = ZQX(JK, NCLDQV) + ZQX(JK, JM)
+            ZQX(JK, JM) = 0.0_JPRB
+          END IF
+        END DO
+      END DO
+      
+      
+      ! ------------------------------
+      ! Define saturation values
+      ! ------------------------------
+!! !$acc loop seq
+      DO JK=1,NLEV
+        !----------------------------------------
+        ! old *diagnostic* mixed phase saturation
+        !----------------------------------------
+        ZFOEALFA(JK) = FOEALFA(ZTP1(JK))
+        ZFOEEWMT(JK) = MIN(FOEEWM(ZTP1(JK)) / PAP(JL, JK, JBLK), 0.5_JPRB)
+        ZQSMIX(JK) = ZFOEEWMT(JK)
+        ZQSMIX(JK) = ZQSMIX(JK) / (1.0_JPRB - RETV*ZQSMIX(JK))
+        
+        !---------------------------------------------
+        ! ice saturation T<273K
+        ! liquid water saturation for T>273K
+        !---------------------------------------------
+        ZALFA = FOEDELTA(ZTP1(JK))
+        ZFOEEW(JK) = MIN((ZALFA*FOEELIQ(ZTP1(JK)) + (1.0_JPRB - ZALFA)*FOEEICE(ZTP1(JK))) / PAP(JL, JK, JBLK), 0.5_JPRB)
+        ZFOEEW(JK) = MIN(0.5_JPRB, ZFOEEW(JK))
+        ZQSICE(JK) = ZFOEEW(JK) / (1.0_JPRB - RETV*ZFOEEW(JK))
+        
+        !----------------------------------
+        ! liquid water saturation
+        !----------------------------------
+        ZFOEELIQT(JK) = MIN(FOEELIQ(ZTP1(JK)) / PAP(JL, JK, JBLK), 0.5_JPRB)
+        ZQSLIQ(JK) = ZFOEELIQT(JK)
+        ZQSLIQ(JK) = ZQSLIQ(JK) / (1.0_JPRB - RETV*ZQSLIQ(JK))
+        
+        !   !----------------------------------
+        !   ! ice water saturation
+        !   !----------------------------------
+        !   ZFOEEICET(JL,JK, JBLK)=MIN(FOEEICE(ZTP1(JK))/PAP(JL,JK, JBLK),0.5_JPRB)
+        !   ZQSICE(JL,JK, JBLK)=ZFOEEICET(JL,JK, JBLK)
+        !   ZQSICE(JL,JK, JBLK)=ZQSICE(JL,JK, JBLK)/(1.0_JPRB-RETV*ZQSICE(JL,JK, JBLK))
+        
+      END DO
+      
+!! !$acc loop seq
+      DO JK=1,NLEV
+        
+        
+        !------------------------------------------
+        ! Ensure cloud fraction is between 0 and 1
+        !------------------------------------------
+        ZA(JK) = MAX(0.0_JPRB, MIN(1.0_JPRB, ZA(JK)))
+        
+        !-------------------------------------------------------------------
+        ! Calculate liq/ice fractions (no longer a diagnostic relationship)
+        !-------------------------------------------------------------------
+        ZLI(JK) = ZQX(JK, NCLDQL) + ZQX(JK, NCLDQI)
+!!        IF (ZLI(JK) > YRECLDP%RLMIN) THEN
+        IF (ZLI(JK) > YRECLDP%RLMIN) THEN
+          ZLIQFRAC(JK) = ZQX(JK, NCLDQL) / ZLI(JK)
+          ZICEFRAC(JK) = 1.0_JPRB - ZLIQFRAC(JK)
+        ELSE
+          ZLIQFRAC(JK) = 0.0_JPRB
+          ZICEFRAC(JK) = 0.0_JPRB
+        END IF
+        
+      END DO
+      
+      !######################################################################
+      !        2.       *** CONSTANTS AND PARAMETERS ***
+      !######################################################################
+      !  Calculate L in updrafts of bl-clouds
+      !  Specify QS, P/PS for tropopause (for c2)
+      !  And initialize variables
+      !------------------------------------------
+      
+      !---------------------------------
+      ! Find tropopause level (ZTRPAUS)
+      !---------------------------------
+      ZTRPAUS = 0.1_JPRB
+      ZPAPHD = 1.0_JPRB / PAPH(JL, NLEV + 1, JBLK)
+!! !$acc loop seq
+      DO JK=1,NLEV - 1
+        ZSIG = PAP(JL, JK, JBLK)*ZPAPHD
+        IF (ZSIG > 0.1_JPRB .and. ZSIG < 0.4_JPRB .and. ZTP1(JK) > ZTP1(JK + 1)) THEN
+          ZTRPAUS = ZSIG
+        END IF
+      END DO
+      
+      !-----------------------------
+      ! Reset single level variables
+      !-----------------------------
+      
+      ZANEWM1 = 0.0_JPRB
+      ZDA = 0.0_JPRB
+      ZCOVPCLR = 0.0_JPRB
+      ZCOVPMAX = 0.0_JPRB
+      ZCOVPTOT = 0.0_JPRB
+      ZCLDTOPDIST = 0.0_JPRB
+      
+      !######################################################################
+      !           3.       *** PHYSICS ***
+      !######################################################################
+      
+      
+      !----------------------------------------------------------------------
+      !                       START OF VERTICAL LOOP
+      !----------------------------------------------------------------------
+      
+!! !$acc loop seq
+!!      DO JK=YRECLDP%NCLDTOP,NLEV
+      DO JK=YRECLDP%NCLDTOP,NLEV
+        
+        !----------------------------------------------------------------------
+        ! 3.0 INITIALIZE VARIABLES
+        !----------------------------------------------------------------------
+        
+        !---------------------------------
+        ! First guess microphysics
+        !---------------------------------
+        DO JM=1,NCLV
+          ZQXFG(JM) = ZQX(JK, JM)
+        END DO
+        
+        !---------------------------------
+        ! Set KLON arrays to zero
+        !---------------------------------
+        
+        ZLICLD = 0.0_JPRB
+        ZRAINAUT = 0.0_JPRB          ! currently needed for diags
+        ZRAINACC = 0.0_JPRB          ! currently needed for diags
+        ZSNOWAUT = 0.0_JPRB          ! needed
+        ZLDEFR = 0.0_JPRB
+        ZACUST = 0.0_JPRB          ! set later when needed
+        ZQPRETOT = 0.0_JPRB
+        ZLFINALSUM = 0.0_JPRB
+        
+        ! Required for first guess call
+        ZLCOND1 = 0.0_JPRB
+        ZLCOND2 = 0.0_JPRB
+        ZSUPSAT = 0.0_JPRB
+        ZLEVAPL = 0.0_JPRB
+        ZLEVAPI = 0.0_JPRB
+        
+        !-------------------------------------
+        ! solvers for cloud fraction
+        !-------------------------------------
+        ZSOLAB = 0.0_JPRB
+        ZSOLAC = 0.0_JPRB
+        
+        ZICETOT = 0.0_JPRB
+        
+        !------------------------------------------
+        ! reset matrix so missing pathways are set
+        !------------------------------------------
+        DO JM=1,NCLV
+          DO JN=1,NCLV
+            ZSOLQB(JN, JM) = 0.0_JPRB
+            ZSOLQA(JN, JM) = 0.0_JPRB
+          END DO
+        END DO
+        
+        !----------------------------------
+        ! reset new microphysics variables
+        !----------------------------------
+        DO JM=1,NCLV
+          ZFALLSRCE(JM) = 0.0_JPRB
+          ZFALLSINK(JM) = 0.0_JPRB
+          ZCONVSRCE(JM) = 0.0_JPRB
+          ZCONVSINK(JM) = 0.0_JPRB
+          ZPSUPSATSRCE(JM) = 0.0_JPRB
+          ZRATIO(JM) = 0.0_JPRB
+        END DO
+        
+        
+        !-------------------------
+        ! derived variables needed
+        !-------------------------
+        
+        ZDP = PAPH(JL, JK + 1, JBLK) - PAPH(JL, JK, JBLK)          ! dp
+        ZGDP = RG / ZDP          ! g/dp
+        ZRHO = PAP(JL, JK, JBLK) / ((RD*ZTP1(JK)))          ! p/RT air density
+        
+        ZDTGDP = PTSPHY*ZGDP          ! dt g/dp
+        ZRDTGDP = ZDP*(1.0_JPRB / ((PTSPHY*RG)))          ! 1/(dt g/dp)
+        
+        IF (JK > 1)         ZDTGDPF = (PTSPHY*RG) / (PAP(JL, JK, JBLK) - PAP(JL, JK - 1, JBLK))
+        
+        !------------------------------------
+        ! Calculate dqs/dT correction factor
+        !------------------------------------
+        ! Reminder: RETV=RV/RD-1
+        
+        ! liquid
+        ZFACW = R5LES / ((ZTP1(JK) - R4LES)**2)
+        ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZFOEELIQT(JK))
+        ZDQSLIQDT = ZFACW*ZCOR*ZQSLIQ(JK)
+        ZCORQSLIQ = 1.0_JPRB + RALVDCP*ZDQSLIQDT
+        
+        ! ice
+        ZFACI = R5IES / ((ZTP1(JK) - R4IES)**2)
+        ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZFOEEW(JK))
+        ZDQSICEDT = ZFACI*ZCOR*ZQSICE(JK)
+        ZCORQSICE = 1.0_JPRB + RALSDCP*ZDQSICEDT
+        
+        ! diagnostic mixed
+        ZALFAW = ZFOEALFA(JK)
+        ZALFAWM = ZALFAW
+        ZFAC = ZALFAW*ZFACW + (1.0_JPRB - ZALFAW)*ZFACI
+        ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZFOEEWMT(JK))
+        ZDQSMIXDT = ZFAC*ZCOR*ZQSMIX(JK)
+        ZCORQSMIX = 1.0_JPRB + FOELDCPM(ZTP1(JK))*ZDQSMIXDT
+        
+        ! evaporation/sublimation limits
+        ZEVAPLIMMIX = MAX((ZQSMIX(JK) - ZQX(JK, NCLDQV)) / ZCORQSMIX, 0.0_JPRB)
+        ZEVAPLIMLIQ = MAX((ZQSLIQ(JK) - ZQX(JK, NCLDQV)) / ZCORQSLIQ, 0.0_JPRB)
+        ZEVAPLIMICE = MAX((ZQSICE(JK) - ZQX(JK, NCLDQV)) / ZCORQSICE, 0.0_JPRB)
+        
+        !--------------------------------
+        ! in-cloud condensate amount
+        !--------------------------------
+        ZTMPA = 1.0_JPRB / MAX(ZA(JK), ZEPSEC)
+        ZLIQCLD = ZQX(JK, NCLDQL)*ZTMPA
+        ZICECLD = ZQX(JK, NCLDQI)*ZTMPA
+        ZLICLD = ZLIQCLD + ZICECLD
+        
+        
+        !------------------------------------------------
+        ! Evaporate very small amounts of liquid and ice
+        !------------------------------------------------
+        
+!!        IF (ZQX(JK, NCLDQL) < YRECLDP%RLMIN) THEN
+        IF (ZQX(JK, NCLDQL) < YRECLDP%RLMIN) THEN
+          ZSOLQA(NCLDQV, NCLDQL) = ZQX(JK, NCLDQL)
+          ZSOLQA(NCLDQL, NCLDQV) = -ZQX(JK, NCLDQL)
+        END IF
+        
+!!        IF (ZQX(JK, NCLDQI) < YRECLDP%RLMIN) THEN
+        IF (ZQX(JK, NCLDQI) < YRECLDP%RLMIN) THEN
+          ZSOLQA(NCLDQV, NCLDQI) = ZQX(JK, NCLDQI)
+          ZSOLQA(NCLDQI, NCLDQV) = -ZQX(JK, NCLDQI)
+        END IF
+        
+        
+        !---------------------------------------------------------------------
+        !  3.1  ICE SUPERSATURATION ADJUSTMENT
+        !---------------------------------------------------------------------
+        ! Note that the supersaturation adjustment is made with respect to
+        ! liquid saturation:  when T>0C
+        ! ice saturation:     when T<0C
+        !                     with an adjustment made to allow for ice
+        !                     supersaturation in the clear sky
+        ! Note also that the KOOP factor automatically clips the supersaturation
+        ! to a maximum set by the liquid water saturation mixing ratio
+        ! important for temperatures near to but below 0C
+        !-----------------------------------------------------------------------
+        
+        !DIR$ NOFUSION
+        
+        !-----------------------------------
+        ! 3.1.1 Supersaturation limit (from Koop)
+        !-----------------------------------
+        ! Needs to be set for all temperatures
+        ZFOKOOP = FOKOOP(ZTP1(JK))
+        
+!!        IF (ZTP1(JK) >= RTT .or. YRECLDP%NSSOPT == 0) THEN
+        IF (ZTP1(JK) >= RTT .or. YRECLDP%NSSOPT == 0) THEN
+          ZFAC = 1.0_JPRB
+          ZFACI = 1.0_JPRB
+        ELSE
+          ZFAC = ZA(JK) + ZFOKOOP*(1.0_JPRB - ZA(JK))
+!!          ZFACI = PTSPHY / YRECLDP%RKOOPTAU
+          ZFACI = PTSPHY / YRECLDP%RKOOPTAU
+        END IF
+        
+        !-------------------------------------------------------------------
+        ! 3.1.2 Calculate supersaturation wrt Koop including dqs/dT
+        !       correction factor
+        ! [#Note: QSICE or QSLIQ]
+        !-------------------------------------------------------------------
+        
+        ! Calculate supersaturation to add to cloud
+!!        IF (ZA(JK) > 1.0_JPRB - YRECLDP%RAMIN) THEN
+        IF (ZA(JK) > 1.0_JPRB - YRECLDP%RAMIN) THEN
+          ZSUPSAT = MAX((ZQX(JK, NCLDQV) - ZFAC*ZQSICE(JK)) / ZCORQSICE, 0.0_JPRB)
+        ELSE
+          ! Calculate environmental humidity supersaturation
+          ZQP1ENV = (ZQX(JK, NCLDQV) - ZA(JK)*ZQSICE(JK)) / MAX(1.0_JPRB - ZA(JK), ZEPSILON)
+          !& SIGN(MAX(ABS(1.0_JPRB-ZA(JL,JK, JBLK)),ZEPSILON),1.0_JPRB-ZA(JL,JK, JBLK))
+          ZSUPSAT = MAX(((1.0_JPRB - ZA(JK))*(ZQP1ENV - ZFAC*ZQSICE(JK))) / ZCORQSICE, 0.0_JPRB)
+        END IF
+        
+        !-------------------------------------------------------------------
+        ! Here the supersaturation is turned into liquid water
+        ! However, if the temperature is below the threshold for homogeneous
+        ! freezing then the supersaturation is turned instantly to ice.
+        !--------------------------------------------------------------------
+        
+        IF (ZSUPSAT > ZEPSEC) THEN
+          
+!!          IF (ZTP1(JK) > YRECLDP%RTHOMO) THEN
+          IF (ZTP1(JK) > YRECLDP%RTHOMO) THEN
+            ! Turn supersaturation into liquid water
+            ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) + ZSUPSAT
+            ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) - ZSUPSAT
+            ! Include liquid in first guess
+            ZQXFG(NCLDQL) = ZQXFG(NCLDQL) + ZSUPSAT
+          ELSE
+            ! Turn supersaturation into ice water
+            ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) + ZSUPSAT
+            ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) - ZSUPSAT
+            ! Add ice to first guess for deposition term
+            ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZSUPSAT
+          END IF
+          
+          ! Increase cloud amount using RKOOPTAU timescale
+          ZSOLAC = (1.0_JPRB - ZA(JK))*ZFACI
+          
+        END IF
+        
+        !-------------------------------------------------------
+        ! 3.1.3 Include supersaturation from previous timestep
+        ! (Calculated in SLTEND if semi-lagrangian LDSLPHY=T)
+        !-------------------------------------------------------
+        IF (PSUPSAT(JL, JK, JBLK) > ZEPSEC) THEN
+!!          IF (ZTP1(JK) > YRECLDP%RTHOMO) THEN
+          IF (ZTP1(JK) > YRECLDP%RTHOMO) THEN
+            ! Turn supersaturation into liquid water
+            ZSOLQA(NCLDQL, NCLDQL) = ZSOLQA(NCLDQL, NCLDQL) + PSUPSAT(JL, JK, JBLK)
+            ZPSUPSATSRCE(NCLDQL) = PSUPSAT(JL, JK, JBLK)
+            ! Add liquid to first guess for deposition term
+            ZQXFG(NCLDQL) = ZQXFG(NCLDQL) + PSUPSAT(JL, JK, JBLK)
+            ! Store cloud budget diagnostics if required
+          ELSE
+            ! Turn supersaturation into ice water
+            ZSOLQA(NCLDQI, NCLDQI) = ZSOLQA(NCLDQI, NCLDQI) + PSUPSAT(JL, JK, JBLK)
+            ZPSUPSATSRCE(NCLDQI) = PSUPSAT(JL, JK, JBLK)
+            ! Add ice to first guess for deposition term
+            ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + PSUPSAT(JL, JK, JBLK)
+            ! Store cloud budget diagnostics if required
+          END IF
+          
+          ! Increase cloud amount using RKOOPTAU timescale
+          ZSOLAC = (1.0_JPRB - ZA(JK))*ZFACI
+          ! Store cloud budget diagnostics if required
+        END IF
+        
+        ! on JL
+        
+        !---------------------------------------------------------------------
+        !  3.2  DETRAINMENT FROM CONVECTION
+        !---------------------------------------------------------------------
+        ! * Diagnostic T-ice/liq split retained for convection
+        !    Note: This link is now flexible and a future convection
+        !    scheme can detrain explicit separate budgets of:
+        !    cloud water, ice, rain and snow
+        ! * There is no (1-ZA) multiplier term on the cloud detrainment
+        !    term, since is now written in mass-flux terms
+        ! [#Note: Should use ZFOEALFACU used in convection rather than ZFOEALFA]
+        !---------------------------------------------------------------------
+!!        IF (JK < NLEV .and. JK >= YRECLDP%NCLDTOP) THEN
+        IF (JK < NLEV .and. JK >= YRECLDP%NCLDTOP) THEN
+          
+          
+          PLUDE(JL, JK, JBLK) = PLUDE(JL, JK, JBLK)*ZDTGDP
+          
+!!          IF (LDCUM(JL, JBLK) .and. PLUDE(JL, JK, JBLK) > YRECLDP%RLMIN .and. PLU(JL, JK + 1, JBLK) > ZEPSEC) THEN
+          IF (LDCUM(JL, JBLK) .and. PLUDE(JL, JK, JBLK) > YRECLDP%RLMIN .and. PLU(JL, JK + 1, JBLK) > ZEPSEC) THEN
+            
+            ZSOLAC = ZSOLAC + PLUDE(JL, JK, JBLK) / PLU(JL, JK + 1, JBLK)
+            ! *diagnostic temperature split*
+            ZALFAW = ZFOEALFA(JK)
+            ZCONVSRCE(NCLDQL) = ZALFAW*PLUDE(JL, JK, JBLK)
+            ZCONVSRCE(NCLDQI) = (1.0_JPRB - ZALFAW)*PLUDE(JL, JK, JBLK)
+            ZSOLQA(NCLDQL, NCLDQL) = ZSOLQA(NCLDQL, NCLDQL) + ZCONVSRCE(NCLDQL)
+            ZSOLQA(NCLDQI, NCLDQI) = ZSOLQA(NCLDQI, NCLDQI) + ZCONVSRCE(NCLDQI)
+            
+          ELSE
+            
+            PLUDE(JL, JK, JBLK) = 0.0_JPRB
+            
+          END IF
+          ! *convective snow detrainment source
+          IF (LDCUM(JL, JBLK))           ZSOLQA(NCLDQS, NCLDQS) = ZSOLQA(NCLDQS, NCLDQS) + PSNDE(JL, JK, JBLK)*ZDTGDP
+          
+          
+        END IF
+        ! JK<NLEV
+        
+        !---------------------------------------------------------------------
+        !  3.3  SUBSIDENCE COMPENSATING CONVECTIVE UPDRAUGHTS
+        !---------------------------------------------------------------------
+        ! Three terms:
+        ! * Convective subsidence source of cloud from layer above
+        ! * Evaporation of cloud within the layer
+        ! * Subsidence sink of cloud to the layer below (Implicit solution)
+        !---------------------------------------------------------------------
+        
+        !-----------------------------------------------
+        ! Subsidence source from layer above
+        !               and
+        ! Evaporation of cloud within the layer
+        !-----------------------------------------------
+!!        IF (JK > YRECLDP%NCLDTOP) THEN
+        IF (JK > YRECLDP%NCLDTOP) THEN
+          
+          ZMF = MAX(0.0_JPRB, (PMFU(JL, JK, JBLK) + PMFD(JL, JK, JBLK))*ZDTGDP)
+          ZACUST = ZMF*ZANEWM1
+          
+          DO JM=1,NCLV
+            IF (.not.LLFALL(JM) .and. IPHASE(JM) > 0) THEN
+              ZLCUST(JM) = ZMF*ZQXNM1(JM)
+              ! record total flux for enthalpy budget:
+              ZCONVSRCE(JM) = ZCONVSRCE(JM) + ZLCUST(JM)
+            END IF
+          END DO
+          
+          ! Now have to work out how much liquid evaporates at arrival point
+          ! since there is no prognostic memory for in-cloud humidity, i.e.
+          ! we always assume cloud is saturated.
+          
+          ZDTDP = (ZRDCP*0.5_JPRB*(ZTP1(JK - 1) + ZTP1(JK))) / PAPH(JL, JK, JBLK)
+          ZDTFORC = ZDTDP*(PAP(JL, JK, JBLK) - PAP(JL, JK - 1, JBLK))
+          ![#Note: Diagnostic mixed phase should be replaced below]
+          ZDQS = ZANEWM1*ZDTFORC*ZDQSMIXDT
+          
+          DO JM=1,NCLV
+            IF (.not.LLFALL(JM) .and. IPHASE(JM) > 0) THEN
+              ZLFINAL = MAX(0.0_JPRB, ZLCUST(JM) - ZDQS)                !lim to zero
+              ! no supersaturation allowed incloud ---V
+              ZEVAP = MIN((ZLCUST(JM) - ZLFINAL), ZEVAPLIMMIX)
+              !          ZEVAP=0.0_JPRB
+              ZLFINAL = ZLCUST(JM) - ZEVAP
+              ZLFINALSUM = ZLFINALSUM + ZLFINAL                ! sum
+              
+              ZSOLQA(JM, JM) = ZSOLQA(JM, JM) + ZLCUST(JM)                ! whole sum
+              ZSOLQA(NCLDQV, JM) = ZSOLQA(NCLDQV, JM) + ZEVAP
+              ZSOLQA(JM, NCLDQV) = ZSOLQA(JM, NCLDQV) - ZEVAP
+            END IF
+          END DO
+          
+          !  Reset the cloud contribution if no cloud water survives to this level:
+          IF (ZLFINALSUM < ZEPSEC)           ZACUST = 0.0_JPRB
+          ZSOLAC = ZSOLAC + ZACUST
+          
+        END IF
+        ! on  JK>NCLDTOP
+        
+        !---------------------------------------------------------------------
+        ! Subsidence sink of cloud to the layer below
+        ! (Implicit - re. CFL limit on convective mass flux)
+        !---------------------------------------------------------------------
+        
+        
+        IF (JK < NLEV) THEN
+          
+          ZMFDN = MAX(0.0_JPRB, (PMFU(JL, JK + 1, JBLK) + PMFD(JL, JK + 1, JBLK))*ZDTGDP)
+          
+          ZSOLAB = ZSOLAB + ZMFDN
+          ZSOLQB(NCLDQL, NCLDQL) = ZSOLQB(NCLDQL, NCLDQL) + ZMFDN
+          ZSOLQB(NCLDQI, NCLDQI) = ZSOLQB(NCLDQI, NCLDQI) + ZMFDN
+          
+          ! Record sink for cloud budget and enthalpy budget diagnostics
+          ZCONVSINK(NCLDQL) = ZMFDN
+          ZCONVSINK(NCLDQI) = ZMFDN
+          
+        END IF
+        
+        
+        !----------------------------------------------------------------------
+        ! 3.4  EROSION OF CLOUDS BY TURBULENT MIXING
+        !----------------------------------------------------------------------
+        ! NOTE: In default tiedtke scheme this process decreases the cloud
+        !       area but leaves the specific cloud water content
+        !       within clouds unchanged
+        !----------------------------------------------------------------------
+        
+        ! ------------------------------
+        ! Define turbulent erosion rate
+        ! ------------------------------
+!!        ZLDIFDT = YRECLDP%RCLDIFF*PTSPHY          !original version
+        ZLDIFDT = YRECLDP%RCLDIFF*PTSPHY          !original version
+        !Increase by factor of 5 for convective points
+!!        IF (KTYPE(JL,JBLK) > 0 .and. PLUDE(JL, JK, JBLK) > ZEPSEC)         ZLDIFDT = YRECLDP%RCLDIFF_CONVI*ZLDIFDT
+        IF (KTYPE(JL,JBLK) > 0 .and. PLUDE(JL, JK, JBLK) > ZEPSEC)         ZLDIFDT = YRECLDP%RCLDIFF_CONVI*ZLDIFDT
+        
+        ! At the moment, works on mixed RH profile and partitioned ice/liq fraction
+        ! so that it is similar to previous scheme
+        ! Should apply RHw for liquid cloud and RHi for ice cloud separately
+        IF (ZLI(JK) > ZEPSEC) THEN
+          ! Calculate environmental humidity
+          !      ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK, JBLK)*ZQSMIX(JL,JK, JBLK))/&
+          !    &      MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK, JBLK))
+          !      ZE=ZLDIFDT(JK)*MAX(ZQSMIX(JL,JK, JBLK)-ZQE,0.0_JPRB)
+          ZE = ZLDIFDT*MAX(ZQSMIX(JK) - ZQX(JK, NCLDQV), 0.0_JPRB)
+          ZLEROS = ZA(JK)*ZE
+          ZLEROS = MIN(ZLEROS, ZEVAPLIMMIX)
+          ZLEROS = MIN(ZLEROS, ZLI(JK))
+          ZAEROS = ZLEROS / ZLICLD            !if linear term
+          
+          ! Erosion is -ve LINEAR in L,A
+          ZSOLAC = ZSOLAC - ZAEROS            !linear
+          
+          ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) + ZLIQFRAC(JK)*ZLEROS
+          ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) - ZLIQFRAC(JK)*ZLEROS
+          ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) + ZICEFRAC(JK)*ZLEROS
+          ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) - ZICEFRAC(JK)*ZLEROS
+          
+        END IF
+        
+        !----------------------------------------------------------------------
+        ! 3.4  CONDENSATION/EVAPORATION DUE TO DQSAT/DT
+        !----------------------------------------------------------------------
+        !  calculate dqs/dt
+        !  Note: For the separate prognostic Qi and Ql, one would ideally use
+        !  Qsat/DT wrt liquid/Koop here, since the physics is that new clouds
+        !  form by liquid droplets [liq] or when aqueous aerosols [Koop] form.
+        !  These would then instantaneously freeze if T<-38C or lead to ice growth
+        !  by deposition in warmer mixed phase clouds.  However, since we do
+        !  not have a separate prognostic equation for in-cloud humidity or a
+        !  statistical scheme approach in place, the depositional growth of ice
+        !  in the mixed phase can not be modelled and we resort to supersaturation
+        !  wrt ice instantaneously converting to ice over one timestep
+        !  (see Tompkins et al. QJRMS 2007 for details)
+        !  Thus for the initial implementation the diagnostic mixed phase is
+        !  retained for the moment, and the level of approximation noted.
+        !----------------------------------------------------------------------
+        
+        ZDTDP = (ZRDCP*ZTP1(JK)) / PAP(JL, JK, JBLK)
+        ZDPMXDT = ZDP*ZQTMST
+        ZMFDN = 0.0_JPRB
+        IF (JK < NLEV)         ZMFDN = PMFU(JL, JK + 1, JBLK) + PMFD(JL, JK + 1, JBLK)
+        ZWTOT = PVERVEL(JL, JK, JBLK) + 0.5_JPRB*RG*(PMFU(JL, JK, JBLK) + PMFD(JL, JK, JBLK) + ZMFDN)
+        ZWTOT = MIN(ZDPMXDT, MAX(-ZDPMXDT, ZWTOT))
+        ZZZDT = PHRSW(JL, JK, JBLK) + PHRLW(JL, JK, JBLK)
+        ZDTDIAB = MIN(ZDPMXDT*ZDTDP, MAX(-ZDPMXDT*ZDTDP, ZZZDT))*PTSPHY + RALFDCP*ZLDEFR
+        ! Note: ZLDEFR should be set to the difference between the mixed phase functions
+        ! in the convection and cloud scheme, but this is not calculated, so is zero and
+        ! the functions must be the same
+        ZDTFORC = ZDTDP*ZWTOT*PTSPHY + ZDTDIAB
+        ZQOLD = ZQSMIX(JK)
+        ZTOLD = ZTP1(JK)
+        ZTP1(JK) = ZTP1(JK) + ZDTFORC
+        ZTP1(JK) = MAX(ZTP1(JK), 160.0_JPRB)
+        LLFLAG = .true.
+        
+        ! Formerly a call to CUADJTQ(..., ICALL=5)
+        ZQP = 1.0_JPRB / PAP(JL, JK, JBLK)
+        ZQSAT = FOEEWM(ZTP1(JK))*ZQP
+        ZQSAT = MIN(0.5_JPRB, ZQSAT)
+        ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZQSAT)
+        ZQSAT = ZQSAT*ZCOR
+        ZCOND = (ZQSMIX(JK) - ZQSAT) / (1.0_JPRB + ZQSAT*ZCOR*FOEDEM(ZTP1(JK)))
+        ZTP1(JK) = ZTP1(JK) + FOELDCPM(ZTP1(JK))*ZCOND
+        ZQSMIX(JK) = ZQSMIX(JK) - ZCOND
+        ZQSAT = FOEEWM(ZTP1(JK))*ZQP
+        ZQSAT = MIN(0.5_JPRB, ZQSAT)
+        ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZQSAT)
+        ZQSAT = ZQSAT*ZCOR
+        ZCOND1 = (ZQSMIX(JK) - ZQSAT) / (1.0_JPRB + ZQSAT*ZCOR*FOEDEM(ZTP1(JK)))
+        ZTP1(JK) = ZTP1(JK) + FOELDCPM(ZTP1(JK))*ZCOND1
+        ZQSMIX(JK) = ZQSMIX(JK) - ZCOND1
+        
+        ZDQS = ZQSMIX(JK) - ZQOLD
+        ZQSMIX(JK) = ZQOLD
+        ZTP1(JK) = ZTOLD
+        
+        !----------------------------------------------------------------------
+        ! 3.4a  ZDQS(JK) > 0:  EVAPORATION OF CLOUDS
+        ! ----------------------------------------------------------------------
+        ! Erosion term is LINEAR in L
+        ! Changed to be uniform distribution in cloud region
+        
+        
+        ! Previous function based on DELTA DISTRIBUTION in cloud:
+        IF (ZDQS > 0.0_JPRB) THEN
+          !    If subsidence evaporation term is turned off, then need to use updated
+          !    liquid and cloud here?
+          !    ZLEVAP = MAX(ZA(JL,JK, JBLK)+ZACUST(JK),1.0_JPRB)*MIN(ZDQS(JK),ZLICLD(JK)+ZLFINALSUM(JK))
+          ZLEVAP = ZA(JK)*MIN(ZDQS, ZLICLD)
+          ZLEVAP = MIN(ZLEVAP, ZEVAPLIMMIX)
+          ZLEVAP = MIN(ZLEVAP, MAX(ZQSMIX(JK) - ZQX(JK, NCLDQV), 0.0_JPRB))
+          
+          ! For first guess call
+          ZLEVAPL = ZLIQFRAC(JK)*ZLEVAP
+          ZLEVAPI = ZICEFRAC(JK)*ZLEVAP
+          
+          ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) + ZLIQFRAC(JK)*ZLEVAP
+          ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) - ZLIQFRAC(JK)*ZLEVAP
+          
+          ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) + ZICEFRAC(JK)*ZLEVAP
+          ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) - ZICEFRAC(JK)*ZLEVAP
+          
+        END IF
+        
+        
+        !----------------------------------------------------------------------
+        ! 3.4b ZDQS(JK) < 0: FORMATION OF CLOUDS
+        !----------------------------------------------------------------------
+        ! (1) Increase of cloud water in existing clouds
+!!        IF (ZA(JK) > ZEPSEC .and. ZDQS <= -YRECLDP%RLMIN) THEN
+        IF (ZA(JK) > ZEPSEC .and. ZDQS <= -YRECLDP%RLMIN) THEN
+          
+          ZLCOND1 = MAX(-ZDQS, 0.0_JPRB)            !new limiter
+          
+          !old limiter (significantly improves upper tropospheric humidity rms)
+          IF (ZA(JK) > 0.99_JPRB) THEN
+            ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZQSMIX(JK))
+            ZCDMAX = (ZQX(JK, NCLDQV) - ZQSMIX(JK)) / (1.0_JPRB + ZCOR*ZQSMIX(JK)*FOEDEM(ZTP1(JK)))
+          ELSE
+            ZCDMAX = (ZQX(JK, NCLDQV) - ZA(JK)*ZQSMIX(JK)) / ZA(JK)
+          END IF
+          ZLCOND1 = MAX(MIN(ZLCOND1, ZCDMAX), 0.0_JPRB)
+          ! end old limiter
+          
+          ZLCOND1 = ZA(JK)*ZLCOND1
+!!          IF (ZLCOND1 < YRECLDP%RLMIN)           ZLCOND1 = 0.0_JPRB
+          IF (ZLCOND1 < YRECLDP%RLMIN)           ZLCOND1 = 0.0_JPRB
+          
+          !-------------------------------------------------------------------------
+          ! All increase goes into liquid unless so cold cloud homogeneously freezes
+          ! Include new liquid formation in first guess value, otherwise liquid
+          ! remains at cold temperatures until next timestep.
+          !-------------------------------------------------------------------------
+!!          IF (ZTP1(JK) > YRECLDP%RTHOMO) THEN
+          IF (ZTP1(JK) > YRECLDP%RTHOMO) THEN
+            ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) + ZLCOND1
+            ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) - ZLCOND1
+            ZQXFG(NCLDQL) = ZQXFG(NCLDQL) + ZLCOND1
+          ELSE
+            ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) + ZLCOND1
+            ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) - ZLCOND1
+            ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZLCOND1
+          END IF
+        END IF
+        
+        ! (2) Generation of new clouds (da/dt>0)
+        
+        
+!!        IF (ZDQS <= -YRECLDP%RLMIN .and. ZA(JK) < 1.0_JPRB - ZEPSEC) THEN
+        IF (ZDQS <= -YRECLDP%RLMIN .and. ZA(JK) < 1.0_JPRB - ZEPSEC) THEN
+          
+          !---------------------------
+          ! Critical relative humidity
+          !---------------------------
+!!          ZRHC = YRECLDP%RAMID
+          ZRHC = YRECLDP%RAMID
+!!          ZSIGK = PAP(JL, JK, JBLK) / PAPH(JL, NLEV + 1, JBLK)
+          ZSIGK = PAP(JL, JK, JBLK)
+          ZSIGK = ZSIGK / PAPH(JL, NLEV + 1, JBLK)
+          ! Increase RHcrit to 1.0 towards the surface (eta>0.8)
+          IF (ZSIGK > 0.8_JPRB) THEN
+!!            ZRHC = YRECLDP%RAMID + (1.0_JPRB - YRECLDP%RAMID)*((ZSIGK - 0.8_JPRB) / 0.2_JPRB)**2
+            ZRHC = YRECLDP%RAMID + (1.0_JPRB - YRECLDP%RAMID)*((ZSIGK - 0.8_JPRB) / 0.2_JPRB)**2
+          END IF
+          
+          ! Commented out for CY37R1 to reduce humidity in high trop and strat
+          !      ! Increase RHcrit to 1.0 towards the tropopause (trop-0.2) and above
+          !      ZBOTT=ZTRPAUS(JK)+0.2_JPRB
+          !      IF(ZSIGK < ZBOTT) THEN
+          !        ZRHC=RAMID+(1.0_JPRB-RAMID)*MIN(((ZBOTT-ZSIGK)/0.2_JPRB)**2,1.0_JPRB)
+          !      ENDIF
+          
+          !---------------------------
+          ! Supersaturation options
+          !---------------------------
+!!          IF (YRECLDP%NSSOPT == 0) THEN
+          IF (YRECLDP%NSSOPT == 0) THEN
+            ! No scheme
+            ZQE = (ZQX(JK, NCLDQV) - ZA(JK)*ZQSICE(JK)) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK))
+            ZQE = MAX(0.0_JPRB, ZQE)
+!!          ELSE IF (YRECLDP%NSSOPT == 1) THEN
+          ELSE IF (YRECLDP%NSSOPT == 1) THEN
+            ! Tompkins
+            ZQE = (ZQX(JK, NCLDQV) - ZA(JK)*ZQSICE(JK)) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK))
+            ZQE = MAX(0.0_JPRB, ZQE)
+!!          ELSE IF (YRECLDP%NSSOPT == 2) THEN
+          ELSE IF (YRECLDP%NSSOPT == 2) THEN
+            ! Lohmann and Karcher
+            ZQE = ZQX(JK, NCLDQV)
+!!          ELSE IF (YRECLDP%NSSOPT == 3) THEN
+          ELSE IF (YRECLDP%NSSOPT == 3) THEN
+            ! Gierens
+            ZQE = ZQX(JK, NCLDQV) + ZLI(JK)
+          END IF
+          
+!!          IF (ZTP1(JK) >= RTT .or. YRECLDP%NSSOPT == 0) THEN
+          IF (ZTP1(JK) >= RTT .or. YRECLDP%NSSOPT == 0) THEN
+            ! No ice supersaturation allowed
+            ZFAC = 1.0_JPRB
+          ELSE
+            ! Ice supersaturation
+            ZFAC = ZFOKOOP
+          END IF
+          
+          IF (ZQE >= ZRHC*ZQSICE(JK)*ZFAC .and. ZQE < ZQSICE(JK)*ZFAC) THEN
+            ! note: not **2 on 1-a term if ZQE is used.
+            ! Added correction term ZFAC to numerator 15/03/2010
+            ZACOND = -((1.0_JPRB - ZA(JK))*ZFAC*ZDQS) / MAX(2.0_JPRB*(ZFAC*ZQSICE(JK) - ZQE), ZEPSEC)
+            
+            ZACOND = MIN(ZACOND, 1.0_JPRB - ZA(JK))              !PUT THE LIMITER BACK
+            
+            ! Linear term:
+            ! Added correction term ZFAC 15/03/2010
+            ZLCOND2 = -ZFAC*ZDQS*0.5_JPRB*ZACOND              !mine linear
+            
+            ! new limiter formulation
+            ZZDL = (2.0_JPRB*(ZFAC*ZQSICE(JK) - ZQE)) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK))
+            ! Added correction term ZFAC 15/03/2010
+            IF (ZFAC*ZDQS < -ZZDL) THEN
+              ! ZLCONDLIM=(ZA(JL,JK, JBLK)-1.0_JPRB)*ZDQS(JK)-ZQSICE(JL,JK, JBLK)+ZQX(JL,JK,NCLDQV)
+              ZLCONDLIM = (ZA(JK) - 1.0_JPRB)*ZFAC*ZDQS - ZFAC*ZQSICE(JK) + ZQX(JK, NCLDQV)
+              ZLCOND2 = MIN(ZLCOND2, ZLCONDLIM)
+            END IF
+            ZLCOND2 = MAX(ZLCOND2, 0.0_JPRB)
+            
+!!            IF (ZLCOND2 < YRECLDP%RLMIN .or. (1.0_JPRB - ZA(JK)) < ZEPSEC) THEN
+            IF (ZLCOND2 < YRECLDP%RLMIN .or. (1.0_JPRB - ZA(JK)) < ZEPSEC) THEN
+              ZLCOND2 = 0.0_JPRB
+              ZACOND = 0.0_JPRB
+            END IF
+            IF (ZLCOND2 == 0.0_JPRB)             ZACOND = 0.0_JPRB
+            
+            ! Large-scale generation is LINEAR in A and LINEAR in L
+            ZSOLAC = ZSOLAC + ZACOND              !linear
+            
+            !------------------------------------------------------------------------
+            ! All increase goes into liquid unless so cold cloud homogeneously freezes
+            ! Include new liquid formation in first guess value, otherwise liquid
+            ! remains at cold temperatures until next timestep.
+            !------------------------------------------------------------------------
+!!            IF (ZTP1(JK) > YRECLDP%RTHOMO) THEN
+            IF (ZTP1(JK) > YRECLDP%RTHOMO) THEN
+              ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) + ZLCOND2
+              ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) - ZLCOND2
+              ZQXFG(NCLDQL) = ZQXFG(NCLDQL) + ZLCOND2
+            ELSE
+              ! homogeneous freezing
+              ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) + ZLCOND2
+              ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) - ZLCOND2
+              ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZLCOND2
+            END IF
+            
+          END IF
+        END IF
+        
+        !----------------------------------------------------------------------
+        ! 3.7 Growth of ice by vapour deposition
+        !----------------------------------------------------------------------
+        ! Following Rotstayn et al. 2001:
+        ! does not use the ice nuclei number from cloudaer.F90
+        ! but rather a simple Meyers et al. 1992 form based on the
+        ! supersaturation and assuming clouds are saturated with
+        ! respect to liquid water (well mixed), (or Koop adjustment)
+        ! Growth considered as sink of liquid water if present so
+        ! Bergeron-Findeisen adjustment in autoconversion term no longer needed
+        !----------------------------------------------------------------------
+        
+        !--------------------------------------------------------
+        !-
+        !- Ice deposition following Rotstayn et al. (2001)
+        !-  (monodisperse ice particle size distribution)
+        !-
+        !--------------------------------------------------------
+        IF (IDEPICE == 1) THEN
+          
+          
+          !--------------------------------------------------------------
+          ! Calculate distance from cloud top
+          ! defined by cloudy layer below a layer with cloud frac <0.01
+          ! ZDZ = ZDP(JK)/(ZRHO(JK)*RG)
+          !--------------------------------------------------------------
+          
+!!          IF (ZA(JK - 1) < YRECLDP%RCLDTOPCF .and. ZA(JK) >= YRECLDP%RCLDTOPCF) THEN
+          IF (ZA(JK - 1) < YRECLDP%RCLDTOPCF .and. ZA(JK) >= YRECLDP%RCLDTOPCF) THEN
+            ZCLDTOPDIST = 0.0_JPRB
+          ELSE
+            ZCLDTOPDIST = ZCLDTOPDIST + ZDP / ((ZRHO*RG))
+          END IF
+          
+          !--------------------------------------------------------------
+          ! Only treat depositional growth if liquid is present, because
+          ! ice growth from vapour cannot be modelled without an additional
+          ! in-cloud water vapour variable
+          !--------------------------------------------------------------
+!!          IF (ZTP1(JK) < RTT .and. ZQXFG(NCLDQL) > YRECLDP%RLMIN) THEN
+          IF (ZTP1(JK) < RTT .and. ZQXFG(NCLDQL) > YRECLDP%RLMIN) THEN
+            ! T<273K
+            
+            ZVPICE = (FOEEICE(ZTP1(JK))*RV) / RD
+            ZVPLIQ = ZVPICE*ZFOKOOP
+            ZICENUCLEI = 1000.0_JPRB*EXP((12.96_JPRB*(ZVPLIQ - ZVPICE)) / ZVPLIQ - 0.639_JPRB)
+            
+            !------------------------------------------------
+            !   2.4e-2 is conductivity of air
+            !   8.87 = 700**(1/3) = cube root of the density of ice
+            !------------------------------------------------
+            ZADD = (RLSTT*(RLSTT / ((RV*ZTP1(JK))) - 1.0_JPRB)) / ((2.4E-2_JPRB*ZTP1(JK)))
+            ZBDD = (RV*ZTP1(JK)*PAP(JL, JK, JBLK)) / ((2.21_JPRB*ZVPICE))
+            ZCVDS = (7.8_JPRB*(ZICENUCLEI / ZRHO)**0.666_JPRB*(ZVPLIQ - ZVPICE)) / ((8.87_JPRB*(ZADD + ZBDD)*ZVPICE))
+            
+            !-----------------------------------------------------
+            ! RICEINIT=1.E-12_JPRB is initial mass of ice particle
+            !-----------------------------------------------------
+!!            ZICE0 = MAX(ZICECLD, (ZICENUCLEI*YRECLDP%RICEINIT) / ZRHO)
+            ZICE0 = MAX(ZICECLD, (ZICENUCLEI*YRECLDP%RICEINIT) / ZRHO)
+            
+            !------------------
+            ! new value of ice:
+            !------------------
+            ZINEW = (0.666_JPRB*ZCVDS*PTSPHY + ZICE0**0.666_JPRB)**1.5_JPRB
+            
+            !---------------------------
+            ! grid-mean deposition rate:
+            !---------------------------
+            ZDEPOS = MAX(ZA(JK)*(ZINEW - ZICE0), 0.0_JPRB)
+            
+            !--------------------------------------------------------------------
+            ! Limit deposition to liquid water amount
+            ! If liquid is all frozen, ice would use up reservoir of water
+            ! vapour in excess of ice saturation mixing ratio - However this
+            ! can not be represented without an in-cloud humidity variable. Using
+            ! the grid-mean humidity would imply a large artificial horizontal
+            ! flux from the clear sky to the cloudy area. We thus rely on the
+            ! supersaturation check to clean up any remaining supersaturation
+            !--------------------------------------------------------------------
+            ZDEPOS = MIN(ZDEPOS, ZQXFG(NCLDQL))              ! limit to liquid water amount
+            
+            !--------------------------------------------------------------------
+            ! At top of cloud, reduce deposition rate near cloud top to account for
+            ! small scale turbulent processes, limited ice nucleation and ice fallout
+            !--------------------------------------------------------------------
+            !      ZDEPOS = ZDEPOS*MIN(RDEPLIQREFRATE+ZCLDTOPDIST(JK)/RDEPLIQREFDEPTH,1.0_JPRB)
+            ! Change to include dependence on ice nuclei concentration
+            ! to increase deposition rate with decreasing temperatures
+            ZINFACTOR = MIN(ZICENUCLEI / 15000._JPRB, 1.0_JPRB)
+!!            ZDEPOS = ZDEPOS*MIN(ZINFACTOR + (1.0_JPRB - ZINFACTOR)*(YRECLDP%RDEPLIQREFRATE + ZCLDTOPDIST /  &
+            ZDEPOS = ZDEPOS*MIN(ZINFACTOR + (1.0_JPRB - ZINFACTOR)*(YRECLDP%RDEPLIQREFRATE + ZCLDTOPDIST /  &
+!!            & YRECLDP%RDEPLIQREFDEPTH), 1.0_JPRB)
+            & YRECLDP%RDEPLIQREFDEPTH), 1.0_JPRB)
+            
+            !--------------
+            ! add to matrix
+            !--------------
+            ZSOLQA(NCLDQI, NCLDQL) = ZSOLQA(NCLDQI, NCLDQL) + ZDEPOS
+            ZSOLQA(NCLDQL, NCLDQI) = ZSOLQA(NCLDQL, NCLDQI) - ZDEPOS
+            ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZDEPOS
+            ZQXFG(NCLDQL) = ZQXFG(NCLDQL) - ZDEPOS
+            
+          END IF
+          
+          !--------------------------------------------------------
+          !-
+          !- Ice deposition assuming ice PSD
+          !-
+          !--------------------------------------------------------
+        ELSE IF (IDEPICE == 2) THEN
+          
+          
+          !--------------------------------------------------------------
+          ! Calculate distance from cloud top
+          ! defined by cloudy layer below a layer with cloud frac <0.01
+          ! ZDZ = ZDP(JK)/(ZRHO(JK)*RG)
+          !--------------------------------------------------------------
+          
+!!          IF (ZA(JK - 1) < YRECLDP%RCLDTOPCF .and. ZA(JK) >= YRECLDP%RCLDTOPCF) THEN
+          IF (ZA(JK - 1) < YRECLDP%RCLDTOPCF .and. ZA(JK) >= YRECLDP%RCLDTOPCF) THEN
+            ZCLDTOPDIST = 0.0_JPRB
+          ELSE
+            ZCLDTOPDIST = ZCLDTOPDIST + ZDP / ((ZRHO*RG))
+          END IF
+          
+          !--------------------------------------------------------------
+          ! Only treat depositional growth if liquid is present, because
+          ! ice growth from vapour cannot be modelled without an additional
+          ! in-cloud water vapour variable
+          !--------------------------------------------------------------
+!!          IF (ZTP1(JK) < RTT .and. ZQXFG(NCLDQL) > YRECLDP%RLMIN) THEN
+          IF (ZTP1(JK) < RTT .and. ZQXFG(NCLDQL) > YRECLDP%RLMIN) THEN
+            ! T<273K
+            
+            ZVPICE = (FOEEICE(ZTP1(JK))*RV) / RD
+            ZVPLIQ = ZVPICE*ZFOKOOP
+            ZICENUCLEI = 1000.0_JPRB*EXP((12.96_JPRB*(ZVPLIQ - ZVPICE)) / ZVPLIQ - 0.639_JPRB)
+            
+            !-----------------------------------------------------
+            ! RICEINIT=1.E-12_JPRB is initial mass of ice particle
+            !-----------------------------------------------------
+!!            ZICE0 = MAX(ZICECLD, (ZICENUCLEI*YRECLDP%RICEINIT) / ZRHO)
+            ZICE0 = MAX(ZICECLD, (ZICENUCLEI*YRECLDP%RICEINIT) / ZRHO)
+            
+            ! Particle size distribution
+            ZTCG = 1.0_JPRB
+            ZFACX1I = 1.0_JPRB
+            
+            ZAPLUSB =  &
+!!            & YRECLDP%RCL_APB1*ZVPICE - YRECLDP%RCL_APB2*ZVPICE*ZTP1(JK) + PAP(JL, JK, JBLK)*YRECLDP%RCL_APB3*ZTP1(JK)**3._JPRB
+            & YRECLDP%RCL_APB1*ZVPICE - YRECLDP%RCL_APB2*ZVPICE*ZTP1(JK) + PAP(JL, JK, JBLK)*YRECLDP%RCL_APB3*ZTP1(JK)**3._JPRB
+            ZCORRFAC = (1.0_JPRB / ZRHO)**0.5_JPRB
+            ZCORRFAC2 = ((ZTP1(JK) / 273.0_JPRB)**1.5_JPRB)*(393.0_JPRB / (ZTP1(JK) + 120.0_JPRB))
+            
+!!            ZPR02 = (ZRHO*ZICE0*YRECLDP%RCL_CONST1I) / ((ZTCG*ZFACX1I))
+            ZPR02 = (ZRHO*ZICE0*YRECLDP%RCL_CONST1I) / ((ZTCG*ZFACX1I))
+            
+!!            ZTERM1 = ((ZVPLIQ - ZVPICE)*ZTP1(JK)**2.0_JPRB*ZVPICE*ZCORRFAC2*ZTCG*YRECLDP%RCL_CONST2I*ZFACX1I) /  &
+            ZTERM1 = ((ZVPLIQ - ZVPICE)*ZTP1(JK)**2.0_JPRB*ZVPICE*ZCORRFAC2*ZTCG*YRECLDP%RCL_CONST2I*ZFACX1I) /  &
+            & ((ZRHO*ZAPLUSB*ZVPICE))
+!!            ZTERM2 = 0.65_JPRB*YRECLDP%RCL_CONST6I*ZPR02**YRECLDP%RCL_CONST4I +  &
+            ZTERM2 = 0.65_JPRB*YRECLDP%RCL_CONST6I*ZPR02**YRECLDP%RCL_CONST4I +  &
+!!            & (YRECLDP%RCL_CONST3I*ZCORRFAC**0.5_JPRB*ZRHO**0.5_JPRB*ZPR02**YRECLDP%RCL_CONST5I) / ZCORRFAC2**0.5_JPRB
+            & (YRECLDP%RCL_CONST3I*ZCORRFAC**0.5_JPRB*ZRHO**0.5_JPRB*ZPR02**YRECLDP%RCL_CONST5I) / ZCORRFAC2**0.5_JPRB
+            
+            ZDEPOS = MAX(ZA(JK)*ZTERM1*ZTERM2*PTSPHY, 0.0_JPRB)
+            
+            !--------------------------------------------------------------------
+            ! Limit deposition to liquid water amount
+            ! If liquid is all frozen, ice would use up reservoir of water
+            ! vapour in excess of ice saturation mixing ratio - However this
+            ! can not be represented without an in-cloud humidity variable. Using
+            ! the grid-mean humidity would imply a large artificial horizontal
+            ! flux from the clear sky to the cloudy area. We thus rely on the
+            ! supersaturation check to clean up any remaining supersaturation
+            !--------------------------------------------------------------------
+            ZDEPOS = MIN(ZDEPOS, ZQXFG(NCLDQL))              ! limit to liquid water amount
+            
+            !--------------------------------------------------------------------
+            ! At top of cloud, reduce deposition rate near cloud top to account for
+            ! small scale turbulent processes, limited ice nucleation and ice fallout
+            !--------------------------------------------------------------------
+            ! Change to include dependence on ice nuclei concentration
+            ! to increase deposition rate with decreasing temperatures
+            ZINFACTOR = MIN(ZICENUCLEI / 15000._JPRB, 1.0_JPRB)
+!!            ZDEPOS = ZDEPOS*MIN(ZINFACTOR + (1.0_JPRB - ZINFACTOR)*(YRECLDP%RDEPLIQREFRATE + ZCLDTOPDIST /  &
+            ZDEPOS = ZDEPOS*MIN(ZINFACTOR + (1.0_JPRB - ZINFACTOR)*(YRECLDP%RDEPLIQREFRATE + ZCLDTOPDIST /  &
+!!            & YRECLDP%RDEPLIQREFDEPTH), 1.0_JPRB)
+            & YRECLDP%RDEPLIQREFDEPTH), 1.0_JPRB)
+            
+            !--------------
+            ! add to matrix
+            !--------------
+            ZSOLQA(NCLDQI, NCLDQL) = ZSOLQA(NCLDQI, NCLDQL) + ZDEPOS
+            ZSOLQA(NCLDQL, NCLDQI) = ZSOLQA(NCLDQL, NCLDQI) - ZDEPOS
+            ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZDEPOS
+            ZQXFG(NCLDQL) = ZQXFG(NCLDQL) - ZDEPOS
+          END IF
+          
+        END IF
+        ! on IDEPICE
+        
+        !######################################################################
+        !              4  *** PRECIPITATION PROCESSES ***
+        !######################################################################
+        
+        !----------------------------------
+        ! revise in-cloud condensate amount
+        !----------------------------------
+        ZTMPA = 1.0_JPRB / MAX(ZA(JK), ZEPSEC)
+        ZLIQCLD = ZQXFG(NCLDQL)*ZTMPA
+        ZICECLD = ZQXFG(NCLDQI)*ZTMPA
+        ZLICLD = ZLIQCLD + ZICECLD
+        
+        !----------------------------------------------------------------------
+        ! 4.2 SEDIMENTATION/FALLING OF *ALL* MICROPHYSICAL SPECIES
+        !     now that rain, snow, graupel species are prognostic
+        !     the precipitation flux can be defined directly level by level
+        !     There is no vertical memory required from the flux variable
+        !----------------------------------------------------------------------
+        
+        DO JM=1,NCLV
+          IF (LLFALL(JM) .or. JM == NCLDQI) THEN
+            !------------------------
+            ! source from layer above
+            !------------------------
+!!            IF (JK > YRECLDP%NCLDTOP) THEN
+            IF (JK > YRECLDP%NCLDTOP) THEN
+              ZFALLSRCE(JM) = ZPFPLSX(JK, JM)*ZDTGDP
+              ZSOLQA(JM, JM) = ZSOLQA(JM, JM) + ZFALLSRCE(JM)
+              ZQXFG(JM) = ZQXFG(JM) + ZFALLSRCE(JM)
+              ! use first guess precip----------V
+              ZQPRETOT = ZQPRETOT + ZQXFG(JM)
+            END IF
+            !-------------------------------------------------
+            ! sink to next layer, constant fall speed
+            !-------------------------------------------------
+            ! if aerosol effect then override
+            !  note that for T>233K this is the same as above.
+!!            IF (YRECLDP%LAERICESED .and. JM == NCLDQI) THEN
+            IF (YRECLDP%LAERICESED .and. JM == NCLDQI) THEN
+              ZRE_ICE = PRE_ICE(JL, JK, JBLK)
+              ! The exponent value is from
+              ! Morrison et al. JAS 2005 Appendix
+              ZVQX(NCLDQI) = 0.002_JPRB*ZRE_ICE**1.0_JPRB
+            END IF
+            ZFALL = ZVQX(JM)*ZRHO
+            !-------------------------------------------------
+            ! modified by Heymsfield and Iaquinta JAS 2000
+            !-------------------------------------------------
+            ! ZFALL = ZFALL*((PAP(JL,JK, JBLK)*RICEHI1)**(-0.178_JPRB)) &
+            !            &*((ZTP1(JL,JK, JBLK)*RICEHI2)**(-0.394_JPRB))
+            
+            ZFALLSINK(JM) = ZDTGDP*ZFALL
+            ! Cloud budget diagnostic stored at end as implicit
+            ! jl
+          END IF
+          ! LLFALL
+        END DO
+        ! jm
+        
+        !---------------------------------------------------------------
+        ! Precip cover overlap using MAX-RAN Overlap
+        ! Since precipitation is now prognostic we must
+        !   1) apply an arbitrary minimum coverage (0.3) if precip>0
+        !   2) abandon the 2-flux clr/cld treatment
+        !   3) Thus, since we have no memory of the clear sky precip
+        !      fraction, we mimic the previous method by reducing
+        !      ZCOVPTOT(JK), which has the memory, proportionally with
+        !      the precip evaporation rate, taking cloud fraction
+        !      into account
+        !   #3 above leads to much smoother vertical profiles of
+        !   precipitation fraction than the Klein-Jakob scheme which
+        !   monotonically increases precip fraction and then resets
+        !   it to zero in a step function once clear-sky precip reaches
+        !   zero.
+        !---------------------------------------------------------------
+        IF (ZQPRETOT > ZEPSEC) THEN
+          ZCOVPTOT = 1.0_JPRB - ((1.0_JPRB - ZCOVPTOT)*(1.0_JPRB - MAX(ZA(JK), ZA(JK - 1)))) / (1.0_JPRB - MIN(ZA(JK  &
+          & - 1), 1.0_JPRB - 1.E-06_JPRB))
+!!          ZCOVPTOT = MAX(ZCOVPTOT, YRECLDP%RCOVPMIN)
+          ZCOVPTOT = MAX(ZCOVPTOT, YRECLDP%RCOVPMIN)
+          ZCOVPCLR = MAX(0.0_JPRB, ZCOVPTOT - ZA(JK))            ! clear sky proportion
+          ZRAINCLD = ZQXFG(NCLDQR) / ZCOVPTOT
+          ZSNOWCLD = ZQXFG(NCLDQS) / ZCOVPTOT
+          ZCOVPMAX = MAX(ZCOVPTOT, ZCOVPMAX)
+        ELSE
+          ZRAINCLD = 0.0_JPRB
+          ZSNOWCLD = 0.0_JPRB
+          ZCOVPTOT = 0.0_JPRB            ! no flux - reset cover
+          ZCOVPCLR = 0.0_JPRB            ! reset clear sky proportion
+          ZCOVPMAX = 0.0_JPRB            ! reset max cover for ZZRH calc
+        END IF
+        
+        !----------------------------------------------------------------------
+        ! 4.3a AUTOCONVERSION TO SNOW
+        !----------------------------------------------------------------------
+        
+        IF (ZTP1(JK) <= RTT) THEN
+          !-----------------------------------------------------
+          !     Snow Autoconversion rate follows Lin et al. 1983
+          !-----------------------------------------------------
+          IF (ZICECLD > ZEPSEC) THEN
+            
+!!            ZZCO = PTSPHY*YRECLDP%RSNOWLIN1*EXP(YRECLDP%RSNOWLIN2*(ZTP1(JK) - RTT))
+            ZZCO = PTSPHY*YRECLDP%RSNOWLIN1*EXP(YRECLDP%RSNOWLIN2*(ZTP1(JK) - RTT))
+            
+!!            IF (YRECLDP%LAERICEAUTO) THEN
+            IF (YRECLDP%LAERICEAUTO) THEN
+              ZLCRIT = PICRIT_AER(JL, JK, JBLK)
+              ! 0.3 = N**0.333 with N=0.027
+!!              ZZCO = ZZCO*(YRECLDP%RNICE / PNICE(JL, JK, JBLK))**0.333_JPRB
+              ZZCO = ZZCO*(YRECLDP%RNICE / PNICE(JL, JK, JBLK))**0.333_JPRB
+            ELSE
+!!              ZLCRIT = YRECLDP%RLCRITSNOW
+              ZLCRIT = YRECLDP%RLCRITSNOW
+            END IF
+            
+            ZSNOWAUT = ZZCO*(1.0_JPRB - EXP(-(ZICECLD / ZLCRIT)**2))
+            ZSOLQB(NCLDQS, NCLDQI) = ZSOLQB(NCLDQS, NCLDQI) + ZSNOWAUT
+            
+          END IF
+        END IF
+        
+        !----------------------------------------------------------------------
+        ! 4.3b AUTOCONVERSION WARM CLOUDS
+        !   Collection and accretion will require separate treatment
+        !   but for now we keep this simple treatment
+        !----------------------------------------------------------------------
+        
+        IF (ZLIQCLD > ZEPSEC) THEN
+          
+          !--------------------------------------------------------
+          !-
+          !- Warm-rain process follows Sundqvist (1989)
+          !-
+          !--------------------------------------------------------
+          IF (IWARMRAIN == 1) THEN
+            
+!!            ZZCO = YRECLDP%RKCONV*PTSPHY
+            ZZCO = YRECLDP%RKCONV*PTSPHY
+            
+!!            IF (YRECLDP%LAERLIQAUTOLSP) THEN
+            IF (YRECLDP%LAERLIQAUTOLSP) THEN
+              ZLCRIT = PLCRIT_AER(JL, JK, JBLK)
+              ! 5.0 = N**0.333 with N=125 cm-3
+!!              ZZCO = ZZCO*(YRECLDP%RCCN / PCCN(JL, JK, JBLK))**0.333_JPRB
+              ZZCO = ZZCO*(YRECLDP%RCCN / PCCN(JL, JK, JBLK))**0.333_JPRB
+            ELSE
+              ! Modify autoconversion threshold dependent on:
+              !  land (polluted, high CCN, smaller droplets, higher threshold)
+              !  sea  (clean, low CCN, larger droplets, lower threshold)
+              IF (PLSM(JL,JBLK) > 0.5_JPRB) THEN
+!!                ZLCRIT = YRECLDP%RCLCRIT_LAND                  ! land
+                ZLCRIT = YRECLDP%RCLCRIT_LAND                  ! land
+              ELSE
+!!                ZLCRIT = YRECLDP%RCLCRIT_SEA                  ! ocean
+                ZLCRIT = YRECLDP%RCLCRIT_SEA                  ! ocean
+              END IF
+            END IF
+            
+            !------------------------------------------------------------------
+            ! Parameters for cloud collection by rain and snow.
+            ! Note that with new prognostic variable it is now possible
+            ! to REPLACE this with an explicit collection parametrization
+            !------------------------------------------------------------------
+            ZPRECIP = (ZPFPLSX(JK, NCLDQS) + ZPFPLSX(JK, NCLDQR)) / MAX(ZEPSEC, ZCOVPTOT)
+!!            ZCFPR = 1.0_JPRB + YRECLDP%RPRC1*SQRT(MAX(ZPRECIP, 0.0_JPRB))
+            ZCFPR = 1.0_JPRB + YRECLDP%RPRC1*SQRT(MAX(ZPRECIP, 0.0_JPRB))
+            !      ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB))*&
+            !       &ZCOVPTOT(JK)/(MAX(ZA(JL,JK, JBLK),ZEPSEC))
+            
+!!            IF (YRECLDP%LAERLIQCOLL) THEN
+            IF (YRECLDP%LAERLIQCOLL) THEN
+              ! 5.0 = N**0.333 with N=125 cm-3
+!!              ZCFPR = ZCFPR*(YRECLDP%RCCN / PCCN(JL, JK, JBLK))**0.333_JPRB
+              ZCFPR = ZCFPR*(YRECLDP%RCCN / PCCN(JL, JK, JBLK))**0.333_JPRB
+            END IF
+            
+            ZZCO = ZZCO*ZCFPR
+            ZLCRIT = ZLCRIT / MAX(ZCFPR, ZEPSEC)
+            
+            IF (ZLIQCLD / ZLCRIT < 20.0_JPRB) THEN
+              ! Security for exp for some compilers
+              ZRAINAUT = ZZCO*(1.0_JPRB - EXP(-(ZLIQCLD / ZLCRIT)**2))
+            ELSE
+              ZRAINAUT = ZZCO
+            END IF
+            
+            ! rain freezes instantly
+            IF (ZTP1(JK) <= RTT) THEN
+              ZSOLQB(NCLDQS, NCLDQL) = ZSOLQB(NCLDQS, NCLDQL) + ZRAINAUT
+            ELSE
+              ZSOLQB(NCLDQR, NCLDQL) = ZSOLQB(NCLDQR, NCLDQL) + ZRAINAUT
+            END IF
+            
+            !--------------------------------------------------------
+            !-
+            !- Warm-rain process follows Khairoutdinov and Kogan (2000)
+            !-
+            !--------------------------------------------------------
+          ELSE IF (IWARMRAIN == 2) THEN
+            
+            IF (PLSM(JL,JBLK) > 0.5_JPRB) THEN
+              ! land
+!!              ZCONST = YRECLDP%RCL_KK_CLOUD_NUM_LAND
+              ZCONST = YRECLDP%RCL_KK_CLOUD_NUM_LAND
+!!              ZLCRIT = YRECLDP%RCLCRIT_LAND
+              ZLCRIT = YRECLDP%RCLCRIT_LAND
+            ELSE
+              ! ocean
+!!              ZCONST = YRECLDP%RCL_KK_CLOUD_NUM_SEA
+              ZCONST = YRECLDP%RCL_KK_CLOUD_NUM_SEA
+!!              ZLCRIT = YRECLDP%RCLCRIT_SEA
+              ZLCRIT = YRECLDP%RCLCRIT_SEA
+            END IF
+            
+            IF (ZLIQCLD > ZLCRIT) THEN
+              
+!!              ZRAINAUT = 1.5_JPRB*ZA(JK)*PTSPHY*YRECLDP%RCL_KKAAU*ZLIQCLD**YRECLDP%RCL_KKBAUQ*ZCONST**YRECLDP%RCL_KKBAUN
+              ZRAINAUT = 1.5_JPRB*ZA(JK)*PTSPHY*YRECLDP%RCL_KKAAU*ZLIQCLD**YRECLDP%RCL_KKBAUQ*ZCONST**YRECLDP%RCL_KKBAUN
+              
+              ZRAINAUT = MIN(ZRAINAUT, ZQXFG(NCLDQL))
+              IF (ZRAINAUT < ZEPSEC)               ZRAINAUT = 0.0_JPRB
+              
+!!              ZRAINACC = 2.0_JPRB*ZA(JK)*PTSPHY*YRECLDP%RCL_KKAAC*(ZLIQCLD*ZRAINCLD)**YRECLDP%RCL_KKBAC
+              ZRAINACC = 2.0_JPRB*ZA(JK)*PTSPHY*YRECLDP%RCL_KKAAC*(ZLIQCLD*ZRAINCLD)**YRECLDP%RCL_KKBAC
+              
+              ZRAINACC = MIN(ZRAINACC, ZQXFG(NCLDQL))
+              IF (ZRAINACC < ZEPSEC)               ZRAINACC = 0.0_JPRB
+              
+            ELSE
+              ZRAINAUT = 0.0_JPRB
+              ZRAINACC = 0.0_JPRB
+            END IF
+            
+            ! If temperature < 0, then autoconversion produces snow rather than rain
+            ! Explicit
+            IF (ZTP1(JK) <= RTT) THEN
+              ZSOLQA(NCLDQS, NCLDQL) = ZSOLQA(NCLDQS, NCLDQL) + ZRAINAUT
+              ZSOLQA(NCLDQS, NCLDQL) = ZSOLQA(NCLDQS, NCLDQL) + ZRAINACC
+              ZSOLQA(NCLDQL, NCLDQS) = ZSOLQA(NCLDQL, NCLDQS) - ZRAINAUT
+              ZSOLQA(NCLDQL, NCLDQS) = ZSOLQA(NCLDQL, NCLDQS) - ZRAINACC
+            ELSE
+              ZSOLQA(NCLDQR, NCLDQL) = ZSOLQA(NCLDQR, NCLDQL) + ZRAINAUT
+              ZSOLQA(NCLDQR, NCLDQL) = ZSOLQA(NCLDQR, NCLDQL) + ZRAINACC
+              ZSOLQA(NCLDQL, NCLDQR) = ZSOLQA(NCLDQL, NCLDQR) - ZRAINAUT
+              ZSOLQA(NCLDQL, NCLDQR) = ZSOLQA(NCLDQL, NCLDQR) - ZRAINACC
+            END IF
+            
+          END IF
+          ! on IWARMRAIN
+          
+        END IF
+        ! on ZLIQCLD > ZEPSEC
+        
+        
+        !----------------------------------------------------------------------
+        ! RIMING - COLLECTION OF CLOUD LIQUID DROPS BY SNOW AND ICE
+        !      only active if T<0degC and supercooled liquid water is present
+        !      AND if not Sundqvist autoconversion (as this includes riming)
+        !----------------------------------------------------------------------
+        IF (IWARMRAIN > 1) THEN
+          
+          IF (ZTP1(JK) <= RTT .and. ZLIQCLD > ZEPSEC) THEN
+            
+            ! Fallspeed air density correction
+!!            ZFALLCORR = (YRECLDP%RDENSREF / ZRHO)**0.4_JPRB
+            ZFALLCORR = (YRECLDP%RDENSREF / ZRHO)**0.4_JPRB
+            
+            !------------------------------------------------------------------
+            ! Riming of snow by cloud water - implicit in lwc
+            !------------------------------------------------------------------
+            IF (ZSNOWCLD > ZEPSEC .and. ZCOVPTOT > 0.01_JPRB) THEN
+              
+              ! Calculate riming term
+              ! Factor of liq water taken out because implicit
+              ZSNOWRIME =  &
+!!              & 0.3_JPRB*ZCOVPTOT*PTSPHY*YRECLDP%RCL_CONST7S*ZFALLCORR*(ZRHO*ZSNOWCLD*YRECLDP%RCL_CONST1S)**YRECLDP%RCL_CONST8S
+              & 0.3_JPRB*ZCOVPTOT*PTSPHY*YRECLDP%RCL_CONST7S*ZFALLCORR*(ZRHO*ZSNOWCLD*YRECLDP%RCL_CONST1S)**YRECLDP%RCL_CONST8S
+              
+              ! Limit snow riming term
+              ZSNOWRIME = MIN(ZSNOWRIME, 1.0_JPRB)
+              
+              ZSOLQB(NCLDQS, NCLDQL) = ZSOLQB(NCLDQS, NCLDQL) + ZSNOWRIME
+              
+            END IF
+            
+            !------------------------------------------------------------------
+            ! Riming of ice by cloud water - implicit in lwc
+            ! NOT YET ACTIVE
+            !------------------------------------------------------------------
+            !      IF (ZICECLD(JK)>ZEPSEC .AND. ZA(JL,JK, JBLK)>0.01_JPRB) THEN
+            !
+            !        ! Calculate riming term
+            !        ! Factor of liq water taken out because implicit
+            !        ZSNOWRIME(JK) = ZA(JL,JK, JBLK)*PTSPHY*RCL_CONST7S*ZFALLCORR &
+            !     &                  *(ZRHO(JK)*ZICECLD(JK)*RCL_CONST1S)**RCL_CONST8S
+            !
+            !        ! Limit ice riming term
+            !        ZSNOWRIME(JK)=MIN(ZSNOWRIME(JK),1.0_JPRB)
+            !
+            !        ZSOLQB(JL,NCLDQI,NCLDQL) = ZSOLQB(JL,NCLDQI,NCLDQL) + ZSNOWRIME(JK)
+            !
+            !      ENDIF
+          END IF
+          
+        END IF
+        ! on IWARMRAIN > 1
+        
+        
+        !----------------------------------------------------------------------
+        ! 4.4a  MELTING OF SNOW and ICE
+        !       with new implicit solver this also has to treat snow or ice
+        !       precipitating from the level above... i.e. local ice AND flux.
+        !       in situ ice and snow: could arise from LS advection or warming
+        !       falling ice and snow: arrives by precipitation process
+        !----------------------------------------------------------------------
+        
+        ZICETOT = ZQXFG(NCLDQI) + ZQXFG(NCLDQS)
+        ZMELTMAX = 0.0_JPRB
+        
+        ! If there are frozen hydrometeors present and dry-bulb temperature > 0degC
+        IF (ZICETOT > ZEPSEC .and. ZTP1(JK) > RTT) THEN
+          
+          ! Calculate subsaturation
+          ZSUBSAT = MAX(ZQSICE(JK) - ZQX(JK, NCLDQV), 0.0_JPRB)
+          
+          ! Calculate difference between dry-bulb (ZTP1) and the temperature
+          ! at which the wet-bulb=0degC (RTT-ZSUBSAT*....) using an approx.
+          ! Melting only occurs if the wet-bulb temperature >0
+          ! i.e. warming of ice particle due to melting > cooling
+          ! due to evaporation.
+          ZTDMTW0 = ZTP1(JK) - RTT - ZSUBSAT*(ZTW1 + ZTW2*(PAP(JL, JK, JBLK) - ZTW3) - ZTW4*(ZTP1(JK) - ZTW5))
+          ! Not implicit yet...
+          ! Ensure ZCONS1 is positive so that ZMELTMAX=0 if ZTDMTW0<0
+!!          ZCONS1 = ABS((PTSPHY*(1.0_JPRB + 0.5_JPRB*ZTDMTW0)) / YRECLDP%RTAUMEL)
+          ZCONS1 = ABS((PTSPHY*(1.0_JPRB + 0.5_JPRB*ZTDMTW0)) / YRECLDP%RTAUMEL)
+          ZMELTMAX = MAX(ZTDMTW0*ZCONS1*ZRLDCP, 0.0_JPRB)
+        END IF
+        
+        ! Loop over frozen hydrometeors (ice, snow)
+        DO JM=1,NCLV
+          IF (IPHASE(JM) == 2) THEN
+            JN = IMELT(JM)
+            IF (ZMELTMAX > ZEPSEC .and. ZICETOT > ZEPSEC) THEN
+              ! Apply melting in same proportion as frozen hydrometeor fractions
+              ZALFA = ZQXFG(JM) / ZICETOT
+              ZMELT = MIN(ZQXFG(JM), ZALFA*ZMELTMAX)
+              ! needed in first guess
+              ! This implies that zqpretot has to be recalculated below
+              ! since it is not conserved here if ice falls and liquid doesn't
+              ZQXFG(JM) = ZQXFG(JM) - ZMELT
+              ZQXFG(JN) = ZQXFG(JN) + ZMELT
+              ZSOLQA(JN, JM) = ZSOLQA(JN, JM) + ZMELT
+              ZSOLQA(JM, JN) = ZSOLQA(JM, JN) - ZMELT
+            END IF
+          END IF
+        END DO
+        
+        !----------------------------------------------------------------------
+        ! 4.4b  FREEZING of RAIN
+        !----------------------------------------------------------------------
+        
+        ! If rain present
+        IF (ZQX(JK, NCLDQR) > ZEPSEC) THEN
+          
+          IF (ZTP1(JK) <= RTT .and. ZTP1(JK - 1) > RTT) THEN
+            ! Base of melting layer/top of refreezing layer so
+            ! store rain/snow fraction for precip type diagnosis
+            ! If mostly rain, then supercooled rain slow to freeze
+            ! otherwise faster to freeze (snow or ice pellets)
+            ZQPRETOT = MAX(ZQX(JK, NCLDQS) + ZQX(JK, NCLDQR), ZEPSEC)
+            PRAINFRAC_TOPRFZ(JL,JBLK) = ZQX(JK, NCLDQR) / ZQPRETOT
+            IF (PRAINFRAC_TOPRFZ(JL,JBLK) > 0.8) THEN
+              LLRAINLIQ = .true.
+            ELSE
+              LLRAINLIQ = .false.
+            END IF
+          END IF
+          
+          ! If temperature less than zero
+          IF (ZTP1(JK) < RTT) THEN
+            
+            IF (PRAINFRAC_TOPRFZ(JL,JBLK) > 0.8) THEN
+              
+              ! Majority of raindrops completely melted
+              ! Refreezing is by slow heterogeneous freezing
+              
+              ! Slope of rain particle size distribution
+!!              ZLAMBDA = (YRECLDP%RCL_FAC1 / ((ZRHO*ZQX(JK, NCLDQR))))**YRECLDP%RCL_FAC2
+              ZLAMBDA = (YRECLDP%RCL_FAC1 / ((ZRHO*ZQX(JK, NCLDQR))))**YRECLDP%RCL_FAC2
+              
+              ! Calculate freezing rate based on Bigg(1953) and Wisner(1972)
+!!              ZTEMP = YRECLDP%RCL_FZRAB*(ZTP1(JK) - RTT)
+              ZTEMP = YRECLDP%RCL_FZRAB*(ZTP1(JK) - RTT)
+!!              ZFRZ = PTSPHY*(YRECLDP%RCL_CONST5R / ZRHO)*(EXP(ZTEMP) - 1._JPRB)*ZLAMBDA**YRECLDP%RCL_CONST6R
+              ZFRZ = PTSPHY*(YRECLDP%RCL_CONST5R / ZRHO)*(EXP(ZTEMP) - 1._JPRB)*ZLAMBDA**YRECLDP%RCL_CONST6R
+              ZFRZMAX = MAX(ZFRZ, 0.0_JPRB)
+              
+            ELSE
+              
+              ! Majority of raindrops only partially melted
+              ! Refreeze with a shorter timescale (reverse of melting...for now)
+              
+!!              ZCONS1 = ABS((PTSPHY*(1.0_JPRB + 0.5_JPRB*(RTT - ZTP1(JK)))) / YRECLDP%RTAUMEL)
+              ZCONS1 = ABS((PTSPHY*(1.0_JPRB + 0.5_JPRB*(RTT - ZTP1(JK)))) / YRECLDP%RTAUMEL)
+              ZFRZMAX = MAX((RTT - ZTP1(JK))*ZCONS1*ZRLDCP, 0.0_JPRB)
+              
+            END IF
+            
+            IF (ZFRZMAX > ZEPSEC) THEN
+              ZFRZ = MIN(ZQX(JK, NCLDQR), ZFRZMAX)
+              ZSOLQA(NCLDQS, NCLDQR) = ZSOLQA(NCLDQS, NCLDQR) + ZFRZ
+              ZSOLQA(NCLDQR, NCLDQS) = ZSOLQA(NCLDQR, NCLDQS) - ZFRZ
+            END IF
+          END IF
+          
+        END IF
+        
+        
+        !----------------------------------------------------------------------
+        ! 4.4c  FREEZING of LIQUID
+        !----------------------------------------------------------------------
+        ! not implicit yet...
+!!        ZFRZMAX = MAX((YRECLDP%RTHOMO - ZTP1(JK))*ZRLDCP, 0.0_JPRB)
+        ZFRZMAX = MAX((YRECLDP%RTHOMO - ZTP1(JK))*ZRLDCP, 0.0_JPRB)
+        
+        JM = NCLDQL
+        JN = IMELT(JM)
+        IF (ZFRZMAX > ZEPSEC .and. ZQXFG(JM) > ZEPSEC) THEN
+          ZFRZ = MIN(ZQXFG(JM), ZFRZMAX)
+          ZSOLQA(JN, JM) = ZSOLQA(JN, JM) + ZFRZ
+          ZSOLQA(JM, JN) = ZSOLQA(JM, JN) - ZFRZ
+        END IF
+        
+        !----------------------------------------------------------------------
+        ! 4.5   EVAPORATION OF RAIN/SNOW
+        !----------------------------------------------------------------------
+        
+        !----------------------------------------
+        ! Rain evaporation scheme from Sundquist
+        !----------------------------------------
+        IF (IEVAPRAIN == 1) THEN
+          
+          ! Rain
+          
+          
+!!          ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK))
+          ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK))
+!!          ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB)
+          ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB)
+          
+          ZQE = (ZQX(JK, NCLDQV) - ZA(JK)*ZQSLIQ(JK)) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK))
+          !---------------------------------------------
+          ! humidity in moistest ZCOVPCLR part of domain
+          !---------------------------------------------
+          ZQE = MAX(0.0_JPRB, MIN(ZQE, ZQSLIQ(JK)))
+          LLO1 = ZCOVPCLR > ZEPSEC .and. ZQXFG(NCLDQR) > ZEPSEC .and. ZQE < ZZRH*ZQSLIQ(JK)
+          
+          IF (LLO1) THEN
+            ! note: zpreclr is a rain flux
+            ZPRECLR = (ZQXFG(NCLDQR)*ZCOVPCLR) / SIGN(MAX(ABS(ZCOVPTOT*ZDTGDP), ZEPSILON), ZCOVPTOT*ZDTGDP)
+            
+            !--------------------------------------
+            ! actual microphysics formula in zbeta
+            !--------------------------------------
+            
+!!            ZBETA1 = ((SQRT(PAP(JL, JK, JBLK) / PAPH(JL, NLEV + 1, JBLK)) / YRECLDP%RVRFACTOR)*ZPRECLR) / MAX(ZCOVPCLR, ZEPSEC)
+            ZBETA1 = ((SQRT(PAP(JL, JK, JBLK) / PAPH(JL, NLEV + 1, JBLK)) / YRECLDP%RVRFACTOR)*ZPRECLR) / MAX(ZCOVPCLR, ZEPSEC)
+            
+!!            ZBETA = RG*YRECLDP%RPECONS*0.5_JPRB*ZBETA1**0.5777_JPRB
+            ZBETA = RG*YRECLDP%RPECONS*0.5_JPRB*ZBETA1**0.5777_JPRB
+            
+            ZDENOM = 1.0_JPRB + ZBETA*PTSPHY*ZCORQSLIQ
+            ZDPR = ((ZCOVPCLR*ZBETA*(ZQSLIQ(JK) - ZQE)) / ZDENOM)*ZDP*ZRG_R
+            ZDPEVAP = ZDPR*ZDTGDP
+            
+            !---------------------------------------------------------
+            ! add evaporation term to explicit sink.
+            ! this has to be explicit since if treated in the implicit
+            ! term evaporation can not reduce rain to zero and model
+            ! produces small amounts of rainfall everywhere.
+            !---------------------------------------------------------
+            
+            ! Evaporate rain
+            ZEVAP = MIN(ZDPEVAP, ZQXFG(NCLDQR))
+            
+            ZSOLQA(NCLDQV, NCLDQR) = ZSOLQA(NCLDQV, NCLDQR) + ZEVAP
+            ZSOLQA(NCLDQR, NCLDQV) = ZSOLQA(NCLDQR, NCLDQV) - ZEVAP
+            
+            !-------------------------------------------------------------
+            ! Reduce the total precip coverage proportional to evaporation
+            ! to mimic the previous scheme which had a diagnostic
+            ! 2-flux treatment, abandoned due to the new prognostic precip
+            !-------------------------------------------------------------
+!!            ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JK))*ZEVAP) / ZQXFG(NCLDQR)))
+            ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JK))*ZEVAP) / ZQXFG(NCLDQR)))
+            
+            ! Update fg field
+            ZQXFG(NCLDQR) = ZQXFG(NCLDQR) - ZEVAP
+            
+          END IF
+          
+          
+          !---------------------------------------------------------
+          ! Rain evaporation scheme based on Abel and Boutle (2012)
+          !---------------------------------------------------------
+        ELSE IF (IEVAPRAIN == 2) THEN
+          
+          
+          !-----------------------------------------------------------------------
+          ! Calculate relative humidity limit for rain evaporation
+          ! to avoid cloud formation and saturation of the grid box
+          !-----------------------------------------------------------------------
+          ! Limit RH for rain evaporation dependent on precipitation fraction
+!!          ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK))
+          ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK))
+!!          ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB)
+          ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB)
+          
+          ! Critical relative humidity
+          !ZRHC=RAMID
+          !ZSIGK=PAP(JL,JK, JBLK)/PAPH(JL,NLEV+1)
+          ! Increase RHcrit to 1.0 towards the surface (eta>0.8)
+          !IF(ZSIGK > 0.8_JPRB) THEN
+          !  ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2
+          !ENDIF
+          !ZZRH = MIN(ZRHC,ZZRH)
+          
+          ! Further limit RH for rain evaporation to 80% (RHcrit in free troposphere)
+          ZZRH = MIN(0.8_JPRB, ZZRH)
+          
+          ZQE = MAX(0.0_JPRB, MIN(ZQX(JK, NCLDQV), ZQSLIQ(JK)))
+          
+          LLO1 = ZCOVPCLR > ZEPSEC .and. ZQXFG(NCLDQR) > ZEPSEC .and. ZQE < ZZRH*ZQSLIQ(JK)
+          
+          IF (LLO1) THEN
+            
+            !-------------------------------------------
+            ! Abel and Boutle (2012) evaporation
+            !-------------------------------------------
+            ! Calculate local precipitation (kg/kg)
+            ZPRECLR = ZQXFG(NCLDQR) / ZCOVPTOT
+            
+            ! Fallspeed air density correction
+!!            ZFALLCORR = (YRECLDP%RDENSREF / ZRHO)**0.4
+            ZFALLCORR = (YRECLDP%RDENSREF / ZRHO)**0.4
+            
+            ! Saturation vapour pressure with respect to liquid phase
+            ZESATLIQ = (RV / RD)*FOEELIQ(ZTP1(JK))
+            
+            ! Slope of particle size distribution
+!!            ZLAMBDA = (YRECLDP%RCL_FAC1 / ((ZRHO*ZPRECLR)))**YRECLDP%RCL_FAC2              ! ZPRECLR=kg/kg
+            ZLAMBDA = (YRECLDP%RCL_FAC1 / ((ZRHO*ZPRECLR)))**YRECLDP%RCL_FAC2              ! ZPRECLR=kg/kg
+            
+!!            ZEVAP_DENOM = YRECLDP%RCL_CDENOM1*ZESATLIQ - YRECLDP%RCL_CDENOM2*ZTP1(JK)*ZESATLIQ +  &
+            ZEVAP_DENOM = YRECLDP%RCL_CDENOM1*ZESATLIQ - YRECLDP%RCL_CDENOM2*ZTP1(JK)*ZESATLIQ +  &
+!!            & YRECLDP%RCL_CDENOM3*ZTP1(JK)**3._JPRB*PAP(JL, JK, JBLK)
+            & YRECLDP%RCL_CDENOM3*ZTP1(JK)**3._JPRB*PAP(JL, JK, JBLK)
+            
+            ! Temperature dependent conductivity
+            ZCORR2 = ((ZTP1(JK) / 273._JPRB)**1.5_JPRB*393._JPRB) / (ZTP1(JK) + 120._JPRB)
+!!            ZKA = YRECLDP%RCL_KA273*ZCORR2
+            ZKA = YRECLDP%RCL_KA273*ZCORR2
+            
+            ZSUBSAT = MAX(ZZRH*ZQSLIQ(JK) - ZQE, 0.0_JPRB)
+            
+!!            ZBETA = (0.5_JPRB / ZQSLIQ(JK))*ZTP1(JK)**2._JPRB*ZESATLIQ*YRECLDP%RCL_CONST1R*(ZCORR2 /  &
+            ZBETA = (0.5_JPRB / ZQSLIQ(JK))*ZTP1(JK)**2._JPRB*ZESATLIQ*YRECLDP%RCL_CONST1R*(ZCORR2 /  &
+!!            & ZEVAP_DENOM)*(0.78_JPRB / (ZLAMBDA**YRECLDP%RCL_CONST4R) + (YRECLDP%RCL_CONST2R*(ZRHO*ZFALLCORR)**0.5_JPRB) /  &
+            & ZEVAP_DENOM)*(0.78_JPRB / (ZLAMBDA**YRECLDP%RCL_CONST4R) + (YRECLDP%RCL_CONST2R*(ZRHO*ZFALLCORR)**0.5_JPRB) /  &
+!!            & ((ZCORR2**0.5_JPRB*ZLAMBDA**YRECLDP%RCL_CONST3R)))
+            & ((ZCORR2**0.5_JPRB*ZLAMBDA**YRECLDP%RCL_CONST3R)))
+            
+            ZDENOM = 1.0_JPRB + ZBETA*PTSPHY              !*ZCORQSLIQ(JK)
+            ZDPEVAP = (ZCOVPCLR*ZBETA*PTSPHY*ZSUBSAT) / ZDENOM
+            
+            !---------------------------------------------------------
+            ! Add evaporation term to explicit sink.
+            ! this has to be explicit since if treated in the implicit
+            ! term evaporation can not reduce rain to zero and model
+            ! produces small amounts of rainfall everywhere.
+            !---------------------------------------------------------
+            
+            ! Limit rain evaporation
+            ZEVAP = MIN(ZDPEVAP, ZQXFG(NCLDQR))
+            
+            ZSOLQA(NCLDQV, NCLDQR) = ZSOLQA(NCLDQV, NCLDQR) + ZEVAP
+            ZSOLQA(NCLDQR, NCLDQV) = ZSOLQA(NCLDQR, NCLDQV) - ZEVAP
+            
+            !-------------------------------------------------------------
+            ! Reduce the total precip coverage proportional to evaporation
+            ! to mimic the previous scheme which had a diagnostic
+            ! 2-flux treatment, abandoned due to the new prognostic precip
+            !-------------------------------------------------------------
+!!            ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JK))*ZEVAP) / ZQXFG(NCLDQR)))
+            ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JK))*ZEVAP) / ZQXFG(NCLDQR)))
+            
+            ! Update fg field
+            ZQXFG(NCLDQR) = ZQXFG(NCLDQR) - ZEVAP
+            
+          END IF
+          
+        END IF
+        ! on IEVAPRAIN
+        
+        !----------------------------------------------------------------------
+        ! 4.5   EVAPORATION OF SNOW
+        !----------------------------------------------------------------------
+        ! Snow
+        IF (IEVAPSNOW == 1) THEN
+          
+!!          ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK))
+          ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK))
+!!          ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB)
+          ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB)
+          ZQE = (ZQX(JK, NCLDQV) - ZA(JK)*ZQSICE(JK)) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK))
+          
+          !---------------------------------------------
+          ! humidity in moistest ZCOVPCLR part of domain
+          !---------------------------------------------
+          ZQE = MAX(0.0_JPRB, MIN(ZQE, ZQSICE(JK)))
+          LLO1 = ZCOVPCLR > ZEPSEC .and. ZQXFG(NCLDQS) > ZEPSEC .and. ZQE < ZZRH*ZQSICE(JK)
+          
+          IF (LLO1) THEN
+            ! note: zpreclr is a snow flux here (computed from ZQXFG(NCLDQS))
+            ZPRECLR = (ZQXFG(NCLDQS)*ZCOVPCLR) / SIGN(MAX(ABS(ZCOVPTOT*ZDTGDP), ZEPSILON), ZCOVPTOT*ZDTGDP)
+            
+            !--------------------------------------
+            ! actual microphysics formula in zbeta
+            !--------------------------------------
+            
+!!            ZBETA1 = ((SQRT(PAP(JL, JK, JBLK) / PAPH(JL, NLEV + 1, JBLK)) / YRECLDP%RVRFACTOR)*ZPRECLR) / MAX(ZCOVPCLR, ZEPSEC)
+            ZBETA1 = ((SQRT(PAP(JL, JK, JBLK) / PAPH(JL, NLEV + 1, JBLK)) / YRECLDP%RVRFACTOR)*ZPRECLR) / MAX(ZCOVPCLR, ZEPSEC)
+            
+!!            ZBETA = RG*YRECLDP%RPECONS*ZBETA1**0.5777_JPRB
+            ZBETA = RG*YRECLDP%RPECONS*ZBETA1**0.5777_JPRB
+            
+            ZDENOM = 1.0_JPRB + ZBETA*PTSPHY*ZCORQSICE
+            ZDPR = ((ZCOVPCLR*ZBETA*(ZQSICE(JK) - ZQE)) / ZDENOM)*ZDP*ZRG_R
+            ZDPEVAP = ZDPR*ZDTGDP
+            
+            !---------------------------------------------------------
+            ! add evaporation term to explicit sink.
+            ! this has to be explicit since if treated in the implicit
+            ! term evaporation can not reduce snow to zero and model
+            ! produces small amounts of snowfall everywhere.
+            !---------------------------------------------------------
+            
+            ! Evaporate snow
+            ZEVAP = MIN(ZDPEVAP, ZQXFG(NCLDQS))
+            
+            ZSOLQA(NCLDQV, NCLDQS) = ZSOLQA(NCLDQV, NCLDQS) + ZEVAP
+            ZSOLQA(NCLDQS, NCLDQV) = ZSOLQA(NCLDQS, NCLDQV) - ZEVAP
+            
+            !-------------------------------------------------------------
+            ! Reduce the total precip coverage proportional to evaporation
+            ! to mimic the previous scheme which had a diagnostic
+            ! 2-flux treatment, abandoned due to the new prognostic precip
+            !-------------------------------------------------------------
+!!            ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JK))*ZEVAP) / ZQXFG(NCLDQS)))
+            ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JK))*ZEVAP) / ZQXFG(NCLDQS)))
+            
+            !Update first guess field
+            ZQXFG(NCLDQS) = ZQXFG(NCLDQS) - ZEVAP
+            
+          END IF
+          !---------------------------------------------------------
+        ELSE IF (IEVAPSNOW == 2) THEN
+          
+          
+          
+          !-----------------------------------------------------------------------
+          ! Calculate relative humidity limit for snow evaporation
+          !-----------------------------------------------------------------------
+!!          ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK))
+          ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK))
+!!          ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB)
+          ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB)
+          ZQE = (ZQX(JK, NCLDQV) - ZA(JK)*ZQSICE(JK)) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK))
+          
+          !---------------------------------------------
+          ! humidity in moistest ZCOVPCLR part of domain
+          !---------------------------------------------
+          ZQE = MAX(0.0_JPRB, MIN(ZQE, ZQSICE(JK)))
+          LLO1 = ZCOVPCLR > ZEPSEC .and. ZQX(JK, NCLDQS) > ZEPSEC .and. ZQE < ZZRH*ZQSICE(JK)
+          
+          IF (LLO1) THEN
+            
+            ! Calculate local precipitation (kg/kg)
+            ZPRECLR = ZQX(JK, NCLDQS) / ZCOVPTOT
+            ZVPICE = (FOEEICE(ZTP1(JK))*RV) / RD
+            
+            ! Particle size distribution
+            ! ZTCG increases Ni with colder temperatures - essentially a
+            ! Fletcher or Meyers scheme?
+            ZTCG = 1.0_JPRB              !v1 EXP(RCL_X3I*(273.15_JPRB-ZTP1(JL,JK, JBLK))/8.18_JPRB)
+            ! ZFACX1I modification is based on Andrew Barrett's results
+            ZFACX1S = 1.0_JPRB              !v1 (ZICE0/1.E-5_JPRB)**0.627_JPRB
+            
+            ZAPLUSB =  &
+!!            & YRECLDP%RCL_APB1*ZVPICE - YRECLDP%RCL_APB2*ZVPICE*ZTP1(JK) + PAP(JL, JK, JBLK)*YRECLDP%RCL_APB3*ZTP1(JK)**3
+            & YRECLDP%RCL_APB1*ZVPICE - YRECLDP%RCL_APB2*ZVPICE*ZTP1(JK) + PAP(JL, JK, JBLK)*YRECLDP%RCL_APB3*ZTP1(JK)**3
+            ZCORRFAC = (1.0 / ZRHO)**0.5
+            ZCORRFAC2 = ((ZTP1(JK) / 273.0)**1.5)*(393.0 / (ZTP1(JK) + 120.0))
+            
+!!            ZPR02 = (ZRHO*ZPRECLR*YRECLDP%RCL_CONST1S) / ((ZTCG*ZFACX1S))
+            ZPR02 = (ZRHO*ZPRECLR*YRECLDP%RCL_CONST1S) / ((ZTCG*ZFACX1S))
+            
+!!            ZTERM1 = ((ZQSICE(JK) - ZQE)*ZTP1(JK)**2*ZVPICE*ZCORRFAC2*ZTCG*YRECLDP%RCL_CONST2S*ZFACX1S) /  &
+            ZTERM1 = ((ZQSICE(JK) - ZQE)*ZTP1(JK)**2*ZVPICE*ZCORRFAC2*ZTCG*YRECLDP%RCL_CONST2S*ZFACX1S) /  &
+            & ((ZRHO*ZAPLUSB*ZQSICE(JK)))
+!!            ZTERM2 = 0.65*YRECLDP%RCL_CONST6S*ZPR02**YRECLDP%RCL_CONST4S +  &
+            ZTERM2 = 0.65*YRECLDP%RCL_CONST6S*ZPR02**YRECLDP%RCL_CONST4S +  &
+!!            & (YRECLDP%RCL_CONST3S*ZCORRFAC**0.5*ZRHO**0.5*ZPR02**YRECLDP%RCL_CONST5S) / ZCORRFAC2**0.5
+            & (YRECLDP%RCL_CONST3S*ZCORRFAC**0.5*ZRHO**0.5*ZPR02**YRECLDP%RCL_CONST5S) / ZCORRFAC2**0.5
+            
+            ZDPEVAP = MAX(ZCOVPCLR*ZTERM1*ZTERM2*PTSPHY, 0.0_JPRB)
+            
+            !--------------------------------------------------------------------
+            ! Limit evaporation to snow amount
+            !--------------------------------------------------------------------
+            ZEVAP = MIN(ZDPEVAP, ZEVAPLIMICE)
+            ZEVAP = MIN(ZEVAP, ZQX(JK, NCLDQS))
+            
+            
+            ZSOLQA(NCLDQV, NCLDQS) = ZSOLQA(NCLDQV, NCLDQS) + ZEVAP
+            ZSOLQA(NCLDQS, NCLDQV) = ZSOLQA(NCLDQS, NCLDQV) - ZEVAP
+            
+            !-------------------------------------------------------------
+            ! Reduce the total precip coverage proportional to evaporation
+            ! to mimic the previous scheme which had a diagnostic
+            ! 2-flux treatment, abandoned due to the new prognostic precip
+            !-------------------------------------------------------------
+!!            ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JK))*ZEVAP) / ZQX(JK, NCLDQS)))
+            ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JK))*ZEVAP) / ZQX(JK, NCLDQS)))
+            
+            !Update first guess field
+            ZQXFG(NCLDQS) = ZQXFG(NCLDQS) - ZEVAP
+            
+          END IF
+          
+        END IF
+        ! on IEVAPSNOW
+        
+        !--------------------------------------
+        ! Evaporate small precipitation amounts
+        !--------------------------------------
+        DO JM=1,NCLV
+          IF (LLFALL(JM)) THEN
+!!            IF (ZQXFG(JM) < YRECLDP%RLMIN) THEN
+            IF (ZQXFG(JM) < YRECLDP%RLMIN) THEN
+              ZSOLQA(NCLDQV, JM) = ZSOLQA(NCLDQV, JM) + ZQXFG(JM)
+              ZSOLQA(JM, NCLDQV) = ZSOLQA(JM, NCLDQV) - ZQXFG(JM)
+            END IF
+          END IF
+        END DO
+        
+        !######################################################################
+        !            5.0  *** SOLVERS FOR A AND L ***
+        ! now use an implicit solution rather than exact solution
+        ! solver is forward in time, upstream difference for advection
+        !######################################################################
+        
+        !---------------------------
+        ! 5.1 solver for cloud cover
+        !---------------------------
+        ZANEW = (ZA(JK) + ZSOLAC) / (1.0_JPRB + ZSOLAB)
+        ZANEW = MIN(ZANEW, 1.0_JPRB)
+!!        IF (ZANEW < YRECLDP%RAMIN)         ZANEW = 0.0_JPRB
+        IF (ZANEW < YRECLDP%RAMIN)         ZANEW = 0.0_JPRB
+        ZDA = ZANEW - ZAORIG(JK)
+        !---------------------------------
+        ! variables needed for next level
+        !---------------------------------
+        ZANEWM1 = ZANEW
+        
+        !--------------------------------
+        ! 5.2 solver for the microphysics
+        !--------------------------------
+        
+        !--------------------------------------------------------------
+        ! Truncate explicit sinks to avoid negatives
+        ! Note: Species are treated in the order in which they run out
+        ! since the clipping will alter the balance for the other vars
+        !--------------------------------------------------------------
+        
+        DO JM=1,NCLV
+!$claw nodep
+          DO JN=1,NCLV
+            LLINDEX3(JN, JM) = .false.
+          END DO
+          ZSINKSUM(JM) = 0.0_JPRB
+        END DO
+        
+        !----------------------------
+        ! collect sink terms and mark
+        !----------------------------
+        DO JM=1,NCLV
+          DO JN=1,NCLV
+            ZSINKSUM(JM) = ZSINKSUM(JM) - ZSOLQA(JM, JN)              ! +ve total is bad
+          END DO
+        END DO
+        
+        !---------------------------------------
+        ! calculate overshoot and scaling factor
+        !---------------------------------------
+        DO JM=1,NCLV
+          ZMAX = MAX(ZQX(JK, JM), ZEPSEC)
+          ZRAT = MAX(ZSINKSUM(JM), ZMAX)
+          ZRATIO(JM) = ZMAX / ZRAT
+        END DO
+        
+        !--------------------------------------------
+        ! scale the sink terms, in the correct order,
+        ! recalculating the scale factor each time
+        !--------------------------------------------
+        DO JM=1,NCLV
+          ZSINKSUM(JM) = 0.0_JPRB
+        END DO
+        
+        !----------------
+        ! recalculate sum
+        !----------------
+        DO JM=1,NCLV
+          PSUM_SOLQA = 0.0
+          DO JN=1,NCLV
+            PSUM_SOLQA = PSUM_SOLQA + ZSOLQA(JM, JN)
+          END DO
+          ! ZSINKSUM(JL,JM)=ZSINKSUM(JL,JM)-SUM(ZSOLQA(JL,JM,1:NCLV))
+          ZSINKSUM(JM) = ZSINKSUM(JM) - PSUM_SOLQA
+          !---------------------------
+          ! recalculate scaling factor
+          !---------------------------
+          ZMM = MAX(ZQX(JK, JM), ZEPSEC)
+          ZRR = MAX(ZSINKSUM(JM), ZMM)
+          ZRATIO(JM) = ZMM / ZRR
+          !------
+          ! scale
+          !------
+          ZZRATIO = ZRATIO(JM)
+          !DIR$ IVDEP
+          !DIR$ PREFERVECTOR
+          DO JN=1,NCLV
+            IF (ZSOLQA(JM, JN) < 0.0_JPRB) THEN
+              ZSOLQA(JM, JN) = ZSOLQA(JM, JN)*ZZRATIO
+              ZSOLQA(JN, JM) = ZSOLQA(JN, JM)*ZZRATIO
+            END IF
+          END DO
+        END DO
+        
+        !--------------------------------------------------------------
+        ! 5.2.2 Solver
+        !------------------------
+        
+        !------------------------
+        ! set the LHS of equation
+        !------------------------
+        DO JM=1,NCLV
+          DO JN=1,NCLV
+            !----------------------------------------------
+            ! diagonals: microphysical sink terms+transport
+            !----------------------------------------------
+            IF (JN == JM) THEN
+              ZQLHS(JN, JM) = 1.0_JPRB + ZFALLSINK(JM)
+              DO JO=1,NCLV
+                ZQLHS(JN, JM) = ZQLHS(JN, JM) + ZSOLQB(JO, JN)
+              END DO
+              !------------------------------------------
+              ! non-diagonals: microphysical source terms
+              !------------------------------------------
+            ELSE
+              ZQLHS(JN, JM) = -ZSOLQB(JN, JM)                ! here is the delta T - missing from doc.
+            END IF
+          END DO
+        END DO
+        
+        !------------------------
+        ! set the RHS of equation
+        !------------------------
+        DO JM=1,NCLV
+          !---------------------------------
+          ! sum the explicit source and sink
+          !---------------------------------
+          ZEXPLICIT = 0.0_JPRB
+          DO JN=1,NCLV
+            ZEXPLICIT = ZEXPLICIT + ZSOLQA(JM, JN)              ! sum over middle index
+          END DO
+          ZQXN(JM) = ZQX(JK, JM) + ZEXPLICIT
+        END DO
+        
+        !-----------------------------------
+        ! *** solve by LU decomposition: ***
+        !-----------------------------------
+        
+        ! Note: This fast way of solving NCLVxNCLV system
+        !       assumes a good behaviour (i.e. non-zero diagonal
+        !       terms with comparable orders) of the matrix stored
+        !       in ZQLHS. For the moment this is the case but
+        !       be aware to preserve it when doing eventual
+        !       modifications.
+        
+        ! Non pivoting recursive factorization
+        DO JN=1,NCLV - 1
+          ! number of steps
+          DO JM=JN + 1,NCLV
+            ! row index
+            ZQLHS(JM, JN) = ZQLHS(JM, JN) / ZQLHS(JN, JN)
+            DO IK=JN + 1,NCLV
+              ! column index
+              ZQLHS(JM, IK) = ZQLHS(JM, IK) - ZQLHS(JM, JN)*ZQLHS(JN, IK)
+            END DO
+          END DO
+        END DO
+        
+        ! Backsubstitution
+        !  step 1
+        DO JN=2,NCLV
+          DO JM=1,JN - 1
+            ZQXN(JN) = ZQXN(JN) - ZQLHS(JN, JM)*ZQXN(JM)
+          END DO
+        END DO
+        !  step 2
+        ZQXN(NCLV) = ZQXN(NCLV) / ZQLHS(NCLV, NCLV)
+        DO JN=NCLV - 1,1,-1
+          DO JM=JN + 1,NCLV
+            ZQXN(JN) = ZQXN(JN) - ZQLHS(JN, JM)*ZQXN(JM)
+          END DO
+          ZQXN(JN) = ZQXN(JN) / ZQLHS(JN, JN)
+        END DO
+        
+        ! Ensure no small values (including negatives) remain in cloud variables nor
+        ! precipitation rates.
+        ! Evaporate l,i,r,s to water vapour. Latent heating taken into account below
+        DO JN=1,NCLV - 1
+          IF (ZQXN(JN) < ZEPSEC) THEN
+            ZQXN(NCLDQV) = ZQXN(NCLDQV) + ZQXN(JN)
+            ZQXN(JN) = 0.0_JPRB
+          END IF
+        END DO
+        
+        !--------------------------------
+        ! variables needed for next level
+        !--------------------------------
+        DO JM=1,NCLV
+          ZQXNM1(JM) = ZQXN(JM)
+          ZQXN2D(JK, JM) = ZQXN(JM)
+        END DO
+        
+        !------------------------------------------------------------------------
+        ! 5.3 Precipitation/sedimentation fluxes to next level
+        !     diagnostic precipitation fluxes
+        !     It is this scaled flux that must be used for source to next layer
+        !------------------------------------------------------------------------
+        
+        DO JM=1,NCLV
+          ZPFPLSX(JK + 1, JM) = ZFALLSINK(JM)*ZQXN(JM)*ZRDTGDP
+        END DO
+        
+        ! Ensure precipitation fraction is zero if no precipitation
+        ZQPRETOT = ZPFPLSX(JK + 1, NCLDQS) + ZPFPLSX(JK + 1, NCLDQR)
+        IF (ZQPRETOT < ZEPSEC) THEN
+          ZCOVPTOT = 0.0_JPRB
+        END IF
+        
+        !######################################################################
+        !              6  *** UPDATE TENDENCIES ***
+        !######################################################################
+        
+        !--------------------------------
+        ! 6.1 Temperature and CLV budgets
+        !--------------------------------
+        
+        DO JM=1,NCLV - 1
+          
+          ! calculate fluxes in and out of box for conservation of TL
+          ZFLUXQ(JM) = ZPSUPSATSRCE(JM) + ZCONVSRCE(JM) + ZFALLSRCE(JM) - (ZFALLSINK(JM) + ZCONVSINK(JM))*ZQXN(JM)
+          
+          IF (IPHASE(JM) == 1) THEN
+            TENDENCY_LOC(JL, JK, 1, JBLK) = TENDENCY_LOC(JL, JK, 1, JBLK) + RALVDCP*(ZQXN(JM) - ZQX(JK, JM) - ZFLUXQ(JM))*ZQTMST
+          END IF
+          
+          IF (IPHASE(JM) == 2) THEN
+            TENDENCY_LOC(JL, JK, 1, JBLK) = TENDENCY_LOC(JL, JK, 1, JBLK) + RALSDCP*(ZQXN(JM) - ZQX(JK, JM) - ZFLUXQ(JM))*ZQTMST
+          END IF
+          
+          !----------------------------------------------------------------------
+          ! New prognostic tendencies - ice,liquid rain,snow
+          ! Note: CLV arrays use PCLV in calculation of tendency while humidity
+          !       uses ZQX. This is due to clipping at start of cloudsc which
+          !       include the tendency already in TENDENCY_LOC_T and TENDENCY_LOC_q. ZQX was reset
+          !----------------------------------------------------------------------
+          TENDENCY_LOC(JL, JK, 3+JM, JBLK) = TENDENCY_LOC(JL, JK, 3+JM, JBLK) + (ZQXN(JM) - ZQX0(JK, JM))*ZQTMST
+          
+        END DO
+        
+        !----------------------
+        ! 6.2 Humidity budget
+        !----------------------
+        TENDENCY_LOC(JL, JK, 3, JBLK) = TENDENCY_LOC(JL, JK, 3, JBLK) + (ZQXN(NCLDQV) - ZQX(JK, NCLDQV))*ZQTMST
+        
+        !-------------------
+        ! 6.3 cloud cover
+        !-----------------------
+        TENDENCY_LOC(JL, JK, 2, JBLK) = TENDENCY_LOC(JL, JK, 2, JBLK) + ZDA*ZQTMST
+        
+        !--------------------------------------------------
+        ! Copy precipitation fraction into output variable
+        !-------------------------------------------------
+        PCOVPTOT(JL, JK, JBLK) = ZCOVPTOT
+        
+      END DO
+      ! on vertical level JK
+      !----------------------------------------------------------------------
+      !                       END OF VERTICAL LOOP
+      !----------------------------------------------------------------------
+      
+      !######################################################################
+      !              8  *** FLUX/DIAGNOSTICS COMPUTATIONS ***
+      !######################################################################
+      
+      !--------------------------------------------------------------------
+      ! Copy general precip arrays back into PFP arrays for GRIB archiving
+      ! Add rain and liquid fluxes, ice and snow fluxes
+      !--------------------------------------------------------------------
+!! !$acc loop seq
+      DO JK=1,NLEV + 1
+        PFPLSL(JL, JK, JBLK) = ZPFPLSX(JK, NCLDQR) + ZPFPLSX(JK, NCLDQL)
+        PFPLSN(JL, JK, JBLK) = ZPFPLSX(JK, NCLDQS) + ZPFPLSX(JK, NCLDQI)
+      END DO
+      
+      !--------
+      ! Fluxes:
+      !--------
+      PFSQLF(JL, 1, JBLK) = 0.0_JPRB
+      PFSQIF(JL, 1, JBLK) = 0.0_JPRB
+      PFSQRF(JL, 1, JBLK) = 0.0_JPRB
+      PFSQSF(JL, 1, JBLK) = 0.0_JPRB
+      PFCQLNG(JL, 1, JBLK) = 0.0_JPRB
+      PFCQNNG(JL, 1, JBLK) = 0.0_JPRB
+      PFCQRNG(JL, 1, JBLK) = 0.0_JPRB        !rain
+      PFCQSNG(JL, 1, JBLK) = 0.0_JPRB        !snow
+      ! fluxes due to turbulence
+      PFSQLTUR(JL, 1, JBLK) = 0.0_JPRB
+      PFSQITUR(JL, 1, JBLK) = 0.0_JPRB
+      
+!! !$acc loop seq
+      DO JK=1,NLEV
+        
+        ZGDPH_R = -ZRG_R*(PAPH(JL, JK + 1, JBLK) - PAPH(JL, JK, JBLK))*ZQTMST
+        PFSQLF(JL, JK + 1, JBLK) = PFSQLF(JL, JK, JBLK)
+        PFSQIF(JL, JK + 1, JBLK) = PFSQIF(JL, JK, JBLK)
+        PFSQRF(JL, JK + 1, JBLK) = PFSQLF(JL, JK, JBLK)
+        PFSQSF(JL, JK + 1, JBLK) = PFSQIF(JL, JK, JBLK)
+        PFCQLNG(JL, JK + 1, JBLK) = PFCQLNG(JL, JK, JBLK)
+        PFCQNNG(JL, JK + 1, JBLK) = PFCQNNG(JL, JK, JBLK)
+        PFCQRNG(JL, JK + 1, JBLK) = PFCQLNG(JL, JK, JBLK)
+        PFCQSNG(JL, JK + 1, JBLK) = PFCQNNG(JL, JK, JBLK)
+        PFSQLTUR(JL, JK + 1, JBLK) = PFSQLTUR(JL, JK, JBLK)
+        PFSQITUR(JL, JK + 1, JBLK) = PFSQITUR(JL, JK, JBLK)
+        
+        ZALFAW = ZFOEALFA(JK)
+        
+        ! Liquid , LS scheme minus detrainment
+        PFSQLF(JL, JK + 1, JBLK) = PFSQLF(JL, JK + 1, JBLK) + (ZQXN2D(JK, NCLDQL) - ZQX0(JK, NCLDQL) + PVFL(JL, JK, JBLK)*PTSPHY -  &
+        & ZALFAW*PLUDE(JL, JK, JBLK))*ZGDPH_R
+        ! liquid, negative numbers
+        PFCQLNG(JL, JK + 1, JBLK) = PFCQLNG(JL, JK + 1, JBLK) + ZLNEG(JK, NCLDQL)*ZGDPH_R
+        
+        ! liquid, vertical diffusion
+        PFSQLTUR(JL, JK + 1, JBLK) = PFSQLTUR(JL, JK + 1, JBLK) + PVFL(JL, JK, JBLK)*PTSPHY*ZGDPH_R
+        
+        ! Rain, LS scheme
+        PFSQRF(JL, JK + 1, JBLK) = PFSQRF(JL, JK + 1, JBLK) + (ZQXN2D(JK, NCLDQR) - ZQX0(JK, NCLDQR))*ZGDPH_R
+        ! rain, negative numbers
+        PFCQRNG(JL, JK + 1, JBLK) = PFCQRNG(JL, JK + 1, JBLK) + ZLNEG(JK, NCLDQR)*ZGDPH_R
+        
+        ! Ice , LS scheme minus detrainment
+        PFSQIF(JL, JK + 1, JBLK) = PFSQIF(JL, JK + 1, JBLK) + (ZQXN2D(JK, NCLDQI) - ZQX0(JK, NCLDQI) + PVFI(JL, JK, JBLK)*PTSPHY -  &
+        & (1.0_JPRB - ZALFAW)*PLUDE(JL, JK, JBLK))*ZGDPH_R
+        ! ice, negative numbers
+        PFCQNNG(JL, JK + 1, JBLK) = PFCQNNG(JL, JK + 1, JBLK) + ZLNEG(JK, NCLDQI)*ZGDPH_R
+        
+        ! ice, vertical diffusion
+        PFSQITUR(JL, JK + 1, JBLK) = PFSQITUR(JL, JK + 1, JBLK) + PVFI(JL, JK, JBLK)*PTSPHY*ZGDPH_R
+        
+        ! snow, LS scheme
+        PFSQSF(JL, JK + 1, JBLK) = PFSQSF(JL, JK + 1, JBLK) + (ZQXN2D(JK, NCLDQS) - ZQX0(JK, NCLDQS))*ZGDPH_R
+        ! snow, negative numbers
+        PFCQSNG(JL, JK + 1, JBLK) = PFCQSNG(JL, JK + 1, JBLK) + ZLNEG(JK, NCLDQS)*ZGDPH_R
+      END DO
+      
+      !-----------------------------------
+      ! enthalpy flux due to precipitation
+      !-----------------------------------
+!! !$acc loop seq
+      DO JK=1,NLEV + 1
+        PFHPSL(JL, JK, JBLK) = -RLVTT*PFPLSL(JL, JK, JBLK)
+        PFHPSN(JL, JK, JBLK) = -RLSTT*PFPLSN(JL, JK, JBLK)
+      END DO
+      
+      !===============================================================================
+      !IF (LHOOK) CALL DR_HOOK('CLOUDSC',1,ZHOOK_HANDLE)
+!!    END DO
+
+ #ifdef false
+#endif
+
+  END SUBROUTINE CLOUDSC_SCC_CUF
+END MODULE CLOUDSC_GPU_SCC_CUF_MOD
diff --git a/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 b/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90
index 6857e45b..65a3c5da 100644
--- a/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90
+++ b/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90
@@ -21,10 +21,18 @@ PROGRAM DWARF_CLOUDSC
 USE CLOUDSC_DRIVER_GPU_SCC_MOD, ONLY: CLOUDSC_DRIVER_GPU_SCC
 #endif
 
+#ifdef CLOUDSC_GPU_SCC_CUF
+USE CLOUDSC_DRIVER_GPU_SCC_CUF_MOD, ONLY: CLOUDSC_DRIVER_GPU_SCC_CUF
+#endif
+
 #ifdef CLOUDSC_GPU_SCC_HOIST
 USE CLOUDSC_DRIVER_GPU_SCC_HOIST_MOD, ONLY: CLOUDSC_DRIVER_GPU_SCC_HOIST
 #endif
 
+#ifdef CLOUDSC_GPU_OMP_SCC_HOIST
+USE CLOUDSC_DRIVER_GPU_OMP_SCC_HOIST_MOD, ONLY: CLOUDSC_DRIVER_GPU_SCC_HOIST
+#endif
+
 IMPLICIT NONE
 
 CHARACTER(LEN=20) :: CLARG
@@ -78,7 +86,7 @@ PROGRAM DWARF_CLOUDSC
 END IF
 
 print '(1X,A42)', 'Executing CLOUDSC-GPU, "CLAW" variant...'
- 
+
 ! Call the driver to perform the parallel loop over our kernel
 CALL CLOUDSC_DRIVER_GPU_CLAW(NUMOMP, NPROMA, GLOBAL_STATE%KLEV, NGPTOT, NGPTOTG, &
      & GLOBAL_STATE%KFLDX, GLOBAL_STATE%PTSPHY, &
@@ -132,8 +140,35 @@ PROGRAM DWARF_CLOUDSC
      & )
 #endif
 
+#ifdef CLOUDSC_GPU_SCC_CUF
+print '(1X,A48)', 'Executing CLOUDSC-GPU, "SCC-CUF" CUDA variant...'  ! A48 = full length of the message
 
-#ifdef CLOUDSC_GPU_SCC_HOIST
+ ! Call the driver to perform the parallel loop over our kernel
+CALL CLOUDSC_DRIVER_GPU_SCC_CUF(NUMOMP, NPROMA, GLOBAL_STATE%KLEV, NGPTOT, GLOBAL_STATE%NBLOCKS, NGPTOTG, &
+     & GLOBAL_STATE%KFLDX, GLOBAL_STATE%PTSPHY, &
+     & GLOBAL_STATE%PT, GLOBAL_STATE%PQ, &
+     & GLOBAL_STATE%B_CML,   GLOBAL_STATE%B_TMP, GLOBAL_STATE%B_LOC, &
+     & GLOBAL_STATE%PVFA,    GLOBAL_STATE%PVFL,  GLOBAL_STATE%PVFI, &
+     & GLOBAL_STATE%PDYNA,   GLOBAL_STATE%PDYNL, GLOBAL_STATE%PDYNI, &
+     & GLOBAL_STATE%PHRSW,   GLOBAL_STATE%PHRLW, &
+     & GLOBAL_STATE%PVERVEL, GLOBAL_STATE%PAP,   GLOBAL_STATE%PAPH, &
+     & GLOBAL_STATE%PLSM,    GLOBAL_STATE%LDCUM, GLOBAL_STATE%KTYPE, &
+     & GLOBAL_STATE%PLU,     GLOBAL_STATE%PLUDE, GLOBAL_STATE%PSNDE, &
+     & GLOBAL_STATE%PMFU,    GLOBAL_STATE%PMFD, &
+     & GLOBAL_STATE%PA, &
+     & GLOBAL_STATE%PCLV,    GLOBAL_STATE%PSUPSAT,&
+     & GLOBAL_STATE%PLCRIT_AER, GLOBAL_STATE%PICRIT_AER, GLOBAL_STATE%PRE_ICE, &
+     & GLOBAL_STATE%PCCN,     GLOBAL_STATE%PNICE,&
+     & GLOBAL_STATE%PCOVPTOT, GLOBAL_STATE%PRAINFRAC_TOPRFZ, &
+     & GLOBAL_STATE%PFSQLF,   GLOBAL_STATE%PFSQIF ,  GLOBAL_STATE%PFCQNNG,  GLOBAL_STATE%PFCQLNG, &
+     & GLOBAL_STATE%PFSQRF,   GLOBAL_STATE%PFSQSF ,  GLOBAL_STATE%PFCQRNG,  GLOBAL_STATE%PFCQSNG, &
+     & GLOBAL_STATE%PFSQLTUR, GLOBAL_STATE%PFSQITUR, &
+     & GLOBAL_STATE%PFPLSL,   GLOBAL_STATE%PFPLSN,   GLOBAL_STATE%PFHPSL,   GLOBAL_STATE%PFHPSN &
+     & )
+#endif
+
+
+#if defined(CLOUDSC_GPU_SCC_HOIST) || defined(CLOUDSC_GPU_OMP_SCC_HOIST)
 print '(1X,A42)', 'Executing CLOUDSC-GPU, "SCC-hoist" variant...'
 
  ! Call the driver to perform the parallel loop over our kernel
diff --git a/src/cloudsc_gpu/nlev_mod.F90 b/src/cloudsc_gpu/nlev_mod.F90
new file mode 100644
index 00000000..99bd8a9f
--- /dev/null
+++ b/src/cloudsc_gpu/nlev_mod.F90
@@ -0,0 +1,3 @@
+MODULE NLEV_MOD
+  INTEGER, PARAMETER :: NLEV=137
+END MODULE NLEV_MOD
diff --git a/src/cloudsc_loki/CMakeLists.txt b/src/cloudsc_loki/CMakeLists.txt
index 53764db5..1648c58c 100644
--- a/src/cloudsc_loki/CMakeLists.txt
+++ b/src/cloudsc_loki/CMakeLists.txt
@@ -12,6 +12,12 @@ ecbuild_add_option( FEATURE CLOUDSC_LOKI
     CONDITION Serialbox_FOUND OR HDF5_FOUND
 )
 
+# Define the CLAW-based Loki dwarf variants as an ECBuild feature
+ecbuild_add_option( FEATURE CLOUDSC_LOKI_CLAW
+    DESCRIPTION "Build optimized CPU and GPU version of CLOUDSC derived from Loki+CLAW" DEFAULT OFF
+    CONDITION HAVE_CLOUDSC_LOKI
+)
+
 function( cloudsc_xmod _TARGET )
 
     if( TARGET clawfc AND ${LOKI_FRONTEND} STREQUAL "omni" )
@@ -78,8 +84,8 @@ if( HAVE_CLOUDSC_LOKI )
         MODE idem FRONTEND ${LOKI_FRONTEND} CPP
         CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config
         PATH ${CMAKE_CURRENT_SOURCE_DIR}
-        HEADER ${COMMON_MODULE}/yomphyder.F90
-        INCLUDE ${COMMON_INCLUDE}
+        HEADERS ${COMMON_MODULE}/yomphyder.F90
+        INCLUDES ${COMMON_INCLUDE}
         XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
         OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-idem
         OUTPUT loki-idem/cloudsc.idem.F90 loki-idem/cloudsc_driver_loki_mod.idem.F90
@@ -91,15 +97,44 @@ if( HAVE_CLOUDSC_LOKI )
             dwarf_cloudsc.F90
             loki-idem/cloudsc_driver_loki_mod.idem.F90
             loki-idem/cloudsc.idem.F90
+        LIBS
+            cloudsc-common-lib
         DEFINITIONS ${CLOUDSC_DEFINITIONS}
     )
-    target_link_libraries( dwarf-cloudsc-loki-idem PRIVATE cloudsc-common-lib )
-    if( TARGET OpenMP::OpenMP_Fortran )
-        target_link_libraries( dwarf-cloudsc-loki-idem PRIVATE OpenMP::OpenMP_Fortran )
-    endif()
-    if ( HAVE_HDF5 )
-        target_include_directories( dwarf-cloudsc-loki-idem PRIVATE ${HDF5_Fortran_INCLUDE_DIRS} )
-    endif()
+
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-loki-idem-serial
+        COMMAND bin/dwarf-cloudsc-loki-idem
+        ARGS 1 100 16
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 1
+    )
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-loki-idem-omp
+        COMMAND bin/dwarf-cloudsc-loki-idem
+        ARGS 4 100 16
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 4
+        CONDITION HAVE_OMP
+    )
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-loki-idem-mpi
+        COMMAND bin/dwarf-cloudsc-loki-idem
+        ARGS 1 100 16
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        MPI 2
+        OMP 1
+        CONDITION HAVE_MPI
+    )
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-loki-idem-mpi-omp
+        COMMAND bin/dwarf-cloudsc-loki-idem
+        ARGS 4 100 16
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        MPI 2
+        OMP 4
+        CONDITION HAVE_OMP AND HAVE_MPI
+    )
 
 
     ####################################################
@@ -113,8 +148,8 @@ if( HAVE_CLOUDSC_LOKI )
         MODE sca FRONTEND ${LOKI_FRONTEND} CPP
         CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config
         PATH ${CMAKE_CURRENT_SOURCE_DIR}
-        HEADER ${COMMON_MODULE}/yomphyder.F90
-        INCLUDE ${COMMON_INCLUDE}
+        HEADERS ${COMMON_MODULE}/yomphyder.F90
+        INCLUDES ${COMMON_INCLUDE}
         XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
         OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-sca
         OUTPUT loki-sca/cloudsc.sca.F90 loki-sca/cloudsc_driver_loki_mod.sca.F90
@@ -126,23 +161,34 @@ if( HAVE_CLOUDSC_LOKI )
             dwarf_cloudsc.F90
             loki-sca/cloudsc_driver_loki_mod.sca.F90
             loki-sca/cloudsc.sca.F90
+        LIBS
+            cloudsc-common-lib
         DEFINITIONS ${CLOUDSC_DEFINITIONS}
     )
-    target_link_libraries( dwarf-cloudsc-loki-sca PRIVATE cloudsc-common-lib )
-    if( TARGET OpenMP::OpenMP_Fortran )
-        target_link_libraries( dwarf-cloudsc-loki-sca PRIVATE OpenMP::OpenMP_Fortran )
-    endif()
-    if ( HAVE_HDF5 )
-        target_include_directories( dwarf-cloudsc-loki-sca PRIVATE ${HDF5_Fortran_INCLUDE_DIRS} )
-    endif()
 
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-loki-sca-serial
+        COMMAND bin/dwarf-cloudsc-loki-sca
+        ARGS 1 100 16
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 1
+    )
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-loki-sca-mpi
+        COMMAND bin/dwarf-cloudsc-loki-sca
+        ARGS 1 100 16
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        MPI 2
+        OMP 1
+        CONDITION HAVE_MPI
+    )
 
     ####################################################
     ##  CLAW-CPU mode:                                ##
     ##   * Generate SCA code with CLAW annotations    ##
     ##   * Process with CLAW (CPU layout and OpenMP)  ##
     ####################################################
-    if( TARGET clawfc )
+    if( HAVE_CLOUDSC_LOKI_CLAW AND TARGET clawfc )
 
         cloudsc_xmod( loki-claw-cpu )
 
@@ -150,8 +196,8 @@ if( HAVE_CLOUDSC_LOKI )
             MODE claw FRONTEND ${LOKI_FRONTEND} CPP
             CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config
             PATH ${CMAKE_CURRENT_SOURCE_DIR}
-            HEADER ${COMMON_MODULE}/yomphyder.F90
-            INCLUDE ${COMMON_INCLUDE}
+            HEADERS ${COMMON_MODULE}/yomphyder.F90
+            INCLUDES ${COMMON_INCLUDE}
             XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
             OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-claw-cpu
             OUTPUT loki-claw-cpu/cloudsc.claw.F90 loki-claw-cpu/cloudsc_driver_loki_mod.claw.F90
@@ -163,7 +209,7 @@ if( HAVE_CLOUDSC_LOKI )
         claw_compile(
             MODEL_CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/claw_cloudsc.config
             TARGET cpu DIRECTIVE none
-            INCLUDE ${COMMON_INCLUDE}
+            INCLUDES ${COMMON_INCLUDE}
             XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
             SOURCE loki-claw-cpu/cloudsc.claw.F90
             OUTPUT loki-claw-cpu/cloudsc.claw.cpu.F90
@@ -171,7 +217,7 @@ if( HAVE_CLOUDSC_LOKI )
         claw_compile(
             MODEL_CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/claw_cloudsc.config
             TARGET cpu DIRECTIVE none
-            INCLUDE ${COMMON_INCLUDE}
+            INCLUDES ${COMMON_INCLUDE}
             XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
             SOURCE loki-claw-cpu/cloudsc_driver_loki_mod.claw.F90
             OUTPUT loki-claw-cpu/cloudsc_driver_loki_mod.claw.cpu.F90
@@ -183,15 +229,44 @@ if( HAVE_CLOUDSC_LOKI )
                 dwarf_cloudsc.F90
                 loki-claw-cpu/cloudsc_driver_loki_mod.claw.cpu.F90
                 loki-claw-cpu/cloudsc.claw.cpu.F90
+            LIBS
+                cloudsc-common-lib
             DEFINITIONS ${CLOUDSC_DEFINITIONS}
         )
-        target_link_libraries( dwarf-cloudsc-loki-claw-cpu PRIVATE cloudsc-common-lib )
-        if( TARGET OpenMP::OpenMP_Fortran )
-            target_link_libraries( dwarf-cloudsc-loki-claw-cpu PRIVATE OpenMP::OpenMP_Fortran )
-        endif()
- 	if ( HAVE_HDF5 )
-          target_include_directories( dwarf-cloudsc-loki-claw-cpu PRIVATE ${HDF5_Fortran_INCLUDE_DIRS} )
-	endif()
+
+        ecbuild_add_test(
+            TARGET dwarf-cloudsc-loki-claw-cpu-serial
+            COMMAND bin/dwarf-cloudsc-loki-claw-cpu
+            ARGS 1 100 16
+            WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+            OMP 1
+        )
+        ecbuild_add_test(
+            TARGET dwarf-cloudsc-loki-claw-cpu-omp
+            COMMAND bin/dwarf-cloudsc-loki-claw-cpu
+            ARGS 4 100 16
+            WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+            OMP 4
+            CONDITION HAVE_OMP
+        )
+        ecbuild_add_test(
+            TARGET dwarf-cloudsc-loki-claw-cpu-mpi
+            COMMAND bin/dwarf-cloudsc-loki-claw-cpu
+            ARGS 1 100 16
+            WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+            MPI 2
+            OMP 1
+            CONDITION HAVE_MPI
+        )
+        ecbuild_add_test(
+            TARGET dwarf-cloudsc-loki-claw-cpu-mpi-omp
+            COMMAND bin/dwarf-cloudsc-loki-claw-cpu
+            ARGS 4 100 16
+            WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+            MPI 2
+            OMP 4
+            CONDITION HAVE_OMP AND HAVE_MPI
+        )
     endif()
 
     ####################################################
@@ -199,7 +274,7 @@ if( HAVE_CLOUDSC_LOKI )
     ##   * Generate SCA code with CLAW annotations    ##
     ##   * Process with CLAW (GPU layout and OpenACC) ##
     ####################################################
-    if( TARGET clawfc )
+    if( HAVE_CLOUDSC_LOKI_CLAW AND TARGET clawfc )
 
         cloudsc_xmod( loki-claw-gpu )
 
@@ -209,9 +284,9 @@ if( HAVE_CLOUDSC_LOKI )
             MODE claw FRONTEND ${LOKI_FRONTEND} CPP
             CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config
             PATH ${CMAKE_CURRENT_SOURCE_DIR}
-            HEADER ${COMMON_MODULE}/yomphyder.F90
-            INCLUDE ${COMMON_INCLUDE}
-            DEFINE CLOUDSC_GPU_TIMING ${CLOUDSC_DEFINE_STMT_FUNC}
+            HEADERS ${COMMON_MODULE}/yomphyder.F90
+            INCLUDES ${COMMON_INCLUDE}
+            DEFINITIONS CLOUDSC_GPU_TIMING ${CLOUDSC_DEFINE_STMT_FUNC}
             DATA_OFFLOAD REMOVE_OPENMP
             XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
             OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-claw-gpu
@@ -222,7 +297,7 @@ if( HAVE_CLOUDSC_LOKI )
         claw_compile(
             MODEL_CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/claw_cloudsc.config
             TARGET gpu DIRECTIVE openacc
-            INCLUDE ${COMMON_INCLUDE}
+            INCLUDES ${COMMON_INCLUDE}
             XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
             SOURCE loki-claw-gpu/cloudsc.claw.F90
             OUTPUT loki-claw-gpu/cloudsc.claw.gpu.F90
@@ -230,7 +305,7 @@ if( HAVE_CLOUDSC_LOKI )
         claw_compile(
             MODEL_CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/claw_cloudsc.config
             TARGET gpu DIRECTIVE openacc
-            INCLUDE ${COMMON_INCLUDE}
+            INCLUDES ${COMMON_INCLUDE}
             XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
             SOURCE loki-claw-gpu/cloudsc_driver_loki_mod.claw.F90
             OUTPUT loki-claw-gpu/cloudsc_driver_loki_mod.claw.gpu.F90
@@ -242,20 +317,18 @@ if( HAVE_CLOUDSC_LOKI )
                 dwarf_cloudsc.F90
                 loki-claw-gpu/cloudsc_driver_loki_mod.claw.gpu.F90
                 loki-claw-gpu/cloudsc.claw.gpu.F90
+            LIBS
+                cloudsc-common-lib
             DEFINITIONS ${CLOUDSC_DEFINITIONS}
         )
-        if( TARGET OpenMP::OpenMP_Fortran )
-            target_link_libraries( dwarf-cloudsc-loki-claw-gpu PRIVATE OpenMP::OpenMP_Fortran )
-        endif()
-        if( TARGET OpenACC::OpenACC_Fortran )
-            target_link_libraries( dwarf-cloudsc-loki-claw-gpu PRIVATE OpenACC::OpenACC_Fortran )
-            target_link_libraries( dwarf-cloudsc-loki-claw-gpu PRIVATE cloudsc-common-lib-static )
-        else()
-            target_link_libraries( dwarf-cloudsc-loki-claw-gpu PRIVATE cloudsc-common-lib )
-        endif()
-	if ( HAVE_HDF5 )
-          target_include_directories( dwarf-cloudsc-loki-claw-gpu PRIVATE ${HDF5_Fortran_INCLUDE_DIRS} )
-	endif()
+
+        ecbuild_add_test(
+            TARGET dwarf-cloudsc-loki-claw-gpu-serial
+            COMMAND bin/dwarf-cloudsc-loki-claw-gpu
+            ARGS 1 1280 128
+            WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+            OMP 1
+        )
     endif()
 
 
@@ -271,9 +344,9 @@ if( HAVE_CLOUDSC_LOKI )
         MODE scc FRONTEND ${LOKI_FRONTEND} CPP
         CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config
         PATH ${CMAKE_CURRENT_SOURCE_DIR}
-        HEADER ${COMMON_MODULE}/yomphyder.F90
-        INCLUDE ${COMMON_INCLUDE}
-        DEFINE CLOUDSC_GPU_TIMING ${CLOUDSC_DEFINE_STMT_FUNC}
+        HEADERS ${COMMON_MODULE}/yomphyder.F90
+        INCLUDES ${COMMON_INCLUDE}
+        DEFINITIONS CLOUDSC_GPU_TIMING ${CLOUDSC_DEFINE_STMT_FUNC}
         DATA_OFFLOAD REMOVE_OPENMP
         XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
         OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc
@@ -286,21 +359,19 @@ if( HAVE_CLOUDSC_LOKI )
             dwarf_cloudsc.F90
             loki-scc/cloudsc_driver_loki_mod.scc.F90
             loki-scc/cloudsc.scc.F90
+        LIBS
+            cloudsc-common-lib
         DEFINITIONS ${CLOUDSC_DEFINITIONS}
     )
-    if( TARGET OpenMP::OpenMP_Fortran )
-        target_link_libraries( dwarf-cloudsc-loki-scc PRIVATE OpenMP::OpenMP_Fortran )
-    endif()
-    if( TARGET OpenACC::OpenACC_Fortran )
-        target_link_libraries( dwarf-cloudsc-loki-scc PRIVATE OpenACC::OpenACC_Fortran )
-        target_link_libraries( dwarf-cloudsc-loki-scc PRIVATE cloudsc-common-lib-static )
-    else()
-        target_link_libraries( dwarf-cloudsc-loki-scc PRIVATE cloudsc-common-lib )
-    endif()
-    if ( HAVE_HDF5 )
-        target_include_directories( dwarf-cloudsc-loki-scc PRIVATE ${HDF5_Fortran_INCLUDE_DIRS} )
-    endif()
 
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-loki-scc-serial
+        COMMAND bin/dwarf-cloudsc-loki-scc
+        ARGS 1 1280 128
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 1
+        ENVIRONMENT "NVCOMPILER_ACC_CUDA_HEAPSIZE=64M"
+    )
 
     ####################################################
     ##  SCC-hoist mode                                ##
@@ -315,9 +386,9 @@ if( HAVE_CLOUDSC_LOKI )
         MODE scc-hoist FRONTEND ${LOKI_FRONTEND} CPP
         CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config
         PATH ${CMAKE_CURRENT_SOURCE_DIR}
-        HEADER ${COMMON_MODULE}/yomphyder.F90
-        INCLUDE ${COMMON_INCLUDE}
-        DEFINE CLOUDSC_GPU_TIMING ${CLOUDSC_DEFINE_STMT_FUNC}
+        HEADERS ${COMMON_MODULE}/yomphyder.F90
+        INCLUDES ${COMMON_INCLUDE}
+        DEFINITIONS CLOUDSC_GPU_TIMING ${CLOUDSC_DEFINE_STMT_FUNC}
         DATA_OFFLOAD REMOVE_OPENMP
         XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
         OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist
@@ -332,72 +403,82 @@ if( HAVE_CLOUDSC_LOKI )
             dwarf_cloudsc.F90
             loki-scc-hoist/cloudsc_driver_loki_mod.scc_hoist.F90
             loki-scc-hoist/cloudsc.scc_hoist.F90
+        LIBS
+            cloudsc-common-lib
         DEFINITIONS ${CLOUDSC_DEFINITIONS}
     )
-    if( TARGET OpenMP::OpenMP_Fortran )
-        target_link_libraries( dwarf-cloudsc-loki-scc-hoist PRIVATE OpenMP::OpenMP_Fortran )
-    endif()
-    if( TARGET OpenACC::OpenACC_Fortran )
-        target_link_libraries( dwarf-cloudsc-loki-scc-hoist PRIVATE OpenACC::OpenACC_Fortran )
-        target_link_libraries( dwarf-cloudsc-loki-scc-hoist PRIVATE cloudsc-common-lib-static )
-    else()
-        target_link_libraries( dwarf-cloudsc-loki-scc-hoist PRIVATE cloudsc-common-lib )
-    endif()
-    if ( HAVE_HDF5 )
-        target_include_directories( dwarf-cloudsc-loki-scc-hoist PRIVATE ${HDF5_Fortran_INCLUDE_DIRS} )
-    endif()
 
+    ecbuild_add_test(
+        TARGET dwarf-cloudsc-loki-scc-hoist-serial
+        COMMAND bin/dwarf-cloudsc-loki-scc-hoist
+        ARGS 1 1280 128
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 1
+    )
 
     ##############################################################################
     # C-transpilation mode for generating vectorized C host code (experimental!)
     ##############################################################################
 
-    cloudsc_xmod( loki-c )
-
-    loki_transform_transpile(
-        FRONTEND ${LOKI_FRONTEND} CPP
-        HEADER
-            ${COMMON_MODULE}/parkind1.F90
-            ${COMMON_MODULE}/yomphyder.F90
-            ${COMMON_MODULE}/yomcst.F90
-            ${COMMON_MODULE}/yoethf.F90
-            ${COMMON_MODULE}/yoecldp.F90
-            ${COMMON_MODULE}/fcttre_mod.F90
-            ${COMMON_MODULE}/fccld_mod.F90
-        DRIVER ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_driver_loki_mod.F90
-        SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc.F90
-        INCLUDE ${COMMON_INCLUDE}
-        XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
-        OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-c
-        OUTPUT
-            loki-c/cloudsc_driver_loki_mod.c.F90
-            loki-c/cloudsc_fc.F90 loki-c/cloudsc_c.c
-            loki-c/yoethf_fc.F90 loki-c/yomcst_fc.F90
-            loki-c/yoecldp_fc.F90
-        DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES}
-    )
+    if ( NOT HAVE_SINGLE_PRECISION )
+
+        cloudsc_xmod( loki-c )
+
+        loki_transform_transpile(
+            FRONTEND ${LOKI_FRONTEND} CPP
+            HEADERS
+                ${COMMON_MODULE}/parkind1.F90
+                ${COMMON_MODULE}/yomphyder.F90
+                ${COMMON_MODULE}/yomcst.F90
+                ${COMMON_MODULE}/yoethf.F90
+                ${COMMON_MODULE}/yoecldp.F90
+                ${COMMON_MODULE}/fcttre_mod.F90
+                ${COMMON_MODULE}/fccld_mod.F90
+            DRIVER ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_driver_loki_mod.F90
+            SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc.F90
+            INCLUDES ${COMMON_INCLUDE}
+            XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
+            OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-c
+            OUTPUT
+                loki-c/cloudsc_driver_loki_mod.c.F90
+                loki-c/cloudsc_fc.F90 loki-c/cloudsc_c.c
+                loki-c/yoethf_fc.F90 loki-c/yomcst_fc.F90
+                loki-c/yoecldp_fc.F90
+            DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES}
+        )
 
-    # Define the CLAW-CPU build target for this variant
-    ecbuild_add_executable( TARGET dwarf-cloudsc-loki-c
-        SOURCES
-            dwarf_cloudsc.F90
-            loki-c/cloudsc_driver_loki_mod.c.F90
-            loki-c/cloudsc_fc.F90
-            loki-c/cloudsc_c.c
-            loki-c/yoethf_fc.F90
-            loki-c/yomcst_fc.F90
-            loki-c/yoecldp_fc.F90
-        DEFINITIONS ${CLOUDSC_DEFINITIONS}
-    )
-    target_link_libraries( dwarf-cloudsc-loki-c PRIVATE cloudsc-common-lib )
-    if( TARGET OpenMP::OpenMP_Fortran AND TARGET OpenMP::OpenMP_C )
-        target_link_libraries( dwarf-cloudsc-loki-c PRIVATE OpenMP::OpenMP_Fortran )
-        target_link_libraries( dwarf-cloudsc-loki-c PRIVATE OpenMP::OpenMP_C )
-    endif()
-    if ( HAVE_HDF5 )
-        target_include_directories( dwarf-cloudsc-loki-c PRIVATE ${HDF5_Fortran_INCLUDE_DIRS} )
-    endif()
+        # Define the C-transpilation build target for this variant
+        ecbuild_add_executable( TARGET dwarf-cloudsc-loki-c
+            SOURCES
+                dwarf_cloudsc.F90
+                loki-c/cloudsc_driver_loki_mod.c.F90
+                loki-c/cloudsc_fc.F90
+                loki-c/cloudsc_c.c
+                loki-c/yoethf_fc.F90
+                loki-c/yomcst_fc.F90
+                loki-c/yoecldp_fc.F90
+            LIBS
+                cloudsc-common-lib
+            DEFINITIONS ${CLOUDSC_DEFINITIONS}
+        )
+
+        ecbuild_add_test(
+            TARGET dwarf-cloudsc-loki-c-serial
+            COMMAND bin/dwarf-cloudsc-loki-c
+            ARGS 1 100 16
+            WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+            OMP 1
+        )
+        ecbuild_add_test(
+            TARGET dwarf-cloudsc-loki-c-omp
+            COMMAND bin/dwarf-cloudsc-loki-c
+            ARGS 4 100 16
+            WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+            OMP 4
+            CONDITION HAVE_OMP  # threaded test without the MPI keyword: gate on OpenMP like the other *-omp tests
+        )
 
+    endif()
 
     # Create symlink for the input data
     if( HAVE_SERIALBOX )
diff --git a/src/cloudsc_loki/cloudsc_loki.config b/src/cloudsc_loki/cloudsc_loki.config
index 41812da6..d6fda309 100644
--- a/src/cloudsc_loki/cloudsc_loki.config
+++ b/src/cloudsc_loki/cloudsc_loki.config
@@ -7,8 +7,9 @@ strict = true  # Throw exceptions during dicovery
 # Ensure that we are never adding these to the tree, and thus
 # do not attempt to look up the source files for these.
 # TODO: Add type-bound procedure support and adjust scheduler to it
-disable = ['timer%start', 'timer%end', 'timer%thread_start', 'timer%thread_end',
-           'timer%thread_log', 'timer%thread_log', 'timer%print_performance']
+disable = ['performance_timer%start', 'performance_timer%end', 'performance_timer%thread_start',
+           'performance_timer%thread_end', 'performance_timer%thread_log',
+           'performance_timer%print_performance']
 
 # Define entry point for call-tree transformation
 [[routine]]
diff --git a/src/cloudsc_loki/dwarf_cloudsc.F90 b/src/cloudsc_loki/dwarf_cloudsc.F90
index b48b16c5..923f7b7e 100644
--- a/src/cloudsc_loki/dwarf_cloudsc.F90
+++ b/src/cloudsc_loki/dwarf_cloudsc.F90
@@ -59,7 +59,7 @@ PROGRAM DWARF_CLOUDSC
   NGPTOT = NGPTOTG - (NUMPROC - 1) * NGPTOT
 end if
 
-! Get the block size (NPROMA) for which to run the benchmark  
+! Get the block size (NPROMA) for which to run the benchmark
 IF (IARGS >= 3) THEN
   CALL GET_COMMAND_ARGUMENT(3, CLARG, LENARG)
   READ(CLARG(1:LENARG),*) NPROMA
@@ -94,4 +94,7 @@ PROGRAM DWARF_CLOUDSC
 ! Validate the output against serialized reference data
 CALL GLOBAL_STATE%VALIDATE(NPROMA, NGPTOT, NGPTOTG)
 
+! Tear down MPI environment
+CALL CLOUDSC_MPI_END()
+
 END PROGRAM DWARF_CLOUDSC
diff --git a/src/cloudsc_python/.pre-commit-config.yaml b/src/cloudsc_python/.pre-commit-config.yaml
new file mode 100644
index 00000000..6ff44b0d
--- /dev/null
+++ b/src/cloudsc_python/.pre-commit-config.yaml
@@ -0,0 +1,22 @@
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v3.4.0
+    hooks:
+    -   id: check-yaml
+    -   id: check-added-large-files
+    -   id: check-case-conflict
+    -   id: check-json
+    -   id: check-merge-conflict
+    -   id: debug-statements
+    -   id: end-of-file-fixer
+    -   id: fix-encoding-pragma
+    -   id: requirements-txt-fixer
+    -   id: trailing-whitespace
+-   repo: https://github.com/pre-commit/pygrep-hooks
+    rev: v1.8.0
+    hooks:
+    -   id: rst-backticks
+-   repo: https://github.com/psf/black
+    rev: 22.6.0
+    hooks:
+    -   id: black
diff --git a/src/cloudsc_python/README.md b/src/cloudsc_python/README.md
new file mode 100644
index 00000000..a464d81c
--- /dev/null
+++ b/src/cloudsc_python/README.md
@@ -0,0 +1,48 @@
+This folder contains a Python implementation of the CLOUDSC microphysics scheme based on
+[GT4Py](https://github.com/GridTools/gt4py/tree/master). The code is bundled as an installable
+package called `cloudsc4py`, whose source code is placed under `src/`.
+
+We strongly recommend installing the package in an isolated virtual environment, which can be
+created by issuing the following command from within this directory:
+```shell
+$ python -m venv venv
+```
+The virtual environment will be contained in the folder `venv/` and can be activated with
+```shell
+$ source venv/bin/activate
+```
+and deactivated with
+```shell
+$ (venv) deactivate
+```
+The package `cloudsc4py` can be installed via the Python package manager [pip](https://pypi.org/project/pip/):
+```shell
+$ (venv) pip install -e .
+```
+The resulting installation will work on CPU only. To get access to the GPU-accelerated backends of
+GT4Py, [CuPy](https://cupy.dev/) is required. We suggest installing CuPy as a precompiled binary
+package (wheel)
+```shell
+$ (venv) pip install cupy-cudaXXX  # XXX stands for the CUDA version available on the system
+```
+If the installation of CuPy completed successfully, the command
+```shell
+$ (venv) python -c "import cupy"
+```
+should produce no output.
+All the aforementioned steps can be performed in a single shot by running the Bash script `bootstrap_venv.sh`:
+```shell
+$ FRESH_INSTALL=1 VENV=venv INSTALL_CUPY=1 CUPY_VERSION=cupy-cudaXXX [PIP_UPGRADE=1 INSTALL_PRE_COMMIT=1] ./bootstrap_venv.sh
+```
+
+The scheme comes in two forms: one where computations are carried out in a single stencil
+(see `src/cloudsc4py/{physics,_stencils}/cloudsc.py`), and one where calculations are split into two
+stencils (one computing tendencies on the main vertical levels, the other computing fluxes at the
+interface levels; see `src/cloudsc4py/{physics,_stencils}/cloudsc_split.py`).
+
+The easiest way to run the dwarf is through the driver scripts `drivers/run.py` and `drivers/run_split.py`.
+Run either script with the `--help` option to get the full list of command-line options.
+
+For the sake of convenience, we provide the driver `drivers/run_fortran.py` to invoke one of the
+FORTRAN variants of the dwarf from Python, and the Bash script `drivers/run_batch.sh` to run the
+FORTRAN and Python implementations under different settings.
diff --git a/src/cloudsc_python/bootstrap_venv.sh b/src/cloudsc_python/bootstrap_venv.sh
new file mode 100755
index 00000000..3cc211f2
--- /dev/null
+++ b/src/cloudsc_python/bootstrap_venv.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+PYTHON=$(which python3)
+PIP_UPGRADE=${PIP_UPGRADE:-1}
+VENV=${VENV:-venv}
+FRESH_INSTALL=${FRESH_INSTALL:-1}
+INSTALL_PRE_COMMIT=${INSTALL_PRE_COMMIT:-1}
+INSTALL_CUPY=${INSTALL_CUPY:-0}
+CUPY_VERSION=${CUPY_VERSION:-cupy}
+
+
+function install()
+{
+  # activate environment; bail out on failure so pip never touches another interpreter
+  source "$VENV"/bin/activate || return 1
+
+  # upgrade pip, setuptools and wheel (skipped when PIP_UPGRADE=0)
+  if [ "$PIP_UPGRADE" -ne 0 ]; then
+    pip install --upgrade pip setuptools wheel
+  fi
+
+  # install cloudsc4py in editable mode
+  pip install -e .
+
+  # install gt sources
+  python -m gt4py.gt_src_manager install
+
+  # create the gt4py cache folder and export its path on every future activation
+  mkdir -p gt_cache
+  echo -e "\nexport GT_CACHE_ROOT=$PWD/gt_cache" >> "$VENV"/bin/activate
+
+  # install cupy (only when INSTALL_CUPY=1; package name taken from CUPY_VERSION)
+  if [ "$INSTALL_CUPY" -eq 1 ]; then
+    pip install "$CUPY_VERSION"
+  fi
+
+  # install development packages
+  pip install -r requirements_dev.txt
+
+  # install the pre-commit hooks (only when INSTALL_PRE_COMMIT=1)
+  if [ "$INSTALL_PRE_COMMIT" -eq 1 ]; then
+    pre-commit install
+  fi
+
+  # deactivate environment
+  deactivate
+}
+
+
+if [ "$FRESH_INSTALL" -eq 1 ]; then
+  echo -e "Creating new environment..."
+  rm -rf "$VENV"
+  $PYTHON -m venv "$VENV"
+fi
+
+
+install || deactivate
+
+
+echo -e ""
+echo -e "Command to activate environment:"
+echo -e "\t\$ source $VENV/bin/activate"
+echo -e ""
+echo -e "Command to deactivate environment:"
+echo -e "\t\$ deactivate"
+echo -e ""
diff --git a/src/cloudsc_python/drivers/__init__.py b/src/cloudsc_python/drivers/__init__.py
new file mode 100644
index 00000000..95e3c8ad
--- /dev/null
+++ b/src/cloudsc_python/drivers/__init__.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
diff --git a/src/cloudsc_python/drivers/config.py b/src/cloudsc_python/drivers/config.py
new file mode 100644
index 00000000..b9353d12
--- /dev/null
+++ b/src/cloudsc_python/drivers/config.py
@@ -0,0 +1,172 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+import numpy as np
+from os.path import dirname, join, normpath, splitext
+from pydantic import BaseModel, validator
+import socket
+from typing import Optional
+
+from cloudsc4py.framework.config import DataTypes, GT4PyConfig
+
+
+class IOConfig(BaseModel):
+    """I/O options: path of the output CSV file and name of the host machine."""
+
+    output_csv_file: Optional[str]
+    host_name: Optional[str]
+
+    @validator("output_csv_file")
+    @classmethod
+    def check_extension(cls, v: Optional[str]) -> Optional[str]:
+        """
+        Force the ``.csv`` extension on the output path.
+
+        A missing extension is added, any other extension is replaced,
+        and ``None`` is passed through untouched.
+        """
+        if v is None:
+            return None
+        stem, _ = splitext(v)
+        return stem + ".csv"
+
+    @validator("host_name")
+    @classmethod
+    def set_host_name(cls, v: Optional[str]) -> str:
+        return socket.gethostname() if not v else v
+
+    def with_host_name(self, host_name: str) -> IOConfig:
+        """Return a new ``IOConfig`` with ``host_name`` replaced."""
+        fields = {**self.dict(), "host_name": host_name}
+        return IOConfig(**fields)
+
+    def with_output_csv_file(self, output_csv_file: str) -> IOConfig:
+        """Return a new ``IOConfig`` with ``output_csv_file`` replaced."""
+        fields = {**self.dict(), "output_csv_file": output_csv_file}
+        return IOConfig(**fields)
+
+
+default_io_config = IOConfig(output_csv_file=None, host_name=None)
+
+
+class PythonConfig(BaseModel):
+    """Options that drive a run of the Python/GT4Py implementation."""
+
+    # number of domain columns
+    num_cols: Optional[int]
+
+    # validation switch and HDF5 input/reference files
+    enable_validation: bool
+    input_file: str
+    reference_file: str
+
+    # number of executions
+    num_runs: int
+
+    # data types, GT4Py settings and Sympl checks
+    data_types: DataTypes
+    gt4py_config: GT4PyConfig
+    sympl_enable_checks: bool
+
+    @validator("gt4py_config")
+    @classmethod
+    def add_dtypes(cls, v, values) -> GT4PyConfig:
+        return v.with_dtypes(values["data_types"])
+
+    def with_backend(self, backend: Optional[str]) -> PythonConfig:
+        fields = self.dict()
+        gt4py_config = GT4PyConfig(**fields["gt4py_config"]).with_backend(backend)
+        return PythonConfig(**{**fields, "gt4py_config": gt4py_config.dict()})
+
+    def with_checks(self, enabled: bool) -> PythonConfig:
+        """Return a copy with both the GT4Py and the Sympl checks set to ``enabled``."""
+        fields = self.dict()
+        # the GT4Py side of the switch lives in the nested GT4PyConfig
+        gt4py_config = GT4PyConfig(**fields["gt4py_config"]).with_validate_args(enabled)
+        fields.update(gt4py_config=gt4py_config.dict(), sympl_enable_checks=enabled)
+        return PythonConfig(**fields)
+
+    def with_num_cols(self, num_cols: Optional[int]) -> PythonConfig:
+        """Return a copy with ``num_cols`` replaced; ``None`` keeps the current value."""
+        fields = self.dict()
+        fields["num_cols"] = fields["num_cols"] if num_cols is None else num_cols
+        return PythonConfig(**fields)
+
+    def with_num_runs(self, num_runs: Optional[int]) -> PythonConfig:
+        """Return a copy with ``num_runs`` replaced; ``None`` keeps the current value."""
+        fields = self.dict()
+        fields["num_runs"] = fields["num_runs"] if num_runs is None else num_runs
+        return PythonConfig(**fields)
+
+    def with_validation(self, enabled: bool) -> PythonConfig:
+        """Return a copy with ``enable_validation`` set to ``enabled``."""
+        fields = {**self.dict(), "enable_validation": enabled}
+        return PythonConfig(**fields)
+
+
+config_files_dir = normpath(join(dirname(__file__), "../../../config-files"))
+default_python_config = PythonConfig(
+    num_cols=1,
+    enable_validation=True,
+    input_file=join(config_files_dir, "input.h5"),
+    reference_file=join(config_files_dir, "reference.h5"),
+    num_runs=15,
+    data_types=DataTypes(bool=bool, float=np.float64, int=int),
+    gt4py_config=GT4PyConfig(backend="numpy", rebuild=False, validate_args=True, verbose=True),
+    sympl_enable_checks=True,
+)
+
+
+class FortranConfig(BaseModel):
+    """Options that drive a run of one FORTRAN variant of the dwarf."""
+
+    build_dir: str
+    variant: str
+    nproma: int
+    num_cols: int
+    num_runs: int
+    num_threads: int
+
+    def with_build_dir(self, build_dir: str) -> FortranConfig:
+        """Return a new ``FortranConfig`` with ``build_dir`` replaced."""
+        fields = {**self.dict(), "build_dir": build_dir}
+        return FortranConfig(**fields)
+
+    def with_nproma(self, nproma: int) -> FortranConfig:
+        """Return a new ``FortranConfig`` with ``nproma`` replaced."""
+        fields = {**self.dict(), "nproma": nproma}
+        return FortranConfig(**fields)
+
+    def with_num_cols(self, num_cols: int) -> FortranConfig:
+        """Return a new ``FortranConfig`` with ``num_cols`` replaced."""
+        fields = {**self.dict(), "num_cols": num_cols}
+        return FortranConfig(**fields)
+
+    def with_num_runs(self, num_runs: int) -> FortranConfig:
+        """Return a new ``FortranConfig`` with ``num_runs`` replaced."""
+        fields = {**self.dict(), "num_runs": num_runs}
+        return FortranConfig(**fields)
+
+    def with_num_threads(self, num_threads: int) -> FortranConfig:
+        """Return a new ``FortranConfig`` with ``num_threads`` replaced."""
+        fields = {**self.dict(), "num_threads": num_threads}
+        return FortranConfig(**fields)
+
+    def with_variant(self, variant: str) -> FortranConfig:
+        """Return a new ``FortranConfig`` with ``variant`` replaced."""
+        fields = {**self.dict(), "variant": variant}
+        return FortranConfig(**fields)
+
+
+default_fortran_config = FortranConfig(
+    build_dir=".", variant="fortran", nproma=32, num_cols=1, num_runs=1, num_threads=1
+)
diff --git a/src/cloudsc_python/drivers/input.h5 b/src/cloudsc_python/drivers/input.h5
new file mode 100644
index 00000000..9482579a
Binary files /dev/null and b/src/cloudsc_python/drivers/input.h5 differ
diff --git a/src/cloudsc_python/drivers/reference.h5 b/src/cloudsc_python/drivers/reference.h5
new file mode 100644
index 00000000..18e952e7
Binary files /dev/null and b/src/cloudsc_python/drivers/reference.h5 differ
diff --git a/src/cloudsc_python/drivers/run.py b/src/cloudsc_python/drivers/run.py
new file mode 100644
index 00000000..ee31f4b1
--- /dev/null
+++ b/src/cloudsc_python/drivers/run.py
@@ -0,0 +1,196 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import click
+import csv
+import datetime
+import os
+from typing import Optional, Type
+
+from cloudsc4py.framework.grid import ComputationalGrid
+from cloudsc4py.physics.cloudsc import Cloudsc
+from cloudsc4py.initialization.reference import get_reference_tendencies, get_reference_diagnostics
+from cloudsc4py.initialization.state import get_state
+from cloudsc4py.utils.iox import HDF5Reader
+from cloudsc4py.utils.timing import timing
+from cloudsc4py.utils.validation import validate
+
+from config import PythonConfig, IOConfig, default_python_config, default_io_config
+from utils import print_performance, to_csv
+
+
+def core(config: PythonConfig, io_config: IOConfig, cloudsc_cls: Type) -> None:
+    hdf5_reader = HDF5Reader(config.input_file, config.data_types)
+
+    nx = config.num_cols or hdf5_reader.get_nlon()
+    nz = hdf5_reader.get_nlev()
+    computational_grid = ComputationalGrid(nx, 1, nz)
+
+    state = get_state(computational_grid, hdf5_reader, gt4py_config=config.gt4py_config)
+    dt = hdf5_reader.get_timestep()
+
+    yoecldp_paramaters = hdf5_reader.get_yoecldp_parameters()
+    yoethf_parameters = hdf5_reader.get_yoethf_parameters()
+    yomcst_parameters = hdf5_reader.get_yomcst_parameters()
+    yrecldp_parameters = hdf5_reader.get_yrecldp_parameters()
+
+    cloudsc = cloudsc_cls(
+        computational_grid,
+        yoecldp_paramaters,
+        yoethf_parameters,
+        yomcst_parameters,
+        yrecldp_parameters,
+        enable_checks=config.sympl_enable_checks,
+        gt4py_config=config.gt4py_config,
+    )
+    tends, diags = cloudsc(state, dt)
+
+    runtimes = []
+    for i in range(config.num_runs):
+        with timing(f"run_{i}") as timer:
+            cloudsc(state, dt, out_tendencies=tends, out_diagnostics=diags)
+        runtimes.append(timer.get_time(f"run_{i}") * 1000)
+
+    runtime_mean, runtime_stddev = print_performance(runtimes)
+
+    if io_config.output_csv_file is not None:
+        to_csv(
+            io_config.output_csv_file,
+            io_config.host_name,
+            config.gt4py_config.backend,
+            nx,
+            config.num_runs,
+            runtime_mean,
+            runtime_stddev,
+        )
+
+    if config.enable_validation:
+        hdf5_reader_ref = HDF5Reader(config.reference_file, config.data_types)
+        tends_ref = get_reference_tendencies(
+            computational_grid, hdf5_reader_ref, gt4py_config=config.gt4py_config
+        )
+        diags_ref = get_reference_diagnostics(
+            computational_grid, hdf5_reader_ref, gt4py_config=config.gt4py_config
+        )
+
+        tends_fail = validate(tends, tends_ref)
+        if len(tends_fail) == 0:
+            print("Results: All tendencies have been successfully validated. HOORAY!")
+        else:
+            print(
+                f"Results: Validation failed for {len(tends_fail)}/{len(tends_ref) - 1} "
+                f"tendencies: {', '.join(tends_fail)}."
+            )
+
+        diags_fail = validate(diags, diags_ref)
+        if len(diags_fail) == 0:
+            print("Results: All diagnostics have been successfully validated. HOORAY!")
+        else:
+            print(
+                f"Results: Validation failed for {len(diags_fail)}/{len(diags_ref) - 1} "
+                f"diagnostics: {', '.join(diags_fail)}."
+            )
+
+
+@click.command()
+@click.option(
+    "--backend",
+    type=str,
+    default=None,
+    help="GT4Py backend."
+    "\n\nOptions: numpy, gt:cpu_kfirst, gt:cpu_ifirst, gt:gpu, cuda, dace:cpu, dace:gpu."
+    "\n\nDefault: numpy.",
+)
+@click.option(
+    "--enable-checks/--disable-checks",
+    is_flag=True,
+    type=bool,
+    default=False,
+    help="Enable/disable sanity checks performed by Sympl and GT4Py.\n\nDefault: disabled.",
+)
+@click.option(
+    "--enable-validation/--disable-validation",
+    is_flag=True,
+    type=bool,
+    default=True,
+    help="Enable/disable data validation.\n\nDefault: enabled.",
+)
+@click.option("--num-cols", type=int, default=None, help="Number of domain columns.\n\nDefault: 1.")
+@click.option(
+    "--num-runs",
+    type=int,
+    default=1,
+    help="Number of executions.\n\nDefault: 1.",
+)
+@click.option("--host-alias", type=str, default=None, help="Name of the host machine (optional).")
+@click.option(
+    "--output-csv-file",
+    type=str,
+    default=None,
+    help="Path to the CSV file where writing performance counters (optional).",
+)
+@click.option(
+    "--output-csv-file-stencils",
+    type=str,
+    default=None,
+    help="Path to the CSV file where writing performance counters for each stencil (optional).",
+)
+def main(
+    backend: Optional[str],
+    enable_checks: bool,
+    enable_validation: bool,
+    num_cols: Optional[int],
+    num_runs: Optional[int],
+    host_alias: Optional[str],
+    output_csv_file: Optional[str],
+    output_csv_file_stencils: Optional[str],
+) -> None:
+    """
+    Driver for the GT4Py-based implementation of CLOUDSC.
+
+    Computations are carried out in a single stencil.
+    """
+    config = (
+        default_python_config.with_backend(backend)
+        .with_checks(enable_checks)
+        .with_validation(enable_validation)
+        .with_num_cols(num_cols)
+        .with_num_runs(num_runs)
+    )
+    io_config = default_io_config.with_output_csv_file(output_csv_file).with_host_name(host_alias)
+    core(config, io_config, cloudsc_cls=Cloudsc)
+
+    if output_csv_file_stencils is not None:
+        call_time = None
+        for key, value in config.gt4py_config.exec_info.items():
+            if "cloudsc" in key:
+                call_time = value["total_call_time"] * 1000 / config.num_runs
+
+        if not os.path.exists(output_csv_file_stencils):
+            with open(output_csv_file_stencils, "w") as f:
+                writer = csv.writer(f, delimiter=",")
+                writer.writerow(("date", "host", "backend", "num_cols", "num_runs", "cloudsc"))
+        with open(output_csv_file_stencils, "a") as f:
+            writer = csv.writer(f, delimiter=",")
+            writer.writerow(
+                (
+                    datetime.date.today().strftime("%Y%m%d"),
+                    io_config.host_name,
+                    config.gt4py_config.backend,
+                    config.num_cols,
+                    config.num_runs,
+                    call_time,
+                )
+            )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/cloudsc_python/drivers/run_batch.sh b/src/cloudsc_python/drivers/run_batch.sh
new file mode 100755
index 00000000..aaf609a7
--- /dev/null
+++ b/src/cloudsc_python/drivers/run_batch.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+# === general
+# name of the host machine
+HOST=meluxina
+# list of number of columns
+NUM_COLS_L=( 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 )
+
+# === FORTRAN
+# list of environments
+# options: nvhpc
+FORTRAN_ENV_L=( )
+# list of variants
+# options: fortran, gpu-scc, gpu-scc-hoist, gpu-omp-scc-hoist
+FORTRAN_VARIANT_L=( fortran gpu-scc gpu-scc-hoist gpu-omp-scc-hoist )
+# list of NPROMA values (array must have the same length of FORTRAN_VARIANT_L)
+# recommended values: 32 for CPUs, 128 on GPUs
+NPROMA_L=( 32 128 128 128 )
+# list of number of threads (array must have the same length of FORTRAN_VARIANT_L)
+# recommended values: 24 on Piz Daint's CPUs, 128 on MLux's CPUs, 1 on GPUs
+FORTRAN_NUM_THREADS_L=( 128 1 1 1 )
+
+# === python
+# list of environments
+# options: aocc gcc intel
+PYTHON_ENV_L=( aocc gcc intel )
+# list of C compilers (array must have the same length of PYTHON_ENV_L)
+CC_L=( clang gcc icx )
+# list of C++ compilers (array must have the same length of PYTHON_ENV_L)
+CXX_L=( clang++ g++ icx )
+# list of C++ compiler flags (array must have the same length of PYTHON_ENV_L)
+CXXFLAGS_L=( "-fbracket-depth=1024" "" "-fbracket-depth=1024" )
+# list of linker flags (array must have the same length of PYTHON_ENV_L)
+LFLAGS_L=( "" "" "-lstdc++" )
+# list of GT4Py backends
+# options: numpy, gt:cpu_ifirst, gt:cpu_kfirst, gt:gpu, cuda, dace:cpu, dace:gpu
+GT4PY_BACKEND_L=( gt:cpu_ifirst gt:cpu_kfirst dace:cpu )
+# list of number of threads (array must have the same length of GT4PY_BACKEND_L)
+# recommended values: 24 on Piz Daint, 128 on MLux
+PYTHON_NUM_THREADS_L=( 128 128 128 128 128 )
+
+echo "FORTRAN: start"
+LEN_FORTRAN_ENV_L=${#FORTRAN_ENV_L[@]}
+LEN_FORTRAN_VARIANT_L=${#FORTRAN_VARIANT_L[@]}
+
+for (( i=0; i<"$LEN_FORTRAN_ENV_L"; i++ )); do
+  ENV=${FORTRAN_ENV_L[$i]}
+  echo "  Env: $ENV: start"
+
+  for (( j=0; j<"$LEN_FORTRAN_VARIANT_L"; j++ )); do
+    VARIANT=${FORTRAN_VARIANT_L[$j]}
+    mkdir -p ../data/"$HOST"/"$ENV"
+    echo "    Variant: $VARIANT: start"
+    for NUM_COLS in "${NUM_COLS_L[@]}"; do
+      echo -n "      num_cols=$NUM_COLS: "
+      python run_fortran.py \
+        --build-dir=../../../../develop/build/"$ENV" \
+        --nproma="${NPROMA_L[$j]}" \
+        --num-runs=20 \
+        --num-threads="${FORTRAN_NUM_THREADS_L[$j]}" \
+        --output-csv-file=../data/"$HOST"/"$ENV"/performance.csv \
+        --host-alias="$HOST" \
+        --variant="$VARIANT" \
+        --num-cols="$NUM_COLS" || true
+    done
+    echo "    Variant: $VARIANT: end"
+  done
+  echo "  Env: $ENV: end"
+done
+echo "FORTRAN: end"
+
+echo ""
+
+echo "Python: start"
+LEN_PYTHON_ENV_L=${#PYTHON_ENV_L[@]}
+LEN_GT4PY_BACKEND_L=${#GT4PY_BACKEND_L[@]}
+
+for (( i=0; i<"$LEN_PYTHON_ENV_L"; i++ )); do
+  ENV=${PYTHON_ENV_L[$i]}
+  echo "  Env: $ENV: start"
+  export GT_CACHE_ROOT=$PWD/../gt_cache/"$ENV"
+  mkdir -p ../data/"$HOST"/"$ENV"
+
+  for (( j=0; j<"$LEN_GT4PY_BACKEND_L"; j++ )); do
+    GT4PY_BACKEND=${GT4PY_BACKEND_L[$j]}
+    echo "    Backend: $GT4PY_BACKEND: start"
+
+    for NUM_COLS in "${NUM_COLS_L[@]}"; do
+      echo -n "      num_cols=$NUM_COLS: "
+      OMP_NUM_THREADS=${PYTHON_NUM_THREADS_L[$j]} \
+      CC=${CC_L[$i]} CXX=${CXX_L[$i]} CXXFLAGS=${CXXFLAGS_L[$i]} LFLAGS=${LFLAGS_L[$i]} CUDA_HOST_CXX=${CXX_L[$i]} \
+        python run_split.py \
+        --num-runs=20 \
+        --disable-checks \
+        --disable-validation \
+        --host-alias="$HOST" \
+        --backend="$GT4PY_BACKEND" \
+        --num-cols="$NUM_COLS" \
+        --output-csv-file=../data/"$HOST"/"$ENV"/performance_split.csv \
+        --output-csv-file-stencils=../data/"$HOST"/"$ENV"/performance_split_stencils.csv || true
+    done
+    echo "    Backend: $GT4PY_BACKEND: end"
+  done
+  echo "  Env: $ENV: end"
+done
+echo "Python: end"
diff --git a/src/cloudsc_python/drivers/run_fortran.py b/src/cloudsc_python/drivers/run_fortran.py
new file mode 100644
index 00000000..5f97543e
--- /dev/null
+++ b/src/cloudsc_python/drivers/run_fortran.py
@@ -0,0 +1,143 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import click
+import os
+import subprocess
+from typing import Optional
+
+from config import FortranConfig, IOConfig, default_fortran_config, default_io_config
+from utils import print_performance, to_csv
+
+
+def core(config: FortranConfig, io_config: IOConfig) -> None:
+    """
+    Time repeated executions of a FORTRAN variant of the dwarf.
+
+    The executable ``bin/dwarf-cloudsc-<variant>`` under ``config.build_dir`` (relative to
+    this file) is run once as warm-up and then ``config.num_runs`` times. Runtimes are
+    parsed from the standard error of each measured run, summarised on screen and, if
+    ``io_config.output_csv_file`` is set, appended to that CSV file.
+
+    A ``RuntimeError`` is raised if the executable does not exist.
+    """
+    executable = os.path.join(
+        os.path.dirname(__file__), config.build_dir, f"bin/dwarf-cloudsc-{config.variant}"
+    )
+    if not os.path.exists(executable):
+        raise RuntimeError(f"The executable `{executable}` does not exist.")
+
+    # positional arguments: number of threads, number of columns, block size (NPROMA)
+    # capped at the number of columns
+    command = [
+        executable,
+        str(config.num_threads),
+        str(config.num_cols),
+        str(min(config.num_cols, config.nproma)),
+    ]
+
+    # warm-up cache
+    subprocess.run(command, capture_output=True)
+
+    # GPU variants report the runtime on the third line of stderr, the other variants
+    # on the second-to-last one; in both cases it is the fourth-to-last token
+    report_line = 2 if "gpu" in config.variant else -2
+
+    # run and profile
+    runtimes = []
+    for _ in range(config.num_runs):
+        out = subprocess.run(command, capture_output=True)
+        report = out.stderr.decode("utf-8").split("\n")[report_line]
+        tokens = [token for token in report.split(" ") if token != ""]
+        runtimes.append(float(tokens[-4]))
+
+    runtime_mean, runtime_stddev = print_performance(runtimes)
+
+    if io_config.output_csv_file is not None:
+        to_csv(
+            io_config.output_csv_file,
+            io_config.host_name,
+            config.variant,
+            config.num_cols,
+            config.num_runs,
+            runtime_mean,
+            runtime_stddev,
+        )
+
+
+@click.command()
+@click.option(
+    "--build-dir",
+    type=str,
+    default="fortran",
+    help="Path to the build directory of the FORTRAN dwarf.",
+)
+@click.option(
+    "--variant",
+    type=str,
+    default="fortran",
+    help="Code variant."
+    "\n\nOptions: fortran, gpu-scc, gpu-scc-hoist, gpu-omp-scc-hoist."
+    "\n\nDefault: fortran.",
+)
+@click.option(
+    "--nproma",
+    type=int,
+    default=32,
+    help="Block size.\n\nRecommended values: 32 on CPUs, 128 on GPUs.\n\nDefault: 32.",
+)
+@click.option("--num-cols", type=int, default=1, help="Number of domain columns.\n\nDefault: 1.")
+@click.option(
+    "--num-runs",
+    type=int,
+    default=1,
+    help="Number of executions.\n\nDefault: 1.",
+)
+@click.option(
+    "--num-threads",
+    type=int,
+    default=1,
+    help="Number of threads."
+    "\n\nRecommended values: 24 on Piz Daint's CPUs, 128 on MLux's CPUs, 1 on GPUs."
+    "\n\nDefault: 1.",
+)
+@click.option("--host-alias", type=str, default=None, help="Name of the host machine (optional).")
+@click.option(
+    "--output-csv-file",
+    type=str,
+    default=None,
+    help="Path to the CSV file where writing performance counters (optional).",
+)
+def main(
+    build_dir: str,
+    variant: str,
+    nproma: int,
+    num_cols: int,
+    num_runs: int,
+    num_threads: int,
+    host_alias: Optional[str],
+    output_csv_file: Optional[str],
+) -> None:
+    """Driver for the FORTRAN implementation of CLOUDSC."""
+    config = (
+        default_fortran_config.with_build_dir(build_dir)
+        .with_variant(variant)
+        .with_nproma(nproma)
+        .with_num_cols(num_cols)
+        .with_num_runs(num_runs)
+        .with_num_threads(num_threads)
+    )
+    io_config = default_io_config.with_output_csv_file(output_csv_file).with_host_name(host_alias)
+    core(config, io_config)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/cloudsc_python/drivers/run_split.py b/src/cloudsc_python/drivers/run_split.py
new file mode 100644
index 00000000..188e01aa
--- /dev/null
+++ b/src/cloudsc_python/drivers/run_split.py
@@ -0,0 +1,131 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import click
+import csv
+import datetime
+import os
+from typing import Optional
+
+from cloudsc4py.physics.cloudsc_split import Cloudsc
+
+from config import default_python_config, default_io_config
+from run import core
+
+
+@click.command()
+@click.option(
+    "--backend",
+    type=str,
+    default=None,
+    help="GT4Py backend."
+    "\n\nOptions: numpy, gt:cpu_kfirst, gt:cpu_ifirst, gt:gpu, cuda, dace:cpu, dace:gpu."
+    "\n\nDefault: numpy.",
+)
+@click.option(
+    "--enable-checks/--disable-checks",
+    is_flag=True,
+    type=bool,
+    default=False,
+    help="Enable/disable sanity checks performed by Sympl and GT4Py.\n\nDefault: disabled.",
+)
+@click.option(
+    "--enable-validation/--disable-validation",
+    is_flag=True,
+    type=bool,
+    default=True,
+    help="Enable/disable data validation.\n\nDefault: enabled.",
+)
+@click.option("--num-cols", type=int, default=None, help="Number of domain columns.\n\nDefault: 1.")
+@click.option(
+    "--num-runs",
+    type=int,
+    default=1,
+    help="Number of executions.\n\nDefault: 1.",
+)
+@click.option("--host-alias", type=str, default=None, help="Name of the host machine (optional).")
+@click.option(
+    "--output-csv-file",
+    type=str,
+    default=None,
+    help="Path to the CSV file where writing performance counters (optional).",
+)
+@click.option(
+    "--output-csv-file-stencils",
+    type=str,
+    default=None,
+    help="Path to the CSV file where writing performance counters for each stencil (optional).",
+)
+def main(
+    backend: Optional[str],
+    enable_checks: bool,
+    enable_validation: bool,
+    num_cols: Optional[int],
+    num_runs: Optional[int],
+    host_alias: Optional[str],
+    output_csv_file: Optional[str],
+    output_csv_file_stencils: Optional[str],
+) -> None:
+    """
+    Driver for the GT4Py-based implementation of CLOUDSC.
+
+    Computations are split into two stencils.
+    """
+    config = (
+        default_python_config.with_backend(backend)
+        .with_checks(enable_checks)
+        .with_validation(enable_validation)
+        .with_num_cols(num_cols)
+        .with_num_runs(num_runs)
+    )
+    io_config = default_io_config.with_output_csv_file(output_csv_file).with_host_name(host_alias)
+    core(config, io_config, cloudsc_cls=Cloudsc)
+
+    if output_csv_file_stencils is not None:
+        cloudsc_tendencies_call_time = None
+        cloudsc_fluxes_call_time = None
+        for key, value in config.gt4py_config.exec_info.items():
+            if "tendencies" in key:
+                cloudsc_tendencies_call_time = value["total_call_time"] * 1000 / config.num_runs
+            elif "fluxes" in key:
+                cloudsc_fluxes_call_time = value["total_call_time"] * 1000 / config.num_runs
+
+        if not os.path.exists(output_csv_file_stencils):
+            with open(output_csv_file_stencils, "w") as f:
+                writer = csv.writer(f, delimiter=",")
+                writer.writerow(
+                    (
+                        "date",
+                        "host",
+                        "backend",
+                        "num_cols",
+                        "num_runs",
+                        "cloudsc_tendencies",
+                        "cloudsc_fluxes",
+                    )
+                )
+        with open(output_csv_file_stencils, "a") as f:
+            writer = csv.writer(f, delimiter=",")
+            writer.writerow(
+                (
+                    datetime.date.today().strftime("%Y%m%d"),
+                    io_config.host_name,
+                    config.gt4py_config.backend,
+                    config.num_cols,
+                    config.num_runs,
+                    cloudsc_tendencies_call_time,
+                    cloudsc_fluxes_call_time,
+                )
+            )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/cloudsc_python/drivers/utils.py b/src/cloudsc_python/drivers/utils.py
new file mode 100644
index 00000000..96bef382
--- /dev/null
+++ b/src/cloudsc_python/drivers/utils.py
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+import csv
+import datetime
+import os
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from typing import Tuple
+
+
+def to_csv(
+    output_file: str,
+    host_name: str,
+    variant: str,
+    num_cols: int,
+    num_runs: int,
+    runtime_mean: float,
+    runtime_stddev: float,
+) -> None:
+    """Append one row of runtime statistics to a CSV file; a new file gets a header first."""
+    is_new_file = not os.path.exists(output_file)
+    # newline="" leaves line endings to the csv module (avoids "\r\r\n" rows on Windows)
+    with open(output_file, "a", newline="") as csv_file:
+        writer = csv.writer(csv_file, delimiter=",")
+        if is_new_file:
+            writer.writerow(("date", "host", "variant", "num_cols", "num_runs", "mean", "stddev"))
+        writer.writerow(
+            (
+                datetime.date.today().strftime("%Y%m%d"),
+                host_name,
+                variant,
+                num_cols,
+                num_runs,
+                runtime_mean,
+                runtime_stddev,
+            )
+        )
+
+
+def print_performance(runtimes: list[float]) -> Tuple[float, float]:
+    """Print the mean and sample standard deviation of the measured runtimes and return them."""
+    num = len(runtimes)
+    avg = sum(runtimes) / num
+    std = (sum((value - avg) ** 2 for value in runtimes) / max(num - 1, 1)) ** 0.5
+    print(f"Performance: Average runtime over {num} runs: {avg:.3f} \u00B1 {std:.3f} ms.")
+    return avg, std
diff --git a/src/cloudsc_python/pyproject.toml b/src/cloudsc_python/pyproject.toml
new file mode 100644
index 00000000..b3413f4e
--- /dev/null
+++ b/src/cloudsc_python/pyproject.toml
@@ -0,0 +1,28 @@
+[build-system]
+requires = ['setuptools>=42', 'wheel']
+
+[tool.setuptools_scm]
+
+[tool.black]
+line-length = 100
+target-version = ['py37', 'py38', 'py39']
+include = '\.pyi?$'
+exclude = '''
+/(
+    \.eggs
+  | \.git
+  | \.hg
+  | \.mypy_cache
+  | \.tox
+  | \.venv
+  | _build
+  | buck-out
+  | build
+  | dist
+
+  # The following are specific to Black, you probably don't want those.
+  | blib2to3
+  | tests/data
+  | profiling
+)/
+'''
\ No newline at end of file
diff --git a/src/cloudsc_python/requirements.txt b/src/cloudsc_python/requirements.txt
new file mode 100644
index 00000000..0e2878cc
--- /dev/null
+++ b/src/cloudsc_python/requirements.txt
@@ -0,0 +1,8 @@
+click
+gt4py[dace]@git+https://github.com/gridtools/gt4py.git#egg=gt4py
+h5py
+numpy
+pandas
+pydantic
+sympl@git+https://github.com/stubbiali/sympl.git@oop#egg=sympl
+xarray
diff --git a/src/cloudsc_python/requirements_dev.txt b/src/cloudsc_python/requirements_dev.txt
new file mode 100644
index 00000000..3876e777
--- /dev/null
+++ b/src/cloudsc_python/requirements_dev.txt
@@ -0,0 +1,8 @@
+black >= 22.6.0
+flake8
+ipdb
+ipython
+matplotlib
+mypy
+pre-commit
+pytest
diff --git a/src/cloudsc_python/setup.cfg b/src/cloudsc_python/setup.cfg
new file mode 100644
index 00000000..cdaac6f9
--- /dev/null
+++ b/src/cloudsc_python/setup.cfg
@@ -0,0 +1,61 @@
+[metadata]
+name = cloudsc4py
+description = GT4Py-based implementation of the CLOUDSC dwarf
+author = ETH Zurich, ECMWF
+author_email = subbiali@phys.ethz.ch, michael.lange@ecmwf.int
+license = Apache-2.0
+license_file = ../../LICENSE
+;long_description = file: ../../README.md
+;long_description_content_type = text/markdown
+project_urls =
+    Source = https://github.com/ecmwf-ifs/dwarf-p-cloudsc
+platforms = Linux, Mac
+classifiers =
+    Development Status :: 3 - Alpha
+    Intended Audience :: Science/Research
+    License :: OSI Approved :: Apache Software License
+    Natural Language :: English
+    Operating System :: POSIX
+    Programming Language :: Python :: 3
+    Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
+    Topic :: Scientific/Engineering :: Atmospheric Science
+
+[options]
+zip_safe = False
+packages = find:
+include_package_data = True
+python_requires = >= 3.8
+package_dir =
+    =src
+install_requires =
+    click
+    gt4py[dace] @ git+https://github.com/GridTools/gt4py.git@master#egg=gt4py
+    h5py
+    numpy
+    pandas
+    pydantic
+    sympl @ git+https://github.com/stubbiali/sympl.git@oop#egg=sympl
+    xarray
+
+[options.packages.find]
+where = src
+exclude =
+    data
+    drivers
+    scripts
+    tests
+
+;[tool:pytest]
+;testpaths = tests
+
+;[build_sphinx]
+;source-dir = docs/source
+;build-dir = docs/build
+;builder = html latexpdf
+
+[flake8]
+exclude =
+    .eggs
+    .git
+max-line-length = 100
diff --git a/src/cloudsc_python/setup.py b/src/cloudsc_python/setup.py
new file mode 100644
index 00000000..40c9b8da
--- /dev/null
+++ b/src/cloudsc_python/setup.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from setuptools import setup
+import sys
+
+
+if sys.version_info.major < 3:  # abort on any interpreter older than Python 3
+    print("Python 3.x is required.")
+    sys.exit(1)
+
+
+setup(use_scm_version=False)  # no other arguments: presumably configured via setup.cfg
diff --git a/src/cloudsc_python/src/cloudsc4py/__init__.py b/src/cloudsc_python/src/cloudsc4py/__init__.py
new file mode 100644
index 00000000..0f97d401
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/__init__.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import os
+
+import gt4py.config as gt_config
+
+import cloudsc4py.physics
+
+
+# customize compilation/linking of GT4Py generated code via the CXXFLAGS and LFLAGS variables
+cxxflags = os.environ.get("CXXFLAGS", "")
+if cxxflags != "":
+    gt_config.build_settings["extra_compile_args"]["cxx"] += cxxflags.split()  # no empty flags
+
+lflags = os.environ.get("LFLAGS", "")
+if lflags != "":
+    gt_config.build_settings["extra_link_args"] += lflags.split()  # no empty flags
diff --git a/src/cloudsc_python/src/cloudsc4py/framework/__init__.py b/src/cloudsc_python/src/cloudsc4py/framework/__init__.py
new file mode 100644
index 00000000..95e3c8ad
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/framework/__init__.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
diff --git a/src/cloudsc_python/src/cloudsc4py/framework/components.py b/src/cloudsc_python/src/cloudsc4py/framework/components.py
new file mode 100644
index 00000000..c3db2b57
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/framework/components.py
@@ -0,0 +1,169 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+from abc import abstractmethod
+from functools import cached_property
+from typing import Optional, TYPE_CHECKING
+
+from sympl._core.core_components import (
+    DiagnosticComponent as SymplDiagnosticComponent,
+    ImplicitTendencyComponent as SymplImplicitTendencyComponent,
+)
+
+from cloudsc4py.framework.config import GT4PyConfig
+from cloudsc4py.framework.stencil import compile_stencil
+from cloudsc4py.framework.storage import get_data_shape_from_name, get_dtype_from_name, zeros
+
+if TYPE_CHECKING:
+    from typing import Any, Dict
+
+    from gt4py import StencilObject
+    from gt4py.storage import Storage
+    from sympl._core.typingx import PropertyDict
+
+    from cloudsc4py.framework.grid import ComputationalGrid
+
+
+class ComputationalGridComponent:
+    """Base for model components bound to a computational grid and a GT4Py configuration."""
+
+    def __init__(self, computational_grid: ComputationalGrid, *, gt4py_config: GT4PyConfig) -> None:
+        self.computational_grid = computational_grid
+        self.gt4py_config = gt4py_config
+
+    def compile_stencil(
+        self, name: str, externals: Optional[Dict[str, Any]] = None
+    ) -> StencilObject:
+        return compile_stencil(name, self.gt4py_config, externals)
+
+    def fill_properties_with_dims(self, properties: PropertyDict) -> PropertyDict:
+        # NOTE: the entries of ``properties`` are updated in place
+        for field_prop in properties.values():
+            field_prop["dims"] = self.computational_grid.grids[field_prop["grid"]].dims
+        return properties
+
+    def allocate(self, name: str, properties: PropertyDict) -> Storage:
+        shape_of_data, type_name = get_data_shape_from_name(name), get_dtype_from_name(name)
+        return zeros(
+            self.computational_grid,
+            properties[name]["grid"],
+            shape_of_data,
+            gt4py_config=self.gt4py_config,
+            dtype=type_name,
+        )
+
+
+class DiagnosticComponent(ComputationalGridComponent, SymplDiagnosticComponent):
+    """Grid-aware variant of Sympl's ``DiagnosticComponent``."""
+
+    def __init__(
+        self,
+        computational_grid: ComputationalGrid,
+        *,
+        enable_checks: bool = True,
+        gt4py_config: GT4PyConfig,
+    ) -> None:
+        super().__init__(computational_grid, gt4py_config=gt4py_config)  # grid-aware base
+        super(ComputationalGridComponent, self).__init__(enable_checks=enable_checks)  # Sympl side
+
+    @cached_property  # computed on first access, then cached on the instance
+    def input_properties(self) -> PropertyDict:
+        return self.fill_properties_with_dims(self._input_properties)
+
+    @abstractmethod  # to be provided by concrete components
+    @cached_property
+    def _input_properties(self) -> PropertyDict:
+        """
+        Dictionary where each key is the name of an input field, and the corresponding value is a
+        dictionary specifying the units for that field ('units') and the identifier of the grid over
+        which it is defined ('grid').
+        """
+        ...
+
+    def allocate_diagnostic(self, name: str) -> Storage:
+        return self.allocate(name, self.diagnostic_properties)
+
+    @cached_property  # computed on first access, then cached on the instance
+    def diagnostic_properties(self) -> PropertyDict:
+        return self.fill_properties_with_dims(self._diagnostic_properties)
+
+    @abstractmethod  # to be provided by concrete components
+    @cached_property
+    def _diagnostic_properties(self) -> PropertyDict:
+        """
+        Dictionary where each key is the name of a field diagnosed by the component, and the
+        corresponding value is a dictionary specifying the units for that field ('units') and the
+        identifier of the grid over which it is defined ('grid').
+        """
+        ...
+
+
+class ImplicitTendencyComponent(ComputationalGridComponent, SymplImplicitTendencyComponent):
+    """Grid-aware variant of Sympl's ``ImplicitTendencyComponent``."""
+
+    def __init__(
+        self,
+        computational_grid: ComputationalGrid,
+        *,
+        enable_checks: bool = True,
+        gt4py_config: GT4PyConfig,
+    ) -> None:
+        super().__init__(computational_grid, gt4py_config=gt4py_config)  # grid-aware base
+        super(ComputationalGridComponent, self).__init__(enable_checks=enable_checks)  # Sympl side
+
+    @cached_property  # computed on first access, then cached on the instance
+    def input_properties(self) -> PropertyDict:
+        return self.fill_properties_with_dims(self._input_properties)
+
+    @abstractmethod  # to be provided by concrete components
+    @cached_property
+    def _input_properties(self) -> PropertyDict:
+        """
+        Dictionary where each key is the name of an input field, and the corresponding value is a
+        dictionary specifying the units for that field ('units') and the identifier of the grid over
+        which it is defined ('grid').
+        """
+        ...
+
+    def allocate_tendency(self, name: str) -> Storage:
+        return self.allocate(name, self.tendency_properties)
+
+    @cached_property  # computed on first access, then cached on the instance
+    def tendency_properties(self) -> PropertyDict:
+        return self.fill_properties_with_dims(self._tendency_properties)
+
+    @abstractmethod  # to be provided by concrete components
+    @cached_property
+    def _tendency_properties(self) -> PropertyDict:
+        """
+        Dictionary where each key is the name of a tendency field computed by the component, and the
+        corresponding value is a dictionary specifying the units for that field ('units') and the
+        identifier of the grid over which it is defined ('grid').
+        """
+        ...
+
+    def allocate_diagnostic(self, name: str) -> Storage:
+        return self.allocate(name, self.diagnostic_properties)
+
+    @cached_property  # computed on first access, then cached on the instance
+    def diagnostic_properties(self) -> PropertyDict:
+        return self.fill_properties_with_dims(self._diagnostic_properties)
+
+    @abstractmethod  # to be provided by concrete components
+    @cached_property
+    def _diagnostic_properties(self) -> PropertyDict:
+        """
+        Dictionary where each key is the name of a field diagnosed by the component, and the
+        corresponding value is a dictionary specifying the units for that field ('units') and the
+        identifier of the grid over which it is defined ('grid').
+        """
+        ...
diff --git a/src/cloudsc_python/src/cloudsc4py/framework/config.py b/src/cloudsc_python/src/cloudsc4py/framework/config.py
new file mode 100644
index 00000000..8c63fe50
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/framework/config.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+from pydantic import BaseModel, validator
+from typing import Any, Dict, Optional, Union, Type
+
+
+class DataTypes(BaseModel):
+    """Specify the datatypes for bool, float and integer fields."""
+
+    bool: Type  # type used for boolean fields
+    float: Type  # type used for floating-point fields
+    int: Type  # type used for integer fields
+
+
+class GT4PyConfig(BaseModel):
+    """Gather options controlling the compilation and execution of the code generated by GT4Py."""
+
+    backend: str
+    backend_opts: Dict[str, Any] = {}
+    build_info: Optional[Dict[str, Any]] = None
+    device_sync: bool = True
+    dtypes: DataTypes = DataTypes(bool=bool, float=float, int=int)
+    exec_info: Optional[Dict[str, Any]] = None
+    managed: Union[bool, str] = "gt4py"
+    rebuild: bool = False
+    validate_args: bool = False
+    verbose: bool = True
+
+    @validator("exec_info")  # NOTE: not run for the default value, which therefore stays None
+    @classmethod
+    def set_exec_info(cls, v: Optional[Dict[str, Any]]) -> Dict[str, Any]:
+        v = v or {}
+        return {**v, "__aggregate_data": True}
+
+    def reset_exec_info(self) -> None:  # also works while exec_info is still None
+        self.exec_info = {"__aggregate_data": (self.exec_info or {}).get("__aggregate_data", True)}
+
+    def with_backend(self, backend: Optional[str]) -> GT4PyConfig:
+        args = self.dict()
+        if backend is not None:  # None keeps the current backend
+            args["backend"] = backend
+        return GT4PyConfig(**args)  # a new object; ``self`` is left untouched
+
+    def with_dtypes(self, dtypes: DataTypes) -> GT4PyConfig:
+        args = self.dict()
+        args["dtypes"] = dtypes
+        return GT4PyConfig(**args)  # a new object; ``self`` is left untouched
+
+    def with_validate_args(self, flag: bool) -> GT4PyConfig:
+        args = self.dict()
+        args["validate_args"] = flag
+        return GT4PyConfig(**args)  # a new object; ``self`` is left untouched
diff --git a/src/cloudsc_python/src/cloudsc4py/framework/grid.py b/src/cloudsc_python/src/cloudsc4py/framework/grid.py
new file mode 100644
index 00000000..1503eb18
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/framework/grid.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+from functools import cached_property
+import numpy as np
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from typing import Dict, Tuple
+
+
+class DimSymbol:
+    """Symbol identifying a dimension, e.g. I or I-1/2; one instance exists per (name, offset)."""
+
+    _instances: Dict[Tuple[str, float], DimSymbol] = {}
+
+    name: str
+    offset: float
+
+    def __new__(cls, *args) -> DimSymbol:
+        key = args  # the tuple itself, not hash(args): hashes collide, e.g. hash(-1) == hash(-2)
+        if key not in cls._instances:
+            cls._instances[key] = super().__new__(cls)
+        return cls._instances[key]
+
+    def __init__(self, name: str, offset: float) -> None:
+        self.name = name
+        self.offset = offset
+
+    def __add__(self, other: float) -> DimSymbol:
+        return DimSymbol(self.name, self.offset + other)
+
+    def __sub__(self, other: float) -> DimSymbol:
+        return self + (-other)
+
+    def __repr__(self) -> str:
+        if self.offset > 0:
+            return f"{self.name} + {self.offset}"
+        elif self.offset < 0:
+            return f"{self.name} - {-self.offset}"
+        else:
+            return f"{self.name}"
+
+
+I = DimSymbol("I", 0)  # symbols with zero offset for the three dimensions
+J = DimSymbol("J", 0)
+K = DimSymbol("K", 0)
+
+
+class Grid:
+    """Collection of grid points with a logical shape, dimension names and a storage shape."""
+
+    def __init__(
+        self, shape: Tuple[int, ...], dims: Tuple[str, ...], storage_shape: Tuple[int, ...] = None
+    ) -> None:
+        assert len(shape) == len(dims)
+        self.shape, self.dims = shape, dims
+        # an omitted (or empty) storage shape defaults to the logical shape
+        self.storage_shape = storage_shape if storage_shape else self.shape
+
+    @cached_property
+    def coords(self) -> Tuple[np.ndarray, ...]:
+        return tuple(map(np.arange, self.storage_shape))
+
+
+class ComputationalGrid:
+    """A three-dimensional computational grid consisting of mass and staggered grid points."""
+
+    grids: Dict[Tuple[DimSymbol, ...], Grid]  # keyed by tuples of dimension symbols
+
+    def __init__(self, nx: int, ny: int, nz: int) -> None:
+        self.grids = {
+            (I, J, K): Grid((nx, ny, nz), ("x", "y", "z"), (nx, ny, nz + 1)),  # padded storage
+            (I, J, K - 1 / 2): Grid((nx, ny, nz + 1), ("x", "y", "z_h")),  # staggered along z
+            (I, J): Grid((nx, ny), ("x", "y")),  # no z dimension
+            (K,): Grid((nz,), ("z",), (nz + 1,)),  # z only, padded storage
+        }
diff --git a/src/cloudsc_python/src/cloudsc4py/framework/stencil.py b/src/cloudsc_python/src/cloudsc4py/framework/stencil.py
new file mode 100644
index 00000000..dc06f146
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/framework/stencil.py
@@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+from typing import TYPE_CHECKING
+
+from gt4py import gtscript
+
+if TYPE_CHECKING:
+    from typing import Any, Dict
+
+    from gt4py import StencilObject
+
+    from cloudsc4py.framework.config import GT4PyConfig
+
+
+FUNCTION_COLLECTION = {}  # name -> {"definition": ...}, filled by ``function_collection``
+STENCIL_COLLECTION = {}  # name -> {"definition": ...}, filled by ``stencil_collection``
+
+
+def function_collection(name: str):
+    """Return a decorator registering a GT4Py function under the (so far unused) ``name``."""
+    if name in FUNCTION_COLLECTION:
+        raise RuntimeError(f"Another function called `{name}` found.")
+
+    def register(definition):
+        FUNCTION_COLLECTION.update({name: {"definition": definition}})
+        return definition
+
+    return register
+
+
+def stencil_collection(name: str):
+    """Return a decorator registering a stencil definition under the (so far unused) ``name``."""
+    if name in STENCIL_COLLECTION:
+        raise RuntimeError(f"Another stencil called `{name}` found.")
+
+    def register(definition):
+        STENCIL_COLLECTION.update({name: {"definition": definition}})
+        return definition
+
+    return register
+
+
+def compile_stencil(
+    name: str,
+    gt4py_config: GT4PyConfig,
+    externals: Dict[str, Any] = None,
+) -> StencilObject:
+    """Compile the stencil registered as ``name`` with the options stored in ``gt4py_config``."""
+    entry = STENCIL_COLLECTION.get(name)
+    if entry is None:
+        raise RuntimeError(f"Unknown stencil `{name}`.")
+    stencil_definition = entry["definition"]
+
+    stencil_dtypes = gt4py_config.dtypes.dict()
+    stencil_externals = externals if externals else {}
+
+    options = dict(gt4py_config.backend_opts)
+    if gt4py_config.backend not in ("debug", "numpy", "gtc:numpy"):
+        options["verbose"] = gt4py_config.verbose
+
+    return gtscript.stencil(
+        gt4py_config.backend,
+        stencil_definition,
+        name=name,
+        build_info=gt4py_config.build_info,
+        dtypes=stencil_dtypes,
+        externals=stencil_externals,
+        rebuild=gt4py_config.rebuild,
+        **options,
+    )
diff --git a/src/cloudsc_python/src/cloudsc4py/framework/storage.py b/src/cloudsc_python/src/cloudsc4py/framework/storage.py
new file mode 100644
index 00000000..2887b5af
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/framework/storage.py
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+from contextlib import contextmanager
+import numpy as np
+from typing import TYPE_CHECKING
+
+import gt4py
+from sympl._core.data_array import DataArray
+
+if TYPE_CHECKING:
+    from typing import Dict, List, Literal, Optional, Tuple
+
+    from cloudsc4py.framework.config import GT4PyConfig
+    from cloudsc4py.framework.grid import ComputationalGrid, DimSymbol
+    from cloudsc4py.utils.typingx import Storage
+
+
+def zeros(
+    computational_grid: ComputationalGrid,
+    grid_id: Tuple[DimSymbol, ...],
+    data_shape: Optional[Tuple[int, ...]] = None,
+    *,
+    gt4py_config: GT4PyConfig,
+    dtype: Literal["bool", "float", "int"],
+) -> Storage:
+    """
+    Allocate a zero-filled array over the grid ``grid_id`` of ``computational_grid``.
+
+    The array shape is the storage shape of the grid followed by ``data_shape`` (if any); the
+    allocation is delegated to ``gt4py.storage.zeros`` for the configured backend.
+    """
+    grid_shape = computational_grid.grids[grid_id].storage_shape
+    full_shape = grid_shape + (data_shape if data_shape else ())
+    # map the symbolic dtype ("bool", "float" or "int") to the configured concrete type
+    concrete_dtype = gt4py_config.dtypes.dict()[dtype]
+    return gt4py.storage.zeros(full_shape, concrete_dtype, backend=gt4py_config.backend)
+
+
+def get_data_array(
+    buffer: Storage,
+    computational_grid: ComputationalGrid,
+    grid_id: Tuple[DimSymbol, ...],
+    units: str,
+    data_dims: Optional[Tuple[str, ...]] = None,
+) -> DataArray:
+    """Wrap ``buffer`` in a ``DataArray`` carrying grid dimensions, coordinates and units."""
+    grid = computational_grid.grids[grid_id]
+    extra_dims = data_dims if data_dims else ()
+    dims = grid.dims + extra_dims
+    # axes of the buffer beyond the grid dimensions get plain integer coordinates
+    coords = grid.coords + tuple(map(np.arange, buffer.shape[len(grid.dims) :]))
+    attrs = {"units": units}
+    return DataArray(buffer, dims=dims, coords=coords, attrs=attrs)
+
+
+def allocate_data_array(
+    computational_grid: ComputationalGrid,
+    grid_id: Tuple[DimSymbol, ...],
+    units: str,
+    data_shape: Optional[Tuple[int, ...]] = None,
+    data_dims: Optional[Tuple[str, ...]] = None,
+    *,
+    gt4py_config: GT4PyConfig,
+    dtype: Literal["bool", "float", "int"],
+) -> DataArray:
+    """
+    Allocate a zero-filled buffer over the grid ``grid_id`` of ``computational_grid``
+    and return it wrapped in a ``DataArray`` with the given ``units``.
+    """
+    zeros_kwargs = {"data_shape": data_shape, "gt4py_config": gt4py_config, "dtype": dtype}
+    storage = zeros(computational_grid, grid_id, **zeros_kwargs)
+    # attach dimension names, coordinates and units to the raw buffer
+    return get_data_array(storage, computational_grid, grid_id, units, data_dims=data_dims)
+
+
+def get_dtype_from_name(field_name: str) -> str:
+    """
+    Retrieve the datatype of a field from the first character of its name.
+
+    A leading 'b' denotes a bool field (e.g. 'b_{some_name}'), a leading 'f' a float field
+    (e.g. 'f_{some_name}'), and a leading 'i' an integer field (e.g. 'i_{some_name}').
+    """
+    dtype_by_prefix = {
+        "b": "bool",
+        "f": "float",
+        "i": "int",
+    }
+    for prefix, dtype in dtype_by_prefix.items():
+        if field_name.startswith(prefix):
+            return dtype
+    raise RuntimeError(f"Cannot retrieve dtype for field `{field_name}`.")
+
+
+def get_data_shape_from_name(field_name: str) -> Tuple[int, ...]:
+    """
+    Retrieve the shape of the data dimensions of a field from its name.
+
+    The digits between the leading type character and the first underscore are read one by
+    one, each giving the size of one data dimension, e.g. 'f4_{some_name}' yields ``(4,)``.
+    """
+    prefix = field_name.partition("_")[0]
+    return tuple(map(int, prefix[1:]))
+
+
+TEMPORARY_STORAGE_POOL: Dict[int, List[Storage]] = {}  # pool key -> storages free for reuse
+
+
+@contextmanager
+def managed_temporary_storage(
+    computational_grid: ComputationalGrid,
+    *args: Tuple[Tuple[DimSymbol, ...], Literal["bool", "float", "int"]],
+    gt4py_config: GT4PyConfig,
+):
+    """
+    Lend temporary storages defined over the grids of ``computational_grid``.
+
+    Every positional argument is a pair: the identifier of the grid comes first, the datatype
+    of the storage second.
+
+    A storage is taken from ``TEMPORARY_STORAGE_POOL`` when the pool holds a matching one, and
+    allocated otherwise. On exit, all lent storages are put (back) into the pool for reuse.
+    """
+    pool_keys = []
+    storages = []
+    for grid_id, dtype in args:
+        grid = computational_grid.grids[grid_id]
+        pool_key = hash((grid.shape + grid_id, dtype))
+        idle = TEMPORARY_STORAGE_POOL.setdefault(pool_key, [])
+        # reuse an idle storage when one is available, otherwise allocate a new one
+        if idle:
+            storage = idle.pop()
+        else:
+            storage = zeros(computational_grid, grid_id, gt4py_config=gt4py_config, dtype=dtype)
+        pool_keys.append(pool_key)
+        storages.append(storage)
+
+    try:
+        # a single request yields the storage itself, several requests yield the list
+        yield storages[0] if len(storages) == 1 else storages
+    finally:
+        # hand the storages back to their pools, also if the managed block raised
+        for pool_key, storage in zip(pool_keys, storages):
+            TEMPORARY_STORAGE_POOL[pool_key].append(storage)
+
+
+@contextmanager
+def managed_temporary_storage_pool():
+    """
+    Empty ``TEMPORARY_STORAGE_POOL`` both when entering and when leaving the context.
+
+    Meant for sessions running several simulations with different backends: wrap all
+    simulations sharing one backend in this context manager.
+    """
+    try:
+        TEMPORARY_STORAGE_POOL.clear()
+        yield None
+    finally:
+        # drop the pooled storages list by list before discarding the lists themselves;
+        # this runs also when the managed block raised
+        for idle in TEMPORARY_STORAGE_POOL.values():
+            while idle:
+                idle.pop()
+        TEMPORARY_STORAGE_POOL.clear()
diff --git a/src/cloudsc_python/src/cloudsc4py/initialization/__init__.py b/src/cloudsc_python/src/cloudsc4py/initialization/__init__.py
new file mode 100644
index 00000000..95e3c8ad
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/initialization/__init__.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
diff --git a/src/cloudsc_python/src/cloudsc4py/initialization/reference.py b/src/cloudsc_python/src/cloudsc4py/initialization/reference.py
new file mode 100644
index 00000000..e2c5804a
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/initialization/reference.py
@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+from datetime import datetime
+from functools import partial
+from typing import TYPE_CHECKING
+
+from cloudsc4py.framework.grid import I, J, K
+from cloudsc4py.framework.storage import allocate_data_array
+from cloudsc4py.initialization.utils import initialize_field
+
+if TYPE_CHECKING:
+    from typing import Literal, Tuple
+
+    from sympl._core.data_array import DataArray
+    from sympl._core.typingx import DataArrayDict
+
+    from cloudsc4py.framework.config import GT4PyConfig
+    from cloudsc4py.framework.grid import ComputationalGrid, DimSymbol
+    from cloudsc4py.utils.iox import HDF5Reader
+
+
+def allocate_tendencies(
+    computational_grid: ComputationalGrid, *, gt4py_config: GT4PyConfig
+) -> DataArrayDict:
+    """
+    Allocate the tendency fields as float arrays without units over the (I, J, K) grid,
+    and bundle them with a fixed time stamp (2022-01-01).
+    """
+    field_names = ("f_a", "f_qi", "f_ql", "f_qr", "f_qs", "f_qv", "f_t")
+
+    def allocate(units: str = "") -> DataArray:
+        return allocate_data_array(
+            computational_grid, (I, J, K), units, gt4py_config=gt4py_config, dtype="float"
+        )
+
+    tendencies = {"time": datetime(year=2022, month=1, day=1)}
+    for field_name in field_names:
+        tendencies[field_name] = allocate()
+    return tendencies
+
+
+def initialize_tendencies(tendencies: DataArrayDict, hdf5_reader: HDF5Reader) -> None:
+    """Initialize the fields of ``tendencies`` with the data read through ``hdf5_reader``."""
+    key_pairs = (("f_a", "TENDENCY_LOC_A"), ("f_qv", "TENDENCY_LOC_Q"), ("f_t", "TENDENCY_LOC_T"))
+    for name, hdf5_reader_key in key_pairs:
+        buffer = hdf5_reader.get_field(hdf5_reader_key)
+        initialize_field(tendencies[name], buffer)
+    cld = hdf5_reader.get_field("TENDENCY_LOC_CLD")  # last axis indexes the four fields below
+    for idx, name in enumerate(("f_ql", "f_qi", "f_qr", "f_qs")):
+        initialize_field(tendencies[name], cld[..., idx])
+
+
+def allocate_diagnostics(
+    computational_grid: ComputationalGrid, *, gt4py_config: GT4PyConfig
+) -> DataArrayDict:
+    """Allocate the diagnostic fields (float, no units) together with a fixed time stamp."""
+
+    def allocate_on(grid_id: Tuple[DimSymbol, ...]) -> DataArray:
+        return allocate_data_array(
+            computational_grid, grid_id, "", gt4py_config=gt4py_config, dtype="float"
+        )
+
+    allocate = partial(allocate_on, (I, J, K))
+    allocate_h = partial(allocate_on, (I, J, K - 1 / 2))
+    allocate_ij = partial(allocate_on, (I, J))
+
+    return {
+        "time": datetime(year=2022, month=1, day=1),
+        "f_covptot": allocate(),
+        "f_fcqlng": allocate_h(),
+        "f_fcqnng": allocate_h(),
+        "f_fcqrng": allocate_h(),
+        "f_fcqsng": allocate_h(),
+        "f_fhpsl": allocate_h(),
+        "f_fhpsn": allocate_h(),
+        "f_fplsl": allocate_h(),
+        "f_fplsn": allocate_h(),
+        "f_fsqif": allocate_h(),
+        "f_fsqitur": allocate_h(),
+        "f_fsqlf": allocate_h(),
+        "f_fsqltur": allocate_h(),
+        "f_fsqrf": allocate_h(),
+        "f_fsqsf": allocate_h(),
+        "f_rainfrac_toprfz": allocate_ij(),
+    }
+
+
+def initialize_diagnostics(diagnostics: DataArrayDict, hdf5_reader: HDF5Reader) -> None:
+    """Initialize all entries but 'time'; 'f_xyz' is read from the reader field 'PXYZ'."""
+    for name in [key for key in diagnostics if key != "time"]:
+        buffer = hdf5_reader.get_field("P" + name[2:].upper())
+        initialize_field(diagnostics[name], buffer)
+
+
+def get_reference_tendencies(
+    computational_grid: ComputationalGrid, hdf5_reader: HDF5Reader, *, gt4py_config: GT4PyConfig
+) -> DataArrayDict:
+    reference = allocate_tendencies(computational_grid, gt4py_config=gt4py_config)
+    initialize_tendencies(reference, hdf5_reader)  # initializes the allocated fields
+    return reference
+
+
+def get_reference_diagnostics(
+    computational_grid: ComputationalGrid, hdf5_reader: HDF5Reader, *, gt4py_config: GT4PyConfig
+) -> DataArrayDict:
+    reference = allocate_diagnostics(computational_grid, gt4py_config=gt4py_config)
+    initialize_diagnostics(reference, hdf5_reader)  # initializes the allocated fields
+    return reference
diff --git a/src/cloudsc_python/src/cloudsc4py/initialization/state.py b/src/cloudsc_python/src/cloudsc4py/initialization/state.py
new file mode 100644
index 00000000..0e743e11
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/initialization/state.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+from datetime import datetime
+from functools import partial
+from typing import TYPE_CHECKING
+
+from cloudsc4py.framework.grid import I, J, K
+from cloudsc4py.framework.storage import allocate_data_array
+from cloudsc4py.initialization.utils import initialize_field
+
+if TYPE_CHECKING:
+    from typing import Literal, Tuple
+
+    from sympl._core.data_array import DataArray
+    from sympl._core.typingx import DataArrayDict
+
+    from cloudsc4py.framework.config import GT4PyConfig
+    from cloudsc4py.framework.grid import ComputationalGrid, DimSymbol
+    from cloudsc4py.utils.iox import HDF5Reader
+
+
+def allocate_state(
+    computational_grid: ComputationalGrid, *, gt4py_config: GT4PyConfig
+) -> DataArrayDict:
+    """Allocate all state fields (units "") and bundle them with a fixed "time" entry."""
+    def _new(dtype: Literal["bool", "float", "int"], *grid_id: DimSymbol) -> DataArray:
+        return allocate_data_array(
+            computational_grid, grid_id, "", gt4py_config=gt4py_config, dtype=dtype
+        )
+
+    # one ready-to-call factory per (dtype, grid location) pair used by the state
+    flag_ij = partial(_new, "bool", I, J)
+    real_ijk = partial(_new, "float", I, J, K)
+    real_ijk_half = partial(_new, "float", I, J, K - 1 / 2)
+    real_ij = partial(_new, "float", I, J)
+    integer_ij = partial(_new, "int", I, J)
+
+    return {
+        "time": datetime(year=2022, month=1, day=1),
+        "b_convection_on": flag_ij(),
+        "f_a": real_ijk(),
+        "f_ap": real_ijk(),
+        "f_aph": real_ijk_half(),
+        "f_ccn": real_ijk(),
+        "f_dyni": real_ijk(),
+        "f_dynl": real_ijk(),
+        "f_hrlw": real_ijk(),
+        "f_hrsw": real_ijk(),
+        "f_icrit_aer": real_ijk(),
+        "f_lcrit_aer": real_ijk(),
+        "f_lsm": real_ij(),
+        "f_lu": real_ijk(),
+        "f_lude": real_ijk(),
+        "f_mfd": real_ijk(),
+        "f_mfu": real_ijk(),
+        "f_nice": real_ijk(),
+        "f_qi": real_ijk(),
+        "f_ql": real_ijk(),
+        "f_qr": real_ijk(),
+        "f_qs": real_ijk(),
+        "f_qv": real_ijk(),
+        "f_re_ice": real_ijk(),
+        "f_snde": real_ijk(),
+        "f_supsat": real_ijk(),
+        "f_t": real_ijk(),
+        "f_tnd_tmp_a": real_ijk(),
+        "f_tnd_tmp_qi": real_ijk(),
+        "f_tnd_tmp_ql": real_ijk(),
+        "f_tnd_tmp_qr": real_ijk(),
+        "f_tnd_tmp_qs": real_ijk(),
+        "f_tnd_tmp_qv": real_ijk(),
+        "f_tnd_tmp_t": real_ijk(),
+        "f_vfa": real_ijk(),
+        "f_vfi": real_ijk(),
+        "f_vfl": real_ijk(),
+        "f_w": real_ijk(),
+        "i_convection_type": integer_ij(),
+    }
+
+
+def initialize_state(state: DataArrayDict, hdf5_reader: HDF5Reader) -> None:
+    """Initialize the fields of `state` from the datasets served by `hdf5_reader`."""
+    one_dataset_per_field = {
+        "b_convection_on": "LDCUM",
+        "f_a": "PA",
+        "f_ap": "PAP",
+        "f_aph": "PAPH",
+        "f_ccn": "PCCN",
+        "f_dyni": "PDYNI",
+        "f_dynl": "PDYNL",
+        "f_hrlw": "PHRLW",
+        "f_hrsw": "PHRSW",
+        "f_icrit_aer": "PICRIT_AER",
+        "f_lcrit_aer": "PLCRIT_AER",
+        "f_lsm": "PLSM",
+        "f_lu": "PLU",
+        "f_lude": "PLUDE",
+        "f_mfd": "PMFD",
+        "f_mfu": "PMFU",
+        "f_nice": "PNICE",
+        "f_qv": "PQ",
+        "f_re_ice": "PRE_ICE",
+        "f_snde": "PSNDE",
+        "f_supsat": "PSUPSAT",
+        "f_t": "PT",
+        "f_tnd_tmp_a": "TENDENCY_TMP_A",
+        "f_tnd_tmp_qv": "TENDENCY_TMP_Q",
+        "f_tnd_tmp_t": "TENDENCY_TMP_T",
+        "f_vfa": "PVFA",
+        "f_vfi": "PVFI",
+        "f_vfl": "PVFL",
+        "f_w": "PVERVEL",
+        "i_convection_type": "KTYPE",
+    }
+    for name, dataset in one_dataset_per_field.items():
+        buffer = hdf5_reader.get_field(dataset)
+        initialize_field(state[name], buffer)
+
+    # these two datasets pack four fields along their last axis, in the order ql, qi, qr, qs
+    species = ("ql", "qi", "qr", "qs")
+    for dataset, prefix in (("PCLV", "f_"), ("TENDENCY_TMP_CLD", "f_tnd_tmp_")):
+        packed = hdf5_reader.get_field(dataset)
+        for idx, suffix in enumerate(species):
+            initialize_field(state[prefix + suffix], packed[..., idx])
+
+
+def get_state(
+    computational_grid: ComputationalGrid, hdf5_reader: HDF5Reader, *, gt4py_config: GT4PyConfig
+) -> DataArrayDict:
+    fields = allocate_state(computational_grid, gt4py_config=gt4py_config)
+    initialize_state(fields, hdf5_reader)  # returns None; works on the entries of `fields`
+    return fields  # the very dictionary that was allocated above
diff --git a/src/cloudsc_python/src/cloudsc4py/initialization/utils.py b/src/cloudsc_python/src/cloudsc4py/initialization/utils.py
new file mode 100644
index 00000000..f0fef9c0
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/initialization/utils.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+import numpy as np
+from typing import TYPE_CHECKING
+
+from cloudsc4py.utils.numpyx import assign
+
+if TYPE_CHECKING:
+    from sympl._core.data_array import DataArray
+
+    from cloudsc4py.utils.typingx import Storage
+
+
+def initialize_storage_2d(storage: Storage, buffer: np.ndarray) -> None:
+    """Tile `buffer` along axis 0 of `storage[:, 0:1]`; the last tile is cut to fit."""
+    chunk = buffer.size
+    repeats, leftover = divmod(storage.shape[0], chunk)
+    for start in range(0, repeats * chunk, chunk):
+        assign(storage[start : start + chunk, 0:1], buffer[:, np.newaxis])
+    assign(storage[repeats * chunk :, 0:1], buffer[:leftover, np.newaxis])
+
+
+def initialize_storage_3d(storage: Storage, buffer: np.ndarray) -> None:
+    """Tile `buffer` along axis 0 of `storage[:, 0:1, :]`, cutting the last axis to the shorter."""
+    (size, _, nk), (chunk, mk) = storage.shape, buffer.shape
+    depth = min(nk, mk)
+    repeats, leftover = divmod(size, chunk)
+    for start in range(0, repeats * chunk, chunk):
+        assign(storage[start : start + chunk, 0:1, :depth], buffer[:, np.newaxis, :depth])
+    assign(storage[repeats * chunk :, 0:1, :depth], buffer[:leftover, np.newaxis, :depth])
+
+
+def initialize_field(field: DataArray, buffer: np.ndarray) -> None:
+    """Fill a 2-d or 3-d `field` from `buffer`; any other rank raises ValueError."""
+    if field.ndim not in (2, 3):
+        raise ValueError("The field to initialize must be either 2-d or 3-d.")
+    # dispatch on the rank of the field; both initializers write into `field.data`
+    fill = initialize_storage_2d if field.ndim == 2 else initialize_storage_3d
+    fill(field.data, buffer)
diff --git a/src/cloudsc_python/src/cloudsc4py/physics/__init__.py b/src/cloudsc_python/src/cloudsc4py/physics/__init__.py
new file mode 100644
index 00000000..7a356af6
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/physics/__init__.py
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import cloudsc4py.physics._stencils
diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/__init__.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/__init__.py
new file mode 100644
index 00000000..03f5582b
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/__init__.py
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import cloudsc4py.physics._stencils.cloudsc
+import cloudsc4py.physics._stencils.cloudsc_split
+import cloudsc4py.physics._stencils.cuadjtq
+import cloudsc4py.physics._stencils.fccld
+import cloudsc4py.physics._stencils.fcttre
+import cloudsc4py.physics._stencils.helpers
diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc.py
new file mode 100644
index 00000000..ecb4f48d
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc.py
@@ -0,0 +1,2186 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+
+from gt4py.gtscript import Field, IJ, K
+
+from cloudsc4py.framework.stencil import stencil_collection
+from cloudsc4py.physics._stencils.cuadjtq import f_cuadjtq
+from cloudsc4py.physics._stencils.fccld import f_fokoop
+from cloudsc4py.physics._stencils.fcttre import (
+    f_foealfa,
+    f_foedelta,
+    f_foedem,
+    f_foeeice,
+    f_foeeliq,
+    f_foeewm,
+    f_foeldcpm,
+)
+from cloudsc4py.physics._stencils.helpers import f_helper_0, f_helper_1
+
+
+@stencil_collection("cloudsc")
+def cloudsc(
+    in_a: Field["float"],
+    in_ap: Field["float"],
+    in_aph: Field["float"],  # staggered
+    in_ccn: Field["float"],
+    in_convection_on: Field[IJ, "bool"],
+    in_convection_type: Field[IJ, "int"],
+    in_hrlw: Field["float"],
+    in_hrsw: Field["float"],
+    in_icrit_aer: Field["float"],
+    in_lcrit_aer: Field["float"],
+    in_lsm: Field[IJ, "float"],
+    in_lu: Field["float"],
+    in_lude: Field["float"],
+    in_mfd: Field["float"],
+    in_mfu: Field["float"],
+    in_nice: Field["float"],
+    in_qi: Field["float"],
+    in_ql: Field["float"],
+    in_qr: Field["float"],
+    in_qs: Field["float"],
+    in_qv: Field["float"],
+    in_re_ice: Field["float"],
+    in_snde: Field["float"],
+    in_supsat: Field["float"],
+    in_t: Field["float"],
+    in_tnd_tmp_a: Field["float"],
+    in_tnd_tmp_qi: Field["float"],
+    in_tnd_tmp_ql: Field["float"],
+    in_tnd_tmp_qr: Field["float"],
+    in_tnd_tmp_qs: Field["float"],
+    in_tnd_tmp_qv: Field["float"],
+    in_tnd_tmp_t: Field["float"],
+    in_vfi: Field["float"],
+    in_vfl: Field["float"],
+    in_w: Field["float"],
+    out_covptot: Field["float"],
+    out_fcqlng: Field["float"],  # staggered
+    out_fcqnng: Field["float"],  # staggered
+    out_fcqrng: Field["float"],  # staggered
+    out_fcqsng: Field["float"],  # staggered
+    out_fhpsl: Field["float"],  # staggered
+    out_fhpsn: Field["float"],  # staggered
+    out_fplsl: Field["float"],  # staggered
+    out_fplsn: Field["float"],  # staggered
+    out_fsqif: Field["float"],  # staggered
+    out_fsqitur: Field["float"],  # staggered
+    out_fsqlf: Field["float"],  # staggered
+    out_fsqltur: Field["float"],  # staggered
+    out_fsqrf: Field["float"],  # staggered
+    out_fsqsf: Field["float"],  # staggered
+    out_rainfrac_toprfz: Field[IJ, "float"],
+    out_tnd_loc_a: Field["float"],
+    out_tnd_loc_qi: Field["float"],
+    out_tnd_loc_ql: Field["float"],
+    out_tnd_loc_qr: Field["float"],
+    out_tnd_loc_qs: Field["float"],
+    out_tnd_loc_qv: Field["float"],
+    out_tnd_loc_t: Field["float"],
+    tmp_aph_s: Field[IJ, "float"],
+    tmp_cldtopdist: Field[IJ, "float"],
+    tmp_covpmax: Field[IJ, "float"],
+    tmp_covptot: Field[IJ, "float"],
+    tmp_klevel: Field[K, "int"],
+    tmp_paphd: Field[IJ, "float"],
+    tmp_rainliq: Field[IJ, "bool"],
+    tmp_trpaus: Field[IJ, "float"],
+    *,
+    dt: "float",
+):
+    from __externals__ import (
+        DEPICE,
+        EPSEC,
+        EPSILON,
+        EVAPRAIN,
+        EVAPSNOW,
+        FALLQI,
+        FALLQL,
+        FALLQR,
+        FALLQS,
+        FALLQV,
+        LAERICEAUTO,
+        LAERICESED,
+        LAERLIQAUTOLSP,
+        LAERLIQCOLL,
+        NCLDTOP,
+        NLEV,
+        NSSOPT,
+        PHASEQI,
+        PHASEQL,
+        PHASEQR,
+        PHASEQS,
+        PHASEQV,
+        R4IES,
+        R4LES,
+        R5IES,
+        R5LES,
+        RALFDCP,
+        RALSDCP,
+        RALVDCP,
+        RAMID,
+        RAMIN,
+        RCCN,
+        RCL_APB1,
+        RCL_APB2,
+        RCL_APB3,
+        RCL_CDENOM1,
+        RCL_CDENOM2,
+        RCL_CDENOM3,
+        RCL_CONST1I,
+        RCL_CONST1R,
+        RCL_CONST1S,
+        RCL_CONST2I,
+        RCL_CONST2R,
+        RCL_CONST2S,
+        RCL_CONST3I,
+        RCL_CONST3R,
+        RCL_CONST3S,
+        RCL_CONST4I,
+        RCL_CONST4R,
+        RCL_CONST4S,
+        RCL_CONST5I,
+        RCL_CONST5R,
+        RCL_CONST5S,
+        RCL_CONST6I,
+        RCL_CONST6R,
+        RCL_CONST6S,
+        RCL_CONST7S,
+        RCL_CONST8S,
+        RCL_FAC1,
+        RCL_FAC2,
+        RCL_FZRAB,
+        RCL_KK_cloud_num_land,
+        RCL_KK_cloud_num_sea,
+        RCL_KKAac,
+        RCL_KKAau,
+        RCL_KKBac,
+        RCL_KKBaun,
+        RCL_KKBauq,
+        RCLCRIT_LAND,
+        RCLCRIT_SEA,
+        RCLDIFF,
+        RCLDIFF_CONVI,
+        RCLDTOPCF,
+        RCOVPMIN,
+        RD,
+        RDCP,
+        RDENSREF,
+        RDEPLIQREFDEPTH,
+        RDEPLIQREFRATE,
+        RETV,
+        RG,
+        RICEINIT,
+        RKCONV,
+        RKOOPTAU,
+        RLCRITSNOW,
+        RLDCP,
+        RLMIN,
+        RLSTT,
+        RLVTT,
+        RNICE,
+        RPECONS,
+        RPRC1,
+        RPRECRHMAX,
+        RSNOWLIN1,
+        RSNOWLIN2,
+        RTAUMEL,
+        RTHOMO,
+        RTT,
+        RV,
+        RVRFACTOR,
+        TW1,
+        TW2,
+        TW3,
+        TW4,
+        TW5,
+        VQI,
+        VQL,
+        VQR,
+        VQS,
+        VQV,
+        WARMRAIN,
+    )
+
+    with computation(FORWARD), interval(0, 1):
+        # zero arrays
+        out_rainfrac_toprfz[0, 0] = 0.0
+        tmp_cldtopdist[0, 0] = 0.0
+        tmp_covpmax[0, 0] = 0.0
+        tmp_covptot[0, 0] = 0.0
+        tmp_paphd[0, 0] = 0.0
+        tmp_rainliq[0, 0] = True
+        tmp_trpaus[0, 0] = 0.0
+
+    with computation(FORWARD), interval(0, -1):
+        # === 1: initial values for variables
+        # --- initialization of output tendencies
+        out_tnd_loc_t[0, 0, 0] = 0
+        out_tnd_loc_a[0, 0, 0] = 0
+        out_tnd_loc_ql[0, 0, 0] = 0
+        out_tnd_loc_qr[0, 0, 0] = 0
+        out_tnd_loc_qi[0, 0, 0] = 0
+        out_tnd_loc_qs[0, 0, 0] = 0
+        out_tnd_loc_qv[0, 0, 0] = 0
+
+        # --- non CLV initialization
+        t = in_t[0, 0, 0] + dt * in_tnd_tmp_t[0, 0, 0]
+        a = in_a[0, 0, 0] + dt * in_tnd_tmp_a[0, 0, 0]
+        a0 = a
+
+        # --- initialization for CLV family
+        ql = in_ql[0, 0, 0] + dt * in_tnd_tmp_ql[0, 0, 0]
+        ql0 = ql
+        qi = in_qi[0, 0, 0] + dt * in_tnd_tmp_qi[0, 0, 0]
+        qi0 = qi
+        qr = in_qr[0, 0, 0] + dt * in_tnd_tmp_qr[0, 0, 0]
+        qr0 = qr
+        qs = in_qs[0, 0, 0] + dt * in_tnd_tmp_qs[0, 0, 0]
+        qs0 = qs
+        qv = in_qv[0, 0, 0] + dt * in_tnd_tmp_qv[0, 0, 0]
+
+        # --- zero arrays
+        lneg_ql = 0.0
+        lneg_qi = 0.0
+        lneg_qr = 0.0
+        lneg_qs = 0.0
+
+        # --- tidy up very small cloud cover or total cloud water
+        expr1 = ql + qi
+        if expr1 < RLMIN or a < RAMIN:
+            # evaporate small cloud liquid water amounts
+            lneg_ql += ql
+            qadj = ql / dt
+            out_tnd_loc_qv[0, 0, 0] += qadj
+            out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj
+            qv += ql
+            ql = 0.0
+
+            # evaporate small cloud ice water amounts
+            lneg_qi += qi
+            qadj = qi / dt
+            out_tnd_loc_qv[0, 0, 0] += qadj
+            out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj
+            qv += qi
+            qi = 0.0
+
+            # set cloud cover to zero
+            a = 0.0
+
+        # --- tidy up small CLV variables: ql
+        if ql < RLMIN:
+            lneg_ql += ql
+            qadj = ql / dt
+            out_tnd_loc_qv[0, 0, 0] += qadj
+            if __INLINED(PHASEQL == 1):
+                out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj
+            elif __INLINED(PHASEQL == 2):
+                out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj
+            qv += ql
+            ql = 0.0
+
+        # --- tidy up small CLV variables: qi
+        if qi < RLMIN:
+            lneg_qi += qi
+            qadj = qi / dt
+            out_tnd_loc_qv[0, 0, 0] += qadj
+            if __INLINED(PHASEQI == 1):
+                out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj
+            elif __INLINED(PHASEQI == 2):
+                out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj
+            qv += qi
+            qi = 0.0
+
+        # --- tidy up small CLV variables: qr
+        if qr < RLMIN:
+            lneg_qr += qr
+            qadj = qr / dt
+            out_tnd_loc_qv[0, 0, 0] += qadj
+            if __INLINED(PHASEQR == 1):
+                out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj
+            elif __INLINED(PHASEQR == 2):
+                out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj
+            qv += qr
+            qr = 0.0
+
+        # --- tidy up small CLV variables: qs
+        if qs < RLMIN:
+            lneg_qs += qs
+            qadj = qs / dt
+            out_tnd_loc_qv[0, 0, 0] += qadj
+            if __INLINED(PHASEQS == 1):
+                out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj
+            elif __INLINED(PHASEQS == 2):
+                out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj
+            qv += qs
+            qs = 0.0
+
+        # --- define saturation values
+        # --- old *diagnostic* mixed phase saturation
+        foealfa = f_foealfa(t)
+        foeewmt = min(f_foeewm(t) / in_ap[0, 0, 0], 0.5)
+        qsmix = foeewmt / (1 - RETV * foeewmt)
+
+        # --- ice saturation T < 273K
+        # --- liquid water saturation for T > 273K
+        alfa = f_foedelta(t)
+        foeew = min((alfa * f_foeeliq(t) + (1 - alfa) * f_foeeice(t)) / in_ap[0, 0, 0], 0.5)
+        qsice = foeew / (1 - RETV * foeew)
+
+        # --- liquid water saturation
+        foeeliqt = min(f_foeeliq(t) / in_ap[0, 0, 0], 0.5)
+        qsliq = foeeliqt / (1 - RETV * foeeliqt)
+
+        # --- ensure cloud fraction is between 0 and 1
+        a = max(0, min(1, a))
+
+        # --- calculate liq/ice fractions (no longer a diagnostic relationship)
+        li = ql + qi
+        if li > RLMIN:
+            liqfrac = ql / li
+            icefrac = 1 - liqfrac
+        else:
+            liqfrac = 0.0
+            icefrac = 0.0
+
+    # === 2: constants and parameters
+    # --- find tropopause level
+    with computation(FORWARD), interval(0, 1):
+        tmp_trpaus[0, 0] = 0.1
+        tmp_paphd[0, 0] = 1 / tmp_aph_s[0, 0]
+    with computation(FORWARD), interval(0, -2):
+        sig = in_ap[0, 0, 0] * tmp_paphd[0, 0]
+        if sig > 0.1 and sig < 0.4 and t[0, 0, 0] > t[0, 0, 1]:
+            tmp_trpaus[0, 0] = sig
+
+    # === 3: physics
+    # --- main vertical loop
+    with computation(FORWARD):
+        with interval(0, NCLDTOP - 1):
+            # --- initialize variables
+            lude = in_lude[0, 0, 0]
+            pfplsl = 0.0
+            pfplsi = 0.0
+            pfplsr = 0.0
+            pfplss = 0.0
+            pfplsv = 0.0
+            qln = 0.0
+            qin = 0.0
+            qrn = 0.0
+            qsn = 0.0
+            qvn = 0.0
+            anew = 0.0
+        with interval(NCLDTOP - 1, -1):
+            # *** 3.0: initialize variables
+            # --- first guess microphysics
+            qlfg = ql
+            qifg = qi
+            qrfg = qr
+            qsfg = qs
+            qvfg = qv
+
+            convsink_ql = 0.0
+            convsink_qi = 0.0
+            convsink_qr = 0.0
+            convsink_qs = 0.0
+            convsrce_ql = 0.0
+            convsrce_qi = 0.0
+            convsrce_qr = 0.0
+            convsrce_qs = 0.0
+            convsrce_qv = 0.0
+            fallsrce_ql = 0.0
+            fallsrce_qi = 0.0
+            fallsrce_qr = 0.0
+            fallsrce_qs = 0.0
+            index1_ql = True
+            index1_qi = True
+            index1_qr = True
+            index1_qs = True
+            index1_qv = True
+            index3_ql_ql = False
+            index3_ql_qi = False
+            index3_ql_qr = False
+            index3_ql_qs = False
+            index3_ql_qv = False
+            index3_qi_ql = False
+            index3_qi_qi = False
+            index3_qi_qr = False
+            index3_qi_qs = False
+            index3_qi_qv = False
+            index3_qr_ql = False
+            index3_qr_qi = False
+            index3_qr_qr = False
+            index3_qr_qs = False
+            index3_qr_qv = False
+            index3_qs_ql = False
+            index3_qs_qi = False
+            index3_qs_qr = False
+            index3_qs_qs = False
+            index3_qs_qv = False
+            index3_qv_ql = False
+            index3_qv_qi = False
+            index3_qv_qr = False
+            index3_qv_qs = False
+            index3_qv_qv = False
+            lcust_ql = 0.0
+            lcust_qi = 0.0
+            lcust_qr = 0.0
+            lcust_qs = 0.0
+            lcust_qv = 0.0
+            ldefr = 0.0
+            lfinalsum = 0.0
+            order_ql = -999
+            order_qi = -999
+            order_qr = -999
+            order_qs = -999
+            order_qv = -999
+            psupsatsrce_ql = 0.0
+            psupsatsrce_qi = 0.0
+            psupsatsrce_qr = 0.0
+            psupsatsrce_qs = 0.0
+            qpretot = 0.0
+            solab = 0.0
+            solac = 0.0
+            solqa_ql_ql = 0.0
+            solqa_ql_qi = 0.0
+            solqa_ql_qr = 0.0
+            solqa_ql_qs = 0.0
+            solqa_ql_qv = 0.0
+            solqa_qi_ql = 0.0
+            solqa_qi_qi = 0.0
+            solqa_qi_qr = 0.0
+            solqa_qi_qs = 0.0
+            solqa_qi_qv = 0.0
+            solqa_qr_ql = 0.0
+            solqa_qr_qi = 0.0
+            solqa_qr_qr = 0.0
+            solqa_qr_qs = 0.0
+            solqa_qr_qv = 0.0
+            solqa_qs_ql = 0.0
+            solqa_qs_qi = 0.0
+            solqa_qs_qr = 0.0
+            solqa_qs_qs = 0.0
+            solqa_qs_qv = 0.0
+            solqa_qv_ql = 0.0
+            solqa_qv_qi = 0.0
+            solqa_qv_qr = 0.0
+            solqa_qv_qs = 0.0
+            solqa_qv_qv = 0.0
+            solqb_ql_ql = 0.0
+            solqb_ql_qi = 0.0
+            solqb_ql_qr = 0.0
+            solqb_ql_qs = 0.0
+            solqb_ql_qv = 0.0
+            solqb_qi_ql = 0.0
+            solqb_qi_qi = 0.0
+            solqb_qi_qr = 0.0
+            solqb_qi_qs = 0.0
+            solqb_qi_qv = 0.0
+            solqb_qr_ql = 0.0
+            solqb_qr_qi = 0.0
+            solqb_qr_qr = 0.0
+            solqb_qr_qs = 0.0
+            solqb_qr_qv = 0.0
+            solqb_qs_ql = 0.0
+            solqb_qs_qi = 0.0
+            solqb_qs_qr = 0.0
+            solqb_qs_qs = 0.0
+            solqb_qs_qv = 0.0
+            solqb_qv_ql = 0.0
+            solqb_qv_qi = 0.0
+            solqb_qv_qr = 0.0
+            solqb_qv_qs = 0.0
+            solqb_qv_qv = 0.0
+
+            # derived variables needed
+            dp = in_aph[0, 0, 1] - in_aph[0, 0, 0]
+            gdp = RG / dp
+            rho = in_ap[0, 0, 0] / (RD * t)
+            dtgdp = dt * gdp
+            rdtgdp = dp / (RG * dt)
+
+            # --- calculate dqs/dT correction factor
+            # liquid
+            facw = R5LES / (t - R4LES) ** 2
+            cor = 1 / (1 - RETV * foeeliqt)
+            dqsliqdt = facw * cor * qsliq
+            corqsliq = 1 + RALVDCP * dqsliqdt
+
+            # ice
+            faci = R5IES / (t - R4IES) ** 2
+            cor = 1 / (1 - RETV * foeew)
+            dqsicedt = faci * cor * qsice
+            corqsice = 1 + RALSDCP * dqsicedt
+
+            # diagnostic mixed
+            fac = foealfa * facw + (1 - foealfa) * faci
+            cor = 1 / (1 - RETV * foeewmt)
+            dqsmixdt = fac * cor * qsmix
+            corqsmix = 1 + f_foeldcpm(t) * dqsmixdt
+
+            # evaporation/sublimation limits
+            evaplimmix = max((qsmix - qv) / corqsmix, 0.0)
+            evaplimice = max((qsice - qv) / corqsice, 0.0)
+
+            # --- in-cloud condensate amount
+            tmpa = 1 / max(a, EPSEC)
+            liqcld = ql * tmpa
+            icecld = qi * tmpa
+            licld = liqcld + icecld
+
+            # --- evaporate very small amounts of liquid...
+            if ql < RLMIN:
+                solqa_qv_ql += ql
+                solqa_ql_qv -= ql
+
+            # --- ...and ice
+            if qi < RLMIN:
+                solqa_qv_qi += qi
+                solqa_qi_qv -= qi
+
+            # *** 3.1: ice supersaturation adjustment
+            # --- supersaturation limit (from Koop)
+            fokoop = f_fokoop(t)
+
+            if t >= RTT or NSSOPT == 0:
+                fac = 1.0
+                faci = 1.0
+            else:
+                fac = a + fokoop * (1 - a)
+                faci = dt / RKOOPTAU
+
+            # calculate supersaturation to add to cloud
+            if a > 1 - RAMIN:
+                supsat = max((qv - fac * qsice) / corqsice, 0.0)
+            else:
+                # calculate environmental humidity supersaturation
+                qp1env = (qv - a * qsice) / max(1 - a, EPSILON)
+                supsat = max((1 - a) * (qp1env - fac * qsice) / corqsice, 0.0)
+
+            # --- here the supersaturation is turned into liquid water
+            if supsat > EPSEC:
+                if t > RTHOMO:
+                    # turn supersaturation into liquid water
+                    solqa_ql_qv += supsat
+                    solqa_qv_ql -= supsat
+                    # include liquid in first guess
+                    qlfg += supsat
+                else:
+                    # turn supersaturation into ice water
+                    solqa_qi_qv += supsat
+                    solqa_qv_qi -= supsat
+                    # add ice to first guess for deposition term
+                    qifg += supsat
+
+                # increase cloud amount using RKOOPTAU timescale
+                solac = (1 - a) * faci
+
+            # --- include supersaturation from previous timestep
+            if in_supsat[0, 0, 0] > EPSEC:
+                if t > RTHOMO:
+                    # turn supersaturation into liquid water
+                    solqa_ql_ql += in_supsat[0, 0, 0]
+                    psupsatsrce_ql = in_supsat[0, 0, 0]
+                    # add liquid to first guess for deposition term
+                    qlfg += in_supsat[0, 0, 0]
+                else:
+                    # turn supersaturation into ice water
+                    solqa_qi_qi += in_supsat[0, 0, 0]
+                    psupsatsrce_qi = in_supsat[0, 0, 0]
+                    # add ice to first guess for deposition term
+                    qifg += in_supsat[0, 0, 0]
+
+                # increase cloud amount using RKOOPTAU timescale
+                solac = (1 - a) * faci
+
+            # *** 3.2: detrainment from convection
+            if tmp_klevel[0] < NLEV - 1:
+                lude = in_lude[0, 0, 0] * dtgdp
+
+                if in_convection_on[0, 0] and lude[0, 0, 0] > RLMIN and in_lu[0, 0, 1] > EPSEC:
+                    solac += lude[0, 0, 0] / in_lu[0, 0, 1]
+                    # diagnostic temperature split
+                    convsrce_ql = foealfa * lude
+                    convsrce_qi = (1 - foealfa) * lude
+                    solqa_ql_ql += convsrce_ql
+                    solqa_qi_qi += convsrce_qi
+                else:
+                    lude = 0.0
+
+                # convective snow detrainment source
+                if in_convection_on[0, 0]:
+                    solqa_qs_qs += in_snde[0, 0, 0] * dtgdp
+            else:
+                lude = in_lude[0, 0, 0]
+
+            # *** 3.3: subsidence compensating convective updraughts
+            # --- subsidence source from layer above and evaporation of cloud within the layer
+            if tmp_klevel[0] > NCLDTOP - 1:
+                mf = max(0.0, (in_mfu + in_mfd) * dtgdp)
+                acust = mf * anew[0, 0, -1]
+
+                if __INLINED(not FALLQL and PHASEQL > 0):
+                    lcust_ql = mf * qln[0, 0, -1]
+                    # record total flux for enthalpy budget
+                    convsrce_ql += lcust_ql
+
+                if __INLINED(not FALLQI and PHASEQI > 0):
+                    lcust_qi = mf * qin[0, 0, -1]
+                    # record total flux for enthalpy budget
+                    convsrce_qi += lcust_qi
+
+                if __INLINED(not FALLQR and PHASEQR > 0):
+                    lcust_qr = mf * qrn[0, 0, -1]
+                    # record total flux for enthalpy budget
+                    convsrce_qr += lcust_qr
+
+                if __INLINED(not FALLQS and PHASEQS > 0):
+                    lcust_qs = mf * qsn[0, 0, -1]
+                    # record total flux for enthalpy budget
+                    convsrce_qs += lcust_qs
+
+                if __INLINED(not FALLQV and PHASEQV > 0):
+                    lcust_qv = mf * qvn[0, 0, -1]
+                    # record total flux for enthalpy budget
+                    convsrce_qv += lcust_qv
+
+                # work out how much liquid evaporates at arrival point
+                dtdp = RDCP * 0.5 * (t[0, 0, -1] + t[0, 0, 0]) / in_aph[0, 0, 0]
+                dtforc = dtdp[0, 0, 0] * (in_ap[0, 0, 0] - in_ap[0, 0, -1])
+                dqs = anew[0, 0, -1] * dtforc * dqsmixdt
+
+                if __INLINED(not FALLQL and PHASEQL > 0):
+                    lfinal = max(0.0, lcust_ql - dqs)
+                    evap = min(lcust_ql - lfinal, evaplimmix)
+                    lfinal = lcust_ql - evap
+                    lfinalsum += lfinal
+                    solqa_ql_ql += lcust_ql
+                    solqa_qv_ql += evap
+                    solqa_ql_qv -= evap
+
+                if __INLINED(not FALLQI and PHASEQI > 0):
+                    lfinal = max(0.0, lcust_qi - dqs)
+                    evap = min(lcust_qi - lfinal, evaplimmix)
+                    lfinal = lcust_qi - evap
+                    lfinalsum += lfinal
+                    solqa_qi_qi += lcust_qi
+                    solqa_qv_qi += evap
+                    solqa_qi_qv -= evap
+
+                if __INLINED(not FALLQR and PHASEQR > 0):
+                    lfinal = max(0.0, lcust_qr - dqs)
+                    evap = min(lcust_qr - lfinal, evaplimmix)
+                    lfinal = lcust_qr - evap
+                    lfinalsum += lfinal
+                    solqa_qr_qr += lcust_qr
+                    solqa_qv_qr += evap
+                    solqa_qr_qv -= evap
+
+                if __INLINED(not FALLQS and PHASEQS > 0):
+                    lfinal = max(0.0, lcust_qs - dqs)
+                    evap = min(lcust_qs - lfinal, evaplimmix)
+                    lfinal = lcust_qs - evap
+                    lfinalsum += lfinal
+                    solqa_qs_qs += lcust_qs
+                    solqa_qv_qs += evap
+                    solqa_qs_qv -= evap
+
+                if __INLINED(not FALLQV and PHASEQV > 0):
+                    lfinal = max(0.0, lcust_qv - dqs)
+                    evap = min(lcust_qv - lfinal, evaplimmix)
+                    lfinal = lcust_qv - evap
+                    lfinalsum += lfinal
+                    solqa_qv_qv += lcust_qv
+
+                # reset the cloud contribution if no cloud water survives to this level
+                if lfinalsum < EPSEC:
+                    acust = 0.0
+                solac += acust
+
+            # --- subsidence sink of cloud to the layer below
+            if tmp_klevel[0] < NLEV - 1:
+                mfdn = max(0.0, (in_mfu[0, 0, 1] + in_mfd[0, 0, 1]) * dtgdp)
+                solab += mfdn
+                solqb_ql_ql += mfdn
+                solqb_qi_qi += mfdn
+
+                # record sink for cloud budget and enthalpy budget diagnostics
+                convsink_ql = mfdn
+                convsink_qi = mfdn
+
+            # *** 3.4: erosion of clouds by turbulent mixing
+            # --- define turbulent erosion rate
+            ldifdt = RCLDIFF * dt
+            if in_convection_type[0, 0] > 0 and lude > EPSEC:
+                ldifdt *= RCLDIFF_CONVI
+
+            if li > EPSEC:
+                # calculate environmental humidity
+                e = ldifdt * max(qsmix - qv, 0.0)
+                leros = min(min(a * e, evaplimmix), li)
+                aeros = leros / licld
+
+                # erosion is -ve linear in L, A
+                solac -= aeros
+                solqa_qv_ql += liqfrac * leros
+                solqa_ql_qv -= liqfrac * leros
+                solqa_qv_qi += icefrac * leros
+                solqa_qi_qv -= icefrac * leros
+
+            # *** 3.5: condensation/evaporation due to dqsat/dT
+            dtdp = RDCP * t / in_ap[0, 0, 0]
+            dpmxdt = dp / dt
+            mfdn = in_mfu[0, 0, 1] + in_mfd[0, 0, 1] if tmp_klevel[0] < NLEV - 1 else 0.0
+            wtot = in_w[0, 0, 0] + 0.5 * RG * (in_mfu[0, 0, 0] + in_mfd[0, 0, 0] + mfdn)
+            wtot = min(dpmxdt, max(-dpmxdt, wtot))
+            zzdt = in_hrsw[0, 0, 0] + in_hrlw[0, 0, 0]
+            dtdiab = min(dpmxdt * dtdp, max(-dpmxdt * dtdp, zzdt)) * dt + RALFDCP * ldefr
+            dtforc = dtdp * wtot * dt + dtdiab
+            qold = qsmix
+            told = t
+            t = max(t + dtforc, 160.0)
+
+            qsmix, t = f_cuadjtq(in_ap, qsmix, t)
+
+            dqs = qsmix - qold
+            qsmix = qold
+            t = told
+
+            # *** 3.5a: evaporation of clouds
+            if dqs > 0:
+                levap = min(min(a * min(dqs, licld), evaplimmix), max(qsmix - qv, 0.0))
+                solqa_qv_ql += liqfrac * levap
+                solqa_ql_qv -= liqfrac * levap
+                solqa_qv_qi += icefrac * levap
+                solqa_qi_qv -= icefrac * levap
+
+            # *** 3.5b: formation of clouds
+            # increase of cloud water in existing clouds
+            if a > EPSEC and dqs <= -RLMIN:
+                lcond1 = max(-dqs, 0.0)
+
+                # old limiter
+                if a > 0.99:
+                    cor = 1 / (1 - RETV * qsmix)
+                    cdmax = (qv - qsmix) / (1 + cor * qsmix * f_foedem(t))
+                else:
+                    cdmax = (qv - a * qsmix) / a
+
+                lcond1 = a * max(min(lcond1, cdmax), 0.0)
+                if lcond1 < RLMIN:
+                    lcond1 = 0.0
+
+                # --- all increase goes into liquid unless so cold cloud homogeneously freezes
+                if t > RTHOMO:
+                    solqa_ql_qv += lcond1
+                    solqa_qv_ql -= lcond1
+                    qlfg += lcond1
+                else:
+                    solqa_qi_qv += lcond1
+                    solqa_qv_qi -= lcond1
+                    qifg += lcond1
+
+            # generation of new clouds (da/dt > 0)
+            if dqs <= -RLMIN and a < 1 - EPSEC:
+                # --- critical relative humidity
+                rhc = RAMID
+                sigk = in_ap[0, 0, 0] / tmp_aph_s[0, 0]
+                if sigk > 0.8:
+                    rhc += (1 - RAMID) * ((sigk - 0.8) / 0.2) ** 2
+
+                # --- supersaturation options
+                if __INLINED(NSSOPT == 0):
+                    # no scheme
+                    qe = max(0.0, (qv - a * qsice) / max(EPSEC, 1 - a))
+                elif __INLINED(NSSOPT == 1):
+                    # Tompkins
+                    qe = max(0.0, (qv - a * qsice) / max(EPSEC, 1 - a))
+                elif __INLINED(NSSOPT == 2):
+                    # Lohmann and Karcher
+                    qe = qv
+                else:
+                    # Gierens
+                    qe = qv + li
+
+                if t >= RTT or NSSOPT == 0:
+                    # no ice supersaturation allowed
+                    fac = 1.0
+                else:
+                    # ice supersaturation
+                    fac = fokoop
+
+                if qe >= rhc * qsice * fac and qe < qsice * fac:
+                    acond = -(1 - a) * fac * dqs / max(2 * (fac * qsice - qe), EPSEC)
+                    acond = min(acond, 1 - a)
+
+                    # linear term
+                    lcond2 = -fac * dqs * 0.5 * acond
+
+                    # new limiter formulation
+                    zdl = 2 * (fac * qsice - qe) / max(EPSEC, 1 - a)
+                    expr2 = fac * dqs
+                    if expr2 < -zdl:
+                        lcondlim = (a - 1) * expr2 - fac * qsice + qv
+                        lcond2 = min(lcond2, lcondlim)
+                    lcond2 = max(lcond2, 0.0)
+
+                    expr10 = 1 - a
+                    if lcond2 < RLMIN or expr10 < EPSEC:
+                        lcond2 = 0.0
+                        acond = 0.0
+                    if lcond2 == 0.0:
+                        acond = 0.0
+
+                    # large-scale generation is linear in A and linear in L
+                    solac += acond
+
+                    # --- all increase goes into liquid unless so cold cloud homogeneously freezes
+                    if t > RTHOMO:
+                        solqa_ql_qv += lcond2
+                        solqa_qv_ql -= lcond2
+                        qlfg += lcond2
+                    else:  # homogeneous freezing
+                        solqa_qi_qv += lcond2
+                        solqa_qv_qi -= lcond2
+                        qifg += lcond2
+
+            # *** 3.6: growth of ice by vapour deposition
+            if __INLINED(DEPICE == 1):  # --- ice deposition following Rotstayn et al. (2001)
+                # --- calculate distance from cloud top
+                if a[0, 0, -1] < RCLDTOPCF and a[0, 0, 0] >= RCLDTOPCF:
+                    tmp_cldtopdist[0, 0] = 0.0
+                else:
+                    tmp_cldtopdist[0, 0] += dp / (rho * RG)
+
+                # --- only treat depositional growth if liquid present
+                if t < RTT and qlfg > RLMIN:
+                    vpice = f_foeeice(t) * RV / RD
+                    vpliq = vpice * fokoop
+                    icenuclei = 1000 * exp(12.96 * (vpliq - vpice) / vpliq - 0.639)
+
+                    # --- 0.024 is conductivity of air
+                    # --- 8.87 = 700 ** (1/3) = cube root of the density of ice
+                    add = RLSTT * (RLSTT / (RV * t) - 1) / (0.024 * t)
+                    bdd = RV * t * in_ap[0, 0, 0] / (2.21 * vpice)
+                    cvds = (
+                        7.8
+                        * (icenuclei / rho) ** 0.666
+                        * (vpliq - vpice)
+                        / (8.87 * (add + bdd) * vpice)
+                    )
+
+                    # --- RICEINIT = 1e-12 is initial mass of ice particle
+                    ice0 = max(icecld, icenuclei * RICEINIT / rho)
+
+                    # --- new value of ice
+                    inew = (0.666 * cvds * dt + ice0**0.666) ** 1.5
+
+                    # --- grid-mean deposition rate
+                    depos = max(a * (inew - ice0), 0.0)
+
+                    # --- limit deposition to liquid water amount
+                    depos = min(depos, qlfg)
+
+                    # --- at top of cloud, reduce deposition rate near cloud top
+                    infactor = min(icenuclei / 15000, 1.0)
+                    depos *= min(
+                        infactor
+                        + (1 - infactor)
+                        * (RDEPLIQREFRATE + tmp_cldtopdist[0, 0] / RDEPLIQREFDEPTH),
+                        1.0,
+                    )
+
+                    # --- add to matrix
+                    solqa_qi_ql += depos
+                    solqa_ql_qi -= depos
+                    qifg += depos
+                    qlfg -= depos
+            elif __INLINED(DEPICE == 2):  # --- ice deposition assuming ice PSD
+                # --- calculate distance from cloud top
+                if a[0, 0, -1] < RCLDTOPCF and a[0, 0, 0] >= RCLDTOPCF:
+                    tmp_cldtopdist = 0.0
+                else:
+                    tmp_cldtopdist += dp / (rho * RG)
+
+                # --- only treat depositional growth if liquid present
+                if t < RTT and qlfg > RLMIN:
+                    vpice = f_foeeice(t) * RV / RD
+                    vpliq = vpice * fokoop
+                    icenuclei = 1000 * exp(12.96 * (vpliq - vpice) / vpliq - 0.639)
+
+                    # --- RICEINIT=1e-12 is the initial mass of ice particle
+                    ice0 = max(icecld, icenuclei * RICEINIT / rho)
+
+                    # particle size distribution
+                    tcg = 1
+                    facx1i = 1
+                    apb = RCL_APB1 * vpice - RCL_APB2 * vpice * t + in_ap * RCL_APB3 * t**3
+                    corrfac = (1 / rho) ** 0.5
+                    corrfac2 = ((t / 273) ** 1.5) * 393 / (t + 120)
+                    pr02 = rho * ice0 * RCL_CONST1I / (tcg * facx1i)
+                    term1 = (
+                        (vpliq - vpice)
+                        * t**2
+                        * vpice
+                        * corrfac2
+                        * tcg
+                        * RCL_CONST2I
+                        * facx1i
+                        / (rho * apb * vpice)
+                    )
+                    term2 = (
+                        0.65 * RCL_CONST6I * pr02**RCL_CONST4I
+                        + RCL_CONST3I
+                        * corrfac**0.5
+                        * rho**0.5
+                        * pr02**RCL_CONST5I
+                        / corrfac2**0.5
+                    )
+                    depos = max(a * term1 * term2 * dt, 0.0)
+
+                    # --- limit deposition to liquid water amount
+                    depos = min(depos, qlfg)
+
+                    # --- at top of cloud, reduce deposition rate near cloud top to account for
+                    # --- small scale turbulent processes
+                    infactor = min(icenuclei / 15000, 1.0)
+                    depos *= min(
+                        infactor
+                        + (1 - infactor) * (RDEPLIQREFRATE + tmp_cldtopdist / RDEPLIQREFDEPTH),
+                        1.0,
+                    )
+
+                    # --- add to matrix
+                    solqa_qi_ql += depos
+                    solqa_ql_qi -= depos
+                    qifg += depos
+                    qlfg -= depos
+
+            # === 4: precipitation processes
+            # --- revise in-cloud condensate amount
+            tmpa = 1 / max(a, EPSEC)
+            liqcld = qlfg * tmpa
+            icecld = qifg * tmpa
+
+            # *** 4.1a: sedimentation/falling of ql
+            if __INLINED(FALLQL):
+                # --- source from layer above
+                if tmp_klevel[0] > NCLDTOP - 1:
+                    fallsrce_ql = pfplsl[0, 0, -1] * dtgdp
+                    solqa_ql_ql += fallsrce_ql
+                    qlfg += fallsrce_ql
+                    # use first guess precip
+                    qpretot += qlfg
+
+                # --- sink to next layer, constant fall speed
+                fallsink_ql = dtgdp * VQL * rho
+            else:
+                fallsink_ql = 0.0
+
+            # *** 4.1b: sedimentation/falling of qi
+            # --- source from layer above
+            if tmp_klevel[0] > NCLDTOP - 1:
+                fallsrce_qi = pfplsi[0, 0, -1] * dtgdp
+                solqa_qi_qi += fallsrce_qi
+                qifg += fallsrce_qi
+                # use first guess precip
+                qpretot += qifg
+
+            # --- sink to next layer, constant fall speed
+            if __INLINED(LAERICESED):
+                vqi = 0.002 * in_re_ice[0, 0, 0]
+            else:
+                vqi = VQI
+            fallsink_qi = dtgdp * vqi * rho
+
+            # *** 4.1c: sedimentation/falling of qr
+            if __INLINED(FALLQR):
+                # --- source from layer above
+                if tmp_klevel[0] > NCLDTOP - 1:
+                    fallsrce_qr = pfplsr[0, 0, -1] * dtgdp
+                    solqa_qr_qr += fallsrce_qr
+                    qrfg += fallsrce_qr
+                    # use first guess precip
+                    qpretot += qrfg
+
+                # --- sink to next layer, constant fall speed
+                fallsink_qr = dtgdp * VQR * rho
+            else:
+                fallsink_qr = 0.0
+
+            # *** 4.1d: sedimentation/falling of qs
+            if __INLINED(FALLQS):
+                # --- source from layer above
+                if tmp_klevel[0] > NCLDTOP - 1:
+                    fallsrce_qs = pfplss[0, 0, -1] * dtgdp
+                    solqa_qs_qs += fallsrce_qs
+                    qsfg += fallsrce_qs
+                    # use first guess precip
+                    qpretot += qsfg
+
+                # --- sink to next layer, constant fall speed
+                fallsink_qs = dtgdp * VQS * rho
+            else:
+                fallsink_qs = 0.0
+
+            # *** 4.1e: sedimentation/falling of qv
+            if __INLINED(FALLQV):
+                # --- source from layer above
+                if tmp_klevel[0] > NCLDTOP - 1:
+                    fallsrce_qv = pfplsv[0, 0, -1] * dtgdp
+                    solqa_qv_qv += fallsrce_qv
+                    qvfg += fallsrce_qv
+                    # use first guess precip
+                    qpretot += qvfg
+
+                # --- sink to next layer, constant fall speed
+                fallsink_qv = dtgdp * VQV * rho
+            else:
+                fallsink_qv = 0.0
+
+            # --- precip cover overlap using MAX-RAN overlap
+            if qpretot > EPSEC:
+                tmp_covptot[0, 0] = 1 - (
+                    (1 - tmp_covptot[0, 0])
+                    * (1 - max(a[0, 0, 0], a[0, 0, -1]))
+                    / (1 - min(a[0, 0, -1], 1 - 1e-6))
+                )
+                tmp_covptot[0, 0] = max(tmp_covptot[0, 0], RCOVPMIN)
+                covpclr = max(0.0, tmp_covptot[0, 0] - a)
+                raincld = qrfg / tmp_covptot[0, 0]
+                snowcld = qsfg / tmp_covptot[0, 0]
+                tmp_covpmax[0, 0] = max(tmp_covptot[0, 0], tmp_covpmax[0, 0])
+            else:
+                raincld = 0.0
+                snowcld = 0.0
+                tmp_covptot[0, 0] = 0.0
+                covpclr = 0.0
+                tmp_covpmax[0, 0] = 0.0
+
+            # *** 4.2a: autoconversion to snow
+            if t <= RTT:
+                # --- snow autoconversion rate follows Lin et al. (1983)
+                if icecld > EPSEC:
+                    co = dt * RSNOWLIN1 * exp(RSNOWLIN2 * (t - RTT))
+
+                    if __INLINED(LAERICEAUTO):
+                        lcrit = in_icrit_aer[0, 0, 0]
+                        co *= (RNICE / in_nice[0, 0, 0]) ** 0.333
+                    else:
+                        lcrit = RLCRITSNOW
+
+                    snowaut = co * (1 - exp(-((icecld / lcrit) ** 2)))
+                    solqb_qs_qi += snowaut
+
+            # *** 4.2b: autoconversion warm clouds
+            if liqcld > EPSEC:
+                if __INLINED(WARMRAIN == 1):  # --- warm-rain process follow Sundqvist (1989)
+                    co = RKCONV * dt
+
+                    if __INLINED(LAERLIQAUTOLSP):
+                        lcrit = in_lcrit_aer[0, 0, 0]
+                        co *= (RCCN / in_ccn[0, 0, 0]) ** 0.333
+                    else:
+                        lcrit = RCLCRIT_LAND if in_lsm[0, 0] > 0.5 else RCLCRIT_SEA
+
+                    # --- parameters for cloud collection by rain and snow
+                    precip = (pfplss[0, 0, -1] + pfplsr[0, 0, -1]) / max(EPSEC, tmp_covptot[0, 0])
+                    cfpr = 1 + RPRC1 * sqrt(max(precip, 0.0))
+                    if __INLINED(LAERLIQCOLL):
+                        cfpr *= (RCCN / in_ccn[0, 0, 0]) ** 0.333
+
+                    co *= cfpr
+                    lcrit /= max(cfpr, EPSEC)
+
+                    rainaut = co
+                    if liqcld / lcrit < 20:
+                        rainaut *= 1 - exp(-((liqcld / lcrit) ** 2))
+
+                    # rain freezes instantly
+                    if t <= RTT:
+                        solqb_qs_ql += rainaut
+                    else:
+                        solqb_qr_ql += rainaut
+                elif __INLINED(
+                    WARMRAIN == 2
+                ):  # --- warm-rain process follow Khairoutdinov and Kogan (2000)
+                    if in_lsm[0, 0] > 0.5:
+                        const = RCL_KK_cloud_num_land
+                        lcrit = RCLCRIT_LAND
+                    else:
+                        const = RCL_KK_cloud_num_sea
+                        lcrit = RCLCRIT_SEA
+
+                    if liqcld > lcrit:
+                        rainaut = (
+                            1.5 * a * dt * RCL_KKAau * liqcld**RCL_KKBauq * const**RCL_KKBaun
+                        )
+                        rainaut = min(rainaut, qlfg)
+                        if rainaut < EPSEC:
+                            rainaut = 0.0
+                        rainacc = 2 * a * dt * RCL_KKAac * (liqcld * raincld) ** RCL_KKBac
+                        rainacc = min(rainacc, qlfg)
+                        if rainacc < EPSEC:
+                            rainacc = 0.0
+                    else:
+                        rainaut = 0.0
+                        rainacc = 0.0
+
+                    expr3 = rainaut + rainacc
+                    if t <= RTT:
+                        solqa_qs_ql += expr3
+                        solqa_ql_qs -= expr3
+                    else:
+                        solqa_qr_ql += expr3
+                        solqa_ql_qr -= expr3
+
+            # --- riming - collection of cloud liquid drops by snow and ice
+            if __INLINED(WARMRAIN > 1):
+                if t <= RTT and liqcld > EPSEC:
+                    # fallspeed air density correction
+                    fallcorr = (RDENSREF / rho) ** 0.4
+
+                    # --- riming of snow by cloud water - implicit in lwc
+                    if snowcld > EPSEC and tmp_covptot[0, 0] > 0.01:
+                        # calculate riming term
+                        snowrime = (
+                            0.3
+                            * tmp_covptot[0, 0]
+                            * dt
+                            * RCL_CONST7S
+                            * fallcorr
+                            * (rho * snowcld * RCL_CONST1S) ** RCL_CONST8S
+                        )
+
+                        # limit snow riming term
+                        snowrime = min(snowrime, 1.0)
+
+                        solqb_qs_ql += snowrime
+
+            # *** 4.3a: melting of snow and ice
+            icetot = qifg + qsfg
+            meltmax = 0.0
+
+            # if there are frozen hydrometeors present and dry-bulb temperature > 0degC
+            if icetot > EPSEC and t > RTT:
+                # calculate subsaturation
+                subsat = max(qsice - qv, 0.0)
+
+                # calculate difference between dry-bulb and the temperature at which the
+                # wet-bulb = 0 degC, using an approximation
+                tdmtw0 = t - RTT - subsat * (TW1 + TW2 * (in_ap[0, 0, 0] - TW3) - TW4 * (t - TW5))
+
+                # ensure cons1 is positive
+                cons1 = abs(dt * (1 + 0.5 * tdmtw0) / RTAUMEL)
+                meltmax = max(tdmtw0 * cons1 * RLDCP, 0.0)
+
+            if meltmax > EPSEC and icetot > EPSEC:
+                # apply melting in same proportion as frozen hydrometeor fractions
+                alfa_qi = qifg / icetot
+                melt_qi = min(qifg, alfa_qi * meltmax)
+                alfa_qs = qsfg / icetot
+                melt_qs = min(qsfg, alfa_qs * meltmax)
+
+                # needed in first guess
+                qifg -= melt_qi
+                qrfg += melt_qi + melt_qs
+                qsfg -= melt_qs
+                solqa_qi_qr -= melt_qi
+                solqa_qr_qi += melt_qi
+                solqa_qr_qs += melt_qs
+                solqa_qs_qr -= melt_qs
+
+            # *** 4.3b: freezing of rain
+            if qr > EPSEC:
+                if t[0, 0, 0] <= RTT and t[0, 0, -1] > RTT:
+                    # base of melting layer/top of refreezing layer so store rain/snow fraction for
+                    # precip type diagnosis
+                    qpretot = max(qs + qr, EPSEC)
+                    out_rainfrac_toprfz[0, 0] = qr / qpretot
+                    tmp_rainliq[0, 0] = out_rainfrac_toprfz[0, 0] > 0.8
+
+                if t < RTT:
+                    if tmp_rainliq[0, 0]:
+                        # majority of raindrops completely melted
+                        # slope of rain particle size distribution
+                        lambda_ = (RCL_FAC1 / (rho * qr)) ** RCL_FAC2
+
+                        # calculate freezing rate based on Bigg (1953) and Wisner (1972)
+                        temp = RCL_FZRAB * (t - RTT)
+                        frz = dt * (RCL_CONST5R / rho) * (exp(temp) - 1) * lambda_**RCL_CONST6R
+                        frzmax = max(frz, 0.0)
+                    else:
+                        # majority of raindrops only partially melted
+                        cons1 = abs(dt * (1 + 0.5 * (RTT - t)) / RTAUMEL)
+                        frzmax = max((RTT - t) * cons1 * RLDCP, 0.0)
+
+                    if frzmax > EPSEC:
+                        frz = min(qr, frzmax)
+                        solqa_qs_qr += frz
+                        solqa_qr_qs -= frz
+
+            # *** 4.3c: freezing of liquid
+            frzmax = max((RTHOMO - t) * RLDCP, 0.0)
+            if frzmax > EPSEC and qlfg > EPSEC:
+                frz = min(qlfg, frzmax)
+                solqa_qi_ql += frz
+                solqa_ql_qi -= frz
+
+            # *** 4.4: evaporation of rain/snow
+            if __INLINED(EVAPRAIN == 1):  # --- rain evaporation scheme from Sundquist
+                rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a)
+                rh = min(max(rh, RPRECRHMAX), 1.0)
+                qe = (qv - a * qsliq) / max(EPSEC, 1 - a)
+
+                # --- humidity in moistest covpclr part of domain
+                qe = max(0.0, min(qe, qsliq))
+                lo1 = covpclr > EPSEC and qrfg > EPSEC and qe < rh * qsliq
+                if lo1:
+                    # note: preclr is a rain flux
+                    expr4 = tmp_covptot[0, 0] * dtgdp
+                    expr5 = max(abs(expr4), EPSILON)
+                    expr6 = expr5 if expr4 > 0 else -expr5
+                    preclr = qrfg * covpclr / expr6
+
+                    # --- actual microphysics formula in beta
+                    beta1 = (
+                        sqrt(in_ap[0, 0, 0] / tmp_aph_s[0, 0])
+                        / RVRFACTOR
+                        * preclr
+                        / max(covpclr, EPSEC)
+                    )
+                    beta = RG * RPECONS * 0.5 * beta1**0.5777
+                    denom = 1 + beta * dt * corqsliq
+                    dpr = covpclr * beta * (qsliq - qe) / denom * dp / RG
+                    dpevap = dpr * dtgdp
+
+                    # --- add evaporation term to explicit sink
+                    evap = min(dpevap, qrfg)
+                    solqa_qv_qr += evap
+                    solqa_qr_qv -= evap
+
+                    # --- reduce the total precip coverage proportional to evaporation
+                    tmp_covptot[0, 0] = max(
+                        RCOVPMIN,
+                        tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qrfg),
+                    )
+
+                    # update fg field
+                    qrfg -= evap
+            elif __INLINED(
+                EVAPRAIN == 2
+            ):  # --- rain evaporation scheme based on Abel and Boutle (2013)
+                # --- calculate relative humidity limit for rain evaporation
+                # limit rh for rain evaporation dependent on precipitation fraction
+                rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a)
+                rh = min(max(rh, RPRECRHMAX), 1.0)
+
+                # further limit rh for rain evaporation to 80%
+                rh = min(0.8, rh)
+
+                qe = max(0.0, min(qv, qsliq))
+                lo1 = covpclr > EPSEC and qrfg > EPSEC and qe < rh * qsliq
+                if lo1:
+                    # --- Abel and Boutle (2012) evaporation
+                    # calculate local precipitation (kg/kg)
+                    preclr = qrfg / tmp_covptot[0, 0]
+
+                    # fallspeed air density correction
+                    fallcorr = (RDENSREF / rho) ** 0.4
+
+                    # saturation vapor pressure with respect to liquid phase
+                    esatliq = RV / RD * f_foeeliq(t)
+
+                    # slope of particle size distribution
+                    lambda_ = (RCL_FAC1 / (rho * preclr)) ** RCL_FAC2
+
+                    evap_denom = (
+                        RCL_CDENOM1 * esatliq
+                        - RCL_CDENOM2 * t * esatliq
+                        + RCL_CDENOM3 * t**3 * in_ap[0, 0, 0]
+                    )
+
+                    # temperature dependent conductivity
+                    corr2 = (t / 273) ** 1.5 * 393 / (t + 120)
+
+                    subsat = max(rh * qsliq - qe, 0.0)
+                    beta = (
+                        0.5
+                        / qsliq
+                        * t**2
+                        * esatliq
+                        * RCL_CONST1R
+                        * (corr2 / evap_denom)
+                        * (
+                            0.78 / lambda_**RCL_CONST4R
+                            + RCL_CONST2R
+                            * (rho * fallcorr) ** 0.5
+                            / (corr2**0.5 * lambda_**RCL_CONST3R)
+                        )
+                    )
+                    denom = 1 + beta * dt
+                    dpevap = covpclr * beta * dt * subsat / denom
+
+                    # --- add evaporation term to explicit sink
+                    evap = min(dpevap, qrfg)
+                    solqa_qv_qr += evap
+                    solqa_qr_qv -= evap
+
+                    # --- reduce the total precip coverage proportional to evaporation
+                    tmp_covptot[0, 0] = max(
+                        RCOVPMIN,
+                        tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qrfg),
+                    )
+
+                    # update fg field
+                    qrfg -= evap
+
+            # *** 4.5: evaporation of snow
+            if __INLINED(EVAPSNOW == 1):
+                rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a)
+                rh = min(max(rh, RPRECRHMAX), 1.0)
+                qe = (qv - a * qsice) / max(EPSEC, 1 - a)
+
+                # --- humidity in moistest covpclr part of domain
+                qe = max(0.0, min(qe, qsice))
+                lo1 = covpclr > EPSEC and qsfg > EPSEC and qe < rh * qsice
+                if lo1:
+                    expr7 = tmp_covptot[0, 0] * dtgdp
+                    expr8 = max(abs(expr7), EPSILON)
+                    expr9 = expr8 if expr7 > 0 else -expr8
+                    preclr = qsfg * covpclr / expr9
+
+                    # --- actual microphysics formula in beta
+                    beta1 = (
+                        sqrt(in_ap[0, 0, 0] / tmp_aph_s[0, 0])
+                        / RVRFACTOR
+                        * preclr
+                        / max(covpclr, EPSEC)
+                    )
+                    beta = RG * RPECONS * beta1**0.5777
+                    denom = 1 + beta * dt * corqsice
+                    dpr = covpclr * beta * (qsice - qe) / denom * dp / RG
+                    dpevap = dpr * dtgdp
+
+                    # --- add evaporation term to explicit sink
+                    evap = min(dpevap, qsfg)
+                    solqa_qv_qs += evap
+                    solqa_qs_qv -= evap
+
+                    # --- reduce the total precip coverage proportional to evaporation
+                    tmp_covptot[0, 0] = max(
+                        RCOVPMIN,
+                        tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qsfg),
+                    )
+
+                    # update first guess field
+                    qsfg -= evap
+            elif __INLINED(EVAPSNOW == 2):
+                # --- calculate relative humidity limit for snow evaporation
+                rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a)
+                rh = min(max(rh, RPRECRHMAX), 1.0)
+                qe = (qv - a * qsice) / max(EPSEC, 1 - a)
+
+                # --- humidity in moistest covpclr part of domain
+                qe = max(0.0, min(qe, qsice))
+                lo1 = covpclr > EPSEC and qs > EPSEC and qe < rh * qsice
+                if lo1:
+                    # calculate local precipitation (kg/kg)
+                    preclr = qsfg / tmp_covptot[0, 0]
+                    vpice = f_foeeice(t) * RV / RD
+
+                    # particle size distribution
+                    tcg = 1.0
+                    facx1s = 1.0
+                    apb = (
+                        RCL_APB1 * vpice - RCL_APB2 * vpice * t + in_ap[0, 0, 0] * RCL_APB3 * t**3
+                    )
+                    corrfac = (1 / rho) ** 0.5
+                    corrfac2 = ((t / 273) ** 1.5) * 393 / (t + 120)
+                    pr02 = rho * preclr * RCL_CONST1S / (tcg * facx1s)
+                    term1 = (
+                        (qsice - qe)
+                        * t**2
+                        * vpice
+                        * corrfac2
+                        * tcg
+                        * RCL_CONST2S
+                        * facx1s
+                        / (rho * apb * qsice)
+                    )
+                    term2 = (
+                        0.65 * RCL_CONST6S * pr02**RCL_CONST4S
+                        + RCL_CONST3S
+                        * corrfac**0.5
+                        * rho**0.5
+                        * pr02**RCL_CONST5S
+                        / corrfac2**0.5
+                    )
+                    dpevap = max(covpclr * term1 * term2 * dt, 0.0)
+
+                    # --- limit evaporation to snow amount
+                    evap = min(min(dpevap, evaplimice), qs)
+                    solqa_qv_qs += evap
+                    solqa_qs_qv -= evap
+
+                    # --- reduce the total precip coverage proportional to evaporation
+                    tmp_covptot[0, 0] = max(
+                        RCOVPMIN, tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qs)
+                    )
+
+                    # update first guess field
+                    qsfg -= evap
+
+            # --- evaporate small precipitation amounts
+            if __INLINED(FALLQL):
+                if qlfg < RLMIN:
+                    solqa_qv_ql += qlfg
+                    solqa_ql_qv -= qlfg
+            if __INLINED(FALLQI):
+                if qifg < RLMIN:
+                    solqa_qv_qi += qifg
+                    solqa_qi_qv -= qifg
+            if __INLINED(FALLQR):
+                if qrfg < RLMIN:
+                    solqa_qv_qr += qrfg
+                    solqa_qr_qv -= qrfg
+            if __INLINED(FALLQS):
+                if qsfg < RLMIN:
+                    solqa_qv_qs += qsfg
+                    solqa_qs_qv -= qsfg
+
+            # === 5: solvers for A and L
+            # *** 5.1: solver for cloud cover
+            anew = min((a + solac) / (1 + solab), 1.0)
+            if anew < RAMIN:
+                anew = 0.0
+            da = anew - a0
+
+            # *** 5.2: solver for the microphysics
+            # --- collect sink terms and mark
+            sinksum_ql = -(solqa_ql_ql + solqa_ql_qi + solqa_ql_qr + solqa_ql_qs + solqa_ql_qv)
+            sinksum_qi = -(solqa_qi_ql + solqa_qi_qi + solqa_qi_qr + solqa_qi_qs + solqa_qi_qv)
+            sinksum_qr = -(solqa_qr_ql + solqa_qr_qi + solqa_qr_qr + solqa_qr_qs + solqa_qr_qv)
+            sinksum_qs = -(solqa_qs_ql + solqa_qs_qi + solqa_qs_qr + solqa_qs_qs + solqa_qs_qv)
+            sinksum_qv = -(solqa_qv_ql + solqa_qv_qi + solqa_qv_qr + solqa_qv_qs + solqa_qv_qv)
+
+            # --- calculate overshoot and scaling factor
+            max_ql = max(ql, EPSEC)
+            rat_ql = max(sinksum_ql, max_ql)
+            ratio_ql = max_ql / rat_ql
+            max_qi = max(qi, EPSEC)
+            rat_qi = max(sinksum_qi, max_qi)
+            ratio_qi = max_qi / rat_qi
+            max_qr = max(qr, EPSEC)
+            rat_qr = max(sinksum_qr, max_qr)
+            ratio_qr = max_qr / rat_qr
+            max_qs = max(qs, EPSEC)
+            rat_qs = max(sinksum_qs, max_qs)
+            ratio_qs = max_qs / rat_qs
+            max_qv = max(qv, EPSEC)
+            rat_qv = max(sinksum_qv, max_qv)
+            ratio_qv = max_qv / rat_qv
+
+            # --- now sort ratio to find out which species run out first
+            order_ql, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0(
+                order_ql,
+                index1_ql,
+                index1_qi,
+                index1_qr,
+                index1_qs,
+                index1_qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+            )
+            order_qi, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0(
+                order_qi,
+                index1_ql,
+                index1_qi,
+                index1_qr,
+                index1_qs,
+                index1_qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+            )
+            order_qr, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0(
+                order_qr,
+                index1_ql,
+                index1_qi,
+                index1_qr,
+                index1_qs,
+                index1_qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+            )
+            order_qs, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0(
+                order_qs,
+                index1_ql,
+                index1_qi,
+                index1_qr,
+                index1_qs,
+                index1_qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+            )
+            order_qv, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0(
+                order_qv,
+                index1_ql,
+                index1_qi,
+                index1_qr,
+                index1_qs,
+                index1_qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+            )
+
+            # scale the sink terms, in the correct order, recalculating the scale factor each time
+            sinksum_ql = 0.0
+            sinksum_qi = 0.0
+            sinksum_qr = 0.0
+            sinksum_qs = 0.0
+            sinksum_qv = 0.0
+
+            # --- recalculate sum and scaling factor, and then scale
+            ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1(
+                order_ql,
+                index3_ql_ql,
+                index3_ql_qi,
+                index3_ql_qr,
+                index3_ql_qs,
+                index3_ql_qv,
+                index3_qi_ql,
+                index3_qi_qi,
+                index3_qi_qr,
+                index3_qi_qs,
+                index3_qi_qv,
+                index3_qr_ql,
+                index3_qr_qi,
+                index3_qr_qr,
+                index3_qr_qs,
+                index3_qr_qv,
+                index3_qs_ql,
+                index3_qs_qi,
+                index3_qs_qr,
+                index3_qs_qs,
+                index3_qs_qv,
+                index3_qv_ql,
+                index3_qv_qi,
+                index3_qv_qr,
+                index3_qv_qs,
+                index3_qv_qv,
+                ql,
+                qi,
+                qr,
+                qs,
+                qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+                sinksum_ql,
+                sinksum_qi,
+                sinksum_qr,
+                sinksum_qs,
+                sinksum_qv,
+                solqa_ql_ql,
+                solqa_ql_qi,
+                solqa_ql_qr,
+                solqa_ql_qs,
+                solqa_ql_qv,
+                solqa_qi_ql,
+                solqa_qi_qi,
+                solqa_qi_qr,
+                solqa_qi_qs,
+                solqa_qi_qv,
+                solqa_qr_ql,
+                solqa_qr_qi,
+                solqa_qr_qr,
+                solqa_qr_qs,
+                solqa_qr_qv,
+                solqa_qs_ql,
+                solqa_qs_qi,
+                solqa_qs_qr,
+                solqa_qs_qs,
+                solqa_qs_qv,
+                solqa_qv_ql,
+                solqa_qv_qi,
+                solqa_qv_qr,
+                solqa_qv_qs,
+                solqa_qv_qv,
+            )
+            ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1(
+                order_qi,
+                index3_ql_ql,
+                index3_ql_qi,
+                index3_ql_qr,
+                index3_ql_qs,
+                index3_ql_qv,
+                index3_qi_ql,
+                index3_qi_qi,
+                index3_qi_qr,
+                index3_qi_qs,
+                index3_qi_qv,
+                index3_qr_ql,
+                index3_qr_qi,
+                index3_qr_qr,
+                index3_qr_qs,
+                index3_qr_qv,
+                index3_qs_ql,
+                index3_qs_qi,
+                index3_qs_qr,
+                index3_qs_qs,
+                index3_qs_qv,
+                index3_qv_ql,
+                index3_qv_qi,
+                index3_qv_qr,
+                index3_qv_qs,
+                index3_qv_qv,
+                ql,
+                qi,
+                qr,
+                qs,
+                qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+                sinksum_ql,
+                sinksum_qi,
+                sinksum_qr,
+                sinksum_qs,
+                sinksum_qv,
+                solqa_ql_ql,
+                solqa_ql_qi,
+                solqa_ql_qr,
+                solqa_ql_qs,
+                solqa_ql_qv,
+                solqa_qi_ql,
+                solqa_qi_qi,
+                solqa_qi_qr,
+                solqa_qi_qs,
+                solqa_qi_qv,
+                solqa_qr_ql,
+                solqa_qr_qi,
+                solqa_qr_qr,
+                solqa_qr_qs,
+                solqa_qr_qv,
+                solqa_qs_ql,
+                solqa_qs_qi,
+                solqa_qs_qr,
+                solqa_qs_qs,
+                solqa_qs_qv,
+                solqa_qv_ql,
+                solqa_qv_qi,
+                solqa_qv_qr,
+                solqa_qv_qs,
+                solqa_qv_qv,
+            )
+            ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1(
+                order_qr,
+                index3_ql_ql,
+                index3_ql_qi,
+                index3_ql_qr,
+                index3_ql_qs,
+                index3_ql_qv,
+                index3_qi_ql,
+                index3_qi_qi,
+                index3_qi_qr,
+                index3_qi_qs,
+                index3_qi_qv,
+                index3_qr_ql,
+                index3_qr_qi,
+                index3_qr_qr,
+                index3_qr_qs,
+                index3_qr_qv,
+                index3_qs_ql,
+                index3_qs_qi,
+                index3_qs_qr,
+                index3_qs_qs,
+                index3_qs_qv,
+                index3_qv_ql,
+                index3_qv_qi,
+                index3_qv_qr,
+                index3_qv_qs,
+                index3_qv_qv,
+                ql,
+                qi,
+                qr,
+                qs,
+                qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+                sinksum_ql,
+                sinksum_qi,
+                sinksum_qr,
+                sinksum_qs,
+                sinksum_qv,
+                solqa_ql_ql,
+                solqa_ql_qi,
+                solqa_ql_qr,
+                solqa_ql_qs,
+                solqa_ql_qv,
+                solqa_qi_ql,
+                solqa_qi_qi,
+                solqa_qi_qr,
+                solqa_qi_qs,
+                solqa_qi_qv,
+                solqa_qr_ql,
+                solqa_qr_qi,
+                solqa_qr_qr,
+                solqa_qr_qs,
+                solqa_qr_qv,
+                solqa_qs_ql,
+                solqa_qs_qi,
+                solqa_qs_qr,
+                solqa_qs_qs,
+                solqa_qs_qv,
+                solqa_qv_ql,
+                solqa_qv_qi,
+                solqa_qv_qr,
+                solqa_qv_qs,
+                solqa_qv_qv,
+            )
+            ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1(
+                order_qs,
+                index3_ql_ql,
+                index3_ql_qi,
+                index3_ql_qr,
+                index3_ql_qs,
+                index3_ql_qv,
+                index3_qi_ql,
+                index3_qi_qi,
+                index3_qi_qr,
+                index3_qi_qs,
+                index3_qi_qv,
+                index3_qr_ql,
+                index3_qr_qi,
+                index3_qr_qr,
+                index3_qr_qs,
+                index3_qr_qv,
+                index3_qs_ql,
+                index3_qs_qi,
+                index3_qs_qr,
+                index3_qs_qs,
+                index3_qs_qv,
+                index3_qv_ql,
+                index3_qv_qi,
+                index3_qv_qr,
+                index3_qv_qs,
+                index3_qv_qv,
+                ql,
+                qi,
+                qr,
+                qs,
+                qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+                sinksum_ql,
+                sinksum_qi,
+                sinksum_qr,
+                sinksum_qs,
+                sinksum_qv,
+                solqa_ql_ql,
+                solqa_ql_qi,
+                solqa_ql_qr,
+                solqa_ql_qs,
+                solqa_ql_qv,
+                solqa_qi_ql,
+                solqa_qi_qi,
+                solqa_qi_qr,
+                solqa_qi_qs,
+                solqa_qi_qv,
+                solqa_qr_ql,
+                solqa_qr_qi,
+                solqa_qr_qr,
+                solqa_qr_qs,
+                solqa_qr_qv,
+                solqa_qs_ql,
+                solqa_qs_qi,
+                solqa_qs_qr,
+                solqa_qs_qs,
+                solqa_qs_qv,
+                solqa_qv_ql,
+                solqa_qv_qi,
+                solqa_qv_qr,
+                solqa_qv_qs,
+                solqa_qv_qv,
+            )
+            ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1(
+                order_qv,
+                index3_ql_ql,
+                index3_ql_qi,
+                index3_ql_qr,
+                index3_ql_qs,
+                index3_ql_qv,
+                index3_qi_ql,
+                index3_qi_qi,
+                index3_qi_qr,
+                index3_qi_qs,
+                index3_qi_qv,
+                index3_qr_ql,
+                index3_qr_qi,
+                index3_qr_qr,
+                index3_qr_qs,
+                index3_qr_qv,
+                index3_qs_ql,
+                index3_qs_qi,
+                index3_qs_qr,
+                index3_qs_qs,
+                index3_qs_qv,
+                index3_qv_ql,
+                index3_qv_qi,
+                index3_qv_qr,
+                index3_qv_qs,
+                index3_qv_qv,
+                ql,
+                qi,
+                qr,
+                qs,
+                qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+                sinksum_ql,
+                sinksum_qi,
+                sinksum_qr,
+                sinksum_qs,
+                sinksum_qv,
+                solqa_ql_ql,
+                solqa_ql_qi,
+                solqa_ql_qr,
+                solqa_ql_qs,
+                solqa_ql_qv,
+                solqa_qi_ql,
+                solqa_qi_qi,
+                solqa_qi_qr,
+                solqa_qi_qs,
+                solqa_qi_qv,
+                solqa_qr_ql,
+                solqa_qr_qi,
+                solqa_qr_qr,
+                solqa_qr_qs,
+                solqa_qr_qv,
+                solqa_qs_ql,
+                solqa_qs_qi,
+                solqa_qs_qr,
+                solqa_qs_qs,
+                solqa_qs_qv,
+                solqa_qv_ql,
+                solqa_qv_qi,
+                solqa_qv_qr,
+                solqa_qv_qs,
+                solqa_qv_qv,
+            )
+
+            # *** 5.2.2: solver
+            # --- set the lhs of equation
+            # --- diagonals: microphysical sink terms + transport
+            lhs_ql_ql = (
+                1
+                + fallsink_ql
+                + solqb_qv_ql
+                + solqb_ql_ql
+                + solqb_qi_ql
+                + solqb_qr_ql
+                + solqb_qs_ql
+            )
+            lhs_qi_qi = (
+                1
+                + fallsink_qi
+                + solqb_qv_qi
+                + solqb_ql_qi
+                + solqb_qi_qi
+                + solqb_qr_qi
+                + solqb_qs_qi
+            )
+            lhs_qr_qr = (
+                1
+                + fallsink_qr
+                + solqb_qv_qr
+                + solqb_ql_qr
+                + solqb_qi_qr
+                + solqb_qr_qr
+                + solqb_qs_qr
+            )
+            lhs_qs_qs = (
+                1
+                + fallsink_qs
+                + solqb_qv_qs
+                + solqb_ql_qs
+                + solqb_qi_qs
+                + solqb_qr_qs
+                + solqb_qs_qs
+            )
+            lhs_qv_qv = (
+                1
+                + fallsink_qv
+                + solqb_qv_qv
+                + solqb_ql_qv
+                + solqb_qi_qv
+                + solqb_qr_qv
+                + solqb_qs_qv
+            )
+
+            # --- non-diagonals: microphysical source terms
+            lhs_ql_qi = -solqb_ql_qi
+            lhs_ql_qr = -solqb_ql_qr
+            lhs_ql_qs = -solqb_ql_qs
+            lhs_ql_qv = -solqb_ql_qv
+            lhs_qi_ql = -solqb_qi_ql
+            lhs_qi_qr = -solqb_qi_qr
+            lhs_qi_qs = -solqb_qi_qs
+            lhs_qi_qv = -solqb_qi_qv
+            lhs_qr_ql = -solqb_qr_ql
+            lhs_qr_qi = -solqb_qr_qi
+            lhs_qr_qs = -solqb_qr_qs
+            lhs_qr_qv = -solqb_qr_qv
+            lhs_qs_ql = -solqb_qs_ql
+            lhs_qs_qi = -solqb_qs_qi
+            lhs_qs_qr = -solqb_qs_qr
+            lhs_qs_qv = -solqb_qs_qv
+            lhs_qv_ql = -solqb_qv_ql
+            lhs_qv_qi = -solqb_qv_qi
+            lhs_qv_qr = -solqb_qv_qr
+            lhs_qv_qs = -solqb_qv_qs
+
+            # --- set the rhs of equation
+            # --- sum the explicit source and sink
+            qln = ql + solqa_ql_ql + solqa_ql_qi + solqa_ql_qr + solqa_ql_qs + solqa_ql_qv
+            qin = qi + solqa_qi_ql + solqa_qi_qi + solqa_qi_qr + solqa_qi_qs + solqa_qi_qv
+            qrn = qr + solqa_qr_ql + solqa_qr_qi + solqa_qr_qr + solqa_qr_qs + solqa_qr_qv
+            qsn = qs + solqa_qs_ql + solqa_qs_qi + solqa_qs_qr + solqa_qs_qs + solqa_qs_qv
+            qvn = qv + solqa_qv_ql + solqa_qv_qi + solqa_qv_qr + solqa_qv_qs + solqa_qv_qv
+
+            # --- solve by LU decomposition
+            # non pivoting recursive factorization
+            lhs_qi_ql /= lhs_ql_ql  #             JN=1, JM=2
+            lhs_qi_qi -= lhs_qi_ql * lhs_ql_qi  # JN=1, JM=2, IK=2
+            lhs_qi_qr -= lhs_qi_ql * lhs_ql_qr  # JN=1, JM=2, IK=3
+            lhs_qi_qs -= lhs_qi_ql * lhs_ql_qs  # JN=1, JM=2, IK=4
+            lhs_qi_qv -= lhs_qi_ql * lhs_ql_qv  # JN=1, JM=2, IK=0
+            lhs_qr_ql /= lhs_ql_ql  #             JN=1, JM=3
+            lhs_qr_qi -= lhs_qr_ql * lhs_ql_qi  # JN=1, JM=3, IK=2
+            lhs_qr_qr -= lhs_qr_ql * lhs_ql_qr  # JN=1, JM=3, IK=3
+            lhs_qr_qs -= lhs_qr_ql * lhs_ql_qs  # JN=1, JM=3, IK=4
+            lhs_qr_qv -= lhs_qr_ql * lhs_ql_qv  # JN=1, JM=3, IK=0
+            lhs_qs_ql /= lhs_ql_ql  #             JN=1, JM=4
+            lhs_qs_qi -= lhs_qs_ql * lhs_ql_qi  # JN=1, JM=4, IK=2
+            lhs_qs_qr -= lhs_qs_ql * lhs_ql_qr  # JN=1, JM=4, IK=3
+            lhs_qs_qs -= lhs_qs_ql * lhs_ql_qs  # JN=1, JM=4, IK=4
+            lhs_qs_qv -= lhs_qs_ql * lhs_ql_qv  # JN=1, JM=4, IK=0
+            lhs_qv_ql /= lhs_ql_ql  #             JN=1, JM=0
+            lhs_qv_qi -= lhs_qv_ql * lhs_ql_qi  # JN=1, JM=0, IK=2
+            lhs_qv_qr -= lhs_qv_ql * lhs_ql_qr  # JN=1, JM=0, IK=3
+            lhs_qv_qs -= lhs_qv_ql * lhs_ql_qs  # JN=1, JM=0, IK=4
+            lhs_qv_qv -= lhs_qv_ql * lhs_ql_qv  # JN=1, JM=0, IK=0
+            lhs_qr_qi /= lhs_qi_qi  #             JN=2, JM=3
+            lhs_qr_qr -= lhs_qr_qi * lhs_qi_qr  # JN=2, JM=3, IK=3
+            lhs_qr_qs -= lhs_qr_qi * lhs_qi_qs  # JN=2, JM=3, IK=4
+            lhs_qr_qv -= lhs_qr_qi * lhs_qi_qv  # JN=2, JM=3, IK=0
+            lhs_qs_qi /= lhs_qi_qi  #             JN=2, JM=4
+            lhs_qs_qr -= lhs_qs_qi * lhs_qi_qr  # JN=2, JM=4, IK=3
+            lhs_qs_qs -= lhs_qs_qi * lhs_qi_qs  # JN=2, JM=4, IK=4
+            lhs_qs_qv -= lhs_qs_qi * lhs_qi_qv  # JN=2, JM=4, IK=0
+            lhs_qv_qi /= lhs_qi_qi  #             JN=2, JM=0
+            lhs_qv_qr -= lhs_qv_qi * lhs_qi_qr  # JN=2, JM=0, IK=3
+            lhs_qv_qs -= lhs_qv_qi * lhs_qi_qs  # JN=2, JM=0, IK=4
+            lhs_qv_qv -= lhs_qv_qi * lhs_qi_qv  # JN=2, JM=0, IK=0
+            lhs_qs_qr /= lhs_qr_qr  #             JN=3, JM=4
+            lhs_qs_qs -= lhs_qs_qr * lhs_qr_qs  # JN=3, JM=4, IK=4
+            lhs_qs_qv -= lhs_qs_qr * lhs_qr_qv  # JN=3, JM=4, IK=0
+            lhs_qv_qr /= lhs_qr_qr  #             JN=3, JM=0
+            lhs_qv_qs -= lhs_qv_qr * lhs_qr_qs  # JN=3, JM=0, IK=4
+            lhs_qv_qv -= lhs_qv_qr * lhs_qr_qv  # JN=3, JM=0, IK=0
+            lhs_qv_qs /= lhs_qs_qs  #             JN=4, JM=0
+            lhs_qv_qv -= lhs_qv_qs * lhs_qs_qv  # JN=4, JM=0, IK=0
+
+            # backsubstitution: step 1
+            qin -= lhs_qi_ql * qln
+            qrn -= lhs_qr_ql * qln + lhs_qr_qi * qin
+            qsn -= lhs_qs_ql * qln + lhs_qs_qi * qin + lhs_qs_qr * qrn
+            qvn -= lhs_qv_ql * qln + lhs_qv_qi * qin + lhs_qv_qr * qrn + lhs_qv_qs * qsn
+
+            # backsubstitution: step 2
+            qvn /= lhs_qv_qv
+            qsn -= lhs_qs_qv * qvn
+            qsn /= lhs_qs_qs
+            qrn -= lhs_qr_qs * qsn + lhs_qr_qv * qvn
+            qrn /= lhs_qr_qr
+            qin -= lhs_qi_qr * qrn + lhs_qi_qs * qsn + lhs_qi_qv * qvn
+            qin /= lhs_qi_qi
+            qln -= lhs_ql_qi * qin + lhs_ql_qr * qrn + lhs_ql_qs * qsn + lhs_ql_qv * qvn
+            qln /= lhs_ql_ql
+
+            # ensure no small values (including negatives) remain in cloud variables
+            # nor precipitation rates
+            if qln < EPSEC:
+                qvn += qln
+                qln = 0.0
+            if qin < EPSEC:
+                qvn += qin
+                qin = 0.0
+            if qrn < EPSEC:
+                qvn += qrn
+                qrn = 0.0
+            if qsn < EPSEC:
+                qvn += qsn
+                qsn = 0.0
+
+            # *** 5.3: precipitation/sedimentation fluxes to next level diagnostic precipitation fluxes
+            pfplsl = fallsink_ql * qln * rdtgdp
+            pfplsi = fallsink_qi * qin * rdtgdp
+            pfplsr = fallsink_qr * qrn * rdtgdp
+            pfplss = fallsink_qs * qsn * rdtgdp
+            pfplsv = fallsink_qv * qvn * rdtgdp
+
+            # ensure precipitation fraction is zero if no precipitation
+            qpretot = pfplss + pfplsr
+            if qpretot < EPSEC:
+                tmp_covptot[0, 0] = 0.0
+
+            # === 6: update tendencies
+            # *** 6.1: temperature and CLV budgets
+            flux_ql = psupsatsrce_ql + convsrce_ql + fallsrce_ql - (fallsink_ql + convsink_ql) * qln
+            if __INLINED(PHASEQL == 1):
+                out_tnd_loc_t[0, 0, 0] += RALVDCP * (qln - ql - flux_ql) / dt
+            if __INLINED(PHASEQL == 2):
+                out_tnd_loc_t[0, 0, 0] += RALSDCP * (qln - ql - flux_ql) / dt
+            out_tnd_loc_ql[0, 0, 0] += (qln - ql0) / dt
+
+            flux_qi = psupsatsrce_qi + convsrce_qi + fallsrce_qi - (fallsink_qi + convsink_qi) * qin
+            if __INLINED(PHASEQI == 1):
+                out_tnd_loc_t[0, 0, 0] += RALVDCP * (qin - qi - flux_qi) / dt
+            if __INLINED(PHASEQI == 2):
+                out_tnd_loc_t[0, 0, 0] += RALSDCP * (qin - qi - flux_qi) / dt
+            out_tnd_loc_qi[0, 0, 0] += (qin - qi0) / dt
+
+            flux_qr = psupsatsrce_qr + convsrce_qr + fallsrce_qr - (fallsink_qr + convsink_qr) * qrn
+            if __INLINED(PHASEQR == 1):
+                out_tnd_loc_t[0, 0, 0] += RALVDCP * (qrn - qr - flux_qr) / dt
+            if __INLINED(PHASEQR == 2):
+                out_tnd_loc_t[0, 0, 0] += RALSDCP * (qrn - qr - flux_qr) / dt
+            out_tnd_loc_qr[0, 0, 0] += (qrn - qr0) / dt
+
+            flux_qs = psupsatsrce_qs + convsrce_qs + fallsrce_qs - (fallsink_qs + convsink_qs) * qsn
+            if __INLINED(PHASEQS == 1):
+                out_tnd_loc_t[0, 0, 0] += RALVDCP * (qsn - qs - flux_qs) / dt
+            if __INLINED(PHASEQS == 2):
+                out_tnd_loc_t[0, 0, 0] += RALSDCP * (qsn - qs - flux_qs) / dt
+            out_tnd_loc_qs[0, 0, 0] += (qsn - qs0) / dt
+
+            # *** 6.2: humidity budget
+            out_tnd_loc_qv[0, 0, 0] += (qvn - qv) / dt
+
+            # *** 6.3: cloud cover
+            out_tnd_loc_a[0, 0, 0] += da / dt
+
+            # --- copy precipitation fraction into output variable
+            out_covptot[0, 0, 0] = tmp_covptot[0, 0]
+
+    # === 7: flux/diagnostics computations
+    with computation(FORWARD):
+        with interval(0, 1):
+            out_fplsl[0, 0, 0] = 0.0
+            out_fplsn[0, 0, 0] = 0.0
+            out_fhpsl[0, 0, 0] = 0.0
+            out_fhpsn[0, 0, 0] = 0.0
+            out_fsqlf[0, 0, 0] = 0.0
+            out_fsqif[0, 0, 0] = 0.0
+            out_fsqrf[0, 0, 0] = 0.0
+            out_fsqsf[0, 0, 0] = 0.0
+            out_fcqlng[0, 0, 0] = 0.0
+            out_fcqnng[0, 0, 0] = 0.0
+            out_fcqrng[0, 0, 0] = 0.0
+            out_fcqsng[0, 0, 0] = 0.0
+            out_fsqltur[0, 0, 0] = 0.0
+            out_fsqitur[0, 0, 0] = 0.0
+        with interval(1, None):
+            # --- copy general precip arrays back into PFP arrays for GRIB archiving
+            out_fplsl[0, 0, 0] = pfplsr[0, 0, -1] + pfplsl[0, 0, -1]
+            out_fplsn[0, 0, 0] = pfplss[0, 0, -1] + pfplsi[0, 0, -1]
+
+            # --- enthalpy flux due to precipitation
+            out_fhpsl[0, 0, 0] = -RLVTT * out_fplsl[0, 0, 0]
+            out_fhpsn[0, 0, 0] = -RLSTT * out_fplsn[0, 0, 0]
+
+            gdph_r = -(in_aph[0, 0, 0] - in_aph[0, 0, -1]) / (RG * dt)
+            out_fsqlf[0, 0, 0] = out_fsqlf[0, 0, -1]
+            out_fsqif[0, 0, 0] = out_fsqif[0, 0, -1]
+            out_fsqrf[0, 0, 0] = out_fsqlf[0, 0, -1]
+            out_fsqsf[0, 0, 0] = out_fsqif[0, 0, -1]
+            out_fcqlng[0, 0, 0] = out_fcqlng[0, 0, -1]
+            out_fcqnng[0, 0, 0] = out_fcqnng[0, 0, -1]
+            out_fcqrng[0, 0, 0] = out_fcqlng[0, 0, -1]
+            out_fcqsng[0, 0, 0] = out_fcqnng[0, 0, -1]
+            out_fsqltur[0, 0, 0] = out_fsqltur[0, 0, -1]
+            out_fsqitur[0, 0, 0] = out_fsqitur[0, 0, -1]
+
+            # liquid, LS scheme minus detrainment
+            out_fsqlf[0, 0, 0] += (
+                qln[0, 0, -1]
+                - ql0[0, 0, -1]
+                + in_vfl[0, 0, -1] * dt
+                - foealfa[0, 0, -1] * lude[0, 0, -1]
+            ) * gdph_r
+            # liquid, negative numbers
+            out_fcqlng[0, 0, 0] += lneg_ql[0, 0, -1] * gdph_r
+            # liquid, vertical diffusion
+            out_fsqltur[0, 0, 0] += in_vfl[0, 0, -1] * dt * gdph_r
+
+            # rain, LS scheme
+            out_fsqrf[0, 0, 0] += (qrn[0, 0, -1] - qr0[0, 0, -1]) * gdph_r
+            # rain, negative numbers
+            out_fcqrng[0, 0, 0] += lneg_qr[0, 0, -1] * gdph_r
+
+            # ice, LS scheme minus detrainment
+            out_fsqif[0, 0, 0] += (
+                qin[0, 0, -1]
+                - qi0[0, 0, -1]
+                + in_vfi[0, 0, -1] * dt
+                - (1 - foealfa[0, 0, -1]) * lude[0, 0, -1]
+            ) * gdph_r
+            # ice, negative numbers
+            out_fcqnng[0, 0, 0] += lneg_qi[0, 0, -1] * gdph_r
+            # ice, vertical diffusion
+            out_fsqitur[0, 0, 0] += in_vfi[0, 0, -1] * dt * gdph_r
+
+            # snow, LS scheme
+            out_fsqsf[0, 0, 0] += (qsn[0, 0, -1] - qs0[0, 0, -1]) * gdph_r
+            # snow, negative numbers
+            out_fcqsng[0, 0, 0] += lneg_qs[0, 0, -1] * gdph_r
diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc_split.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc_split.py
new file mode 100644
index 00000000..0a7d27c0
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc_split.py
@@ -0,0 +1,2279 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+
+from gt4py.gtscript import Field, IJ, K
+
+from cloudsc4py.framework.stencil import stencil_collection
+from cloudsc4py.physics._stencils.cuadjtq import f_cuadjtq
+from cloudsc4py.physics._stencils.fccld import f_fokoop
+from cloudsc4py.physics._stencils.fcttre import (
+    f_foealfa,
+    f_foedelta,
+    f_foedem,
+    f_foeeice,
+    f_foeeliq,
+    f_foeewm,
+    f_foeldcpm,
+)
+from cloudsc4py.physics._stencils.helpers import f_helper_0, f_helper_1
+
+
+@stencil_collection("cloudsc_tendencies")
+def cloudsc_tendencies(
+    in_a: Field["float"],
+    in_ap: Field["float"],
+    in_aph: Field["float"],  # staggered
+    in_ccn: Field["float"],
+    in_convection_on: Field[IJ, "bool"],
+    in_convection_type: Field[IJ, "int"],
+    in_hrlw: Field["float"],
+    in_hrsw: Field["float"],
+    in_icrit_aer: Field["float"],
+    in_lcrit_aer: Field["float"],
+    in_lsm: Field[IJ, "float"],
+    in_lu: Field["float"],
+    in_lude: Field["float"],
+    in_mfd: Field["float"],
+    in_mfu: Field["float"],
+    in_nice: Field["float"],
+    in_qi: Field["float"],
+    in_ql: Field["float"],
+    in_qr: Field["float"],
+    in_qs: Field["float"],
+    in_qv: Field["float"],
+    in_re_ice: Field["float"],
+    in_snde: Field["float"],
+    in_supsat: Field["float"],
+    in_t: Field["float"],
+    in_tnd_tmp_a: Field["float"],
+    in_tnd_tmp_qi: Field["float"],
+    in_tnd_tmp_ql: Field["float"],
+    in_tnd_tmp_qr: Field["float"],
+    in_tnd_tmp_qs: Field["float"],
+    in_tnd_tmp_qv: Field["float"],
+    in_tnd_tmp_t: Field["float"],
+    in_w: Field["float"],
+    out_covptot: Field["float"],
+    out_foealfa: Field["float"],
+    out_lneg_qi: Field["float"],
+    out_lneg_ql: Field["float"],
+    out_lneg_qr: Field["float"],
+    out_lneg_qs: Field["float"],
+    out_lude: Field["float"],
+    out_pfplsi: Field["float"],
+    out_pfplsl: Field["float"],
+    out_pfplsr: Field["float"],
+    out_pfplss: Field["float"],
+    out_qi0: Field["float"],
+    out_qin: Field["float"],
+    out_ql0: Field["float"],
+    out_qln: Field["float"],
+    out_qr0: Field["float"],
+    out_qrn: Field["float"],
+    out_qs0: Field["float"],
+    out_qsn: Field["float"],
+    out_rainfrac_toprfz: Field[IJ, "float"],
+    out_tnd_loc_a: Field["float"],
+    out_tnd_loc_qi: Field["float"],
+    out_tnd_loc_ql: Field["float"],
+    out_tnd_loc_qr: Field["float"],
+    out_tnd_loc_qs: Field["float"],
+    out_tnd_loc_qv: Field["float"],
+    out_tnd_loc_t: Field["float"],
+    tmp_aph_s: Field[IJ, "float"],
+    tmp_cldtopdist: Field[IJ, "float"],
+    tmp_covpmax: Field[IJ, "float"],
+    tmp_covptot: Field[IJ, "float"],
+    tmp_klevel: Field[K, "int"],
+    tmp_paphd: Field[IJ, "float"],
+    tmp_rainliq: Field[IJ, "bool"],
+    tmp_trpaus: Field[IJ, "float"],
+    *,
+    dt: "float",
+):
+    from __externals__ import (
+        DEPICE,
+        EPSEC,
+        EPSILON,
+        EVAPRAIN,
+        EVAPSNOW,
+        FALLQI,
+        FALLQL,
+        FALLQR,
+        FALLQS,
+        FALLQV,
+        LAERICEAUTO,
+        LAERICESED,
+        LAERLIQAUTOLSP,
+        LAERLIQCOLL,
+        NCLDTOP,
+        NLEV,
+        NSSOPT,
+        PHASEQI,
+        PHASEQL,
+        PHASEQR,
+        PHASEQS,
+        PHASEQV,
+        R4IES,
+        R4LES,
+        R5IES,
+        R5LES,
+        RALFDCP,
+        RALSDCP,
+        RALVDCP,
+        RAMID,
+        RAMIN,
+        RCCN,
+        RCL_APB1,
+        RCL_APB2,
+        RCL_APB3,
+        RCL_CDENOM1,
+        RCL_CDENOM2,
+        RCL_CDENOM3,
+        RCL_CONST1I,
+        RCL_CONST1R,
+        RCL_CONST1S,
+        RCL_CONST2I,
+        RCL_CONST2R,
+        RCL_CONST2S,
+        RCL_CONST3I,
+        RCL_CONST3R,
+        RCL_CONST3S,
+        RCL_CONST4I,
+        RCL_CONST4R,
+        RCL_CONST4S,
+        RCL_CONST5I,
+        RCL_CONST5R,
+        RCL_CONST5S,
+        RCL_CONST6I,
+        RCL_CONST6R,
+        RCL_CONST6S,
+        RCL_CONST7S,
+        RCL_CONST8S,
+        RCL_FAC1,
+        RCL_FAC2,
+        RCL_FZRAB,
+        RCL_KK_cloud_num_land,
+        RCL_KK_cloud_num_sea,
+        RCL_KKAac,
+        RCL_KKAau,
+        RCL_KKBac,
+        RCL_KKBaun,
+        RCL_KKBauq,
+        RCLCRIT_LAND,
+        RCLCRIT_SEA,
+        RCLDIFF,
+        RCLDIFF_CONVI,
+        RCLDTOPCF,
+        RCOVPMIN,
+        RD,
+        RDCP,
+        RDENSREF,
+        RDEPLIQREFDEPTH,
+        RDEPLIQREFRATE,
+        RETV,
+        RG,
+        RICEINIT,
+        RKCONV,
+        RKOOPTAU,
+        RLCRITSNOW,
+        RLDCP,
+        RLMIN,
+        RLSTT,
+        RLVTT,
+        RNICE,
+        RPECONS,
+        RPRC1,
+        RPRECRHMAX,
+        RSNOWLIN1,
+        RSNOWLIN2,
+        RTAUMEL,
+        RTHOMO,
+        RTT,
+        RV,
+        RVRFACTOR,
+        TW1,
+        TW2,
+        TW3,
+        TW4,
+        TW5,
+        VQI,
+        VQL,
+        VQR,
+        VQS,
+        VQV,
+        WARMRAIN,
+    )
+
+    with computation(FORWARD), interval(0, 1):
+        # zero arrays
+        out_rainfrac_toprfz[0, 0] = 0.0
+        tmp_cldtopdist[0, 0] = 0.0
+        tmp_covpmax[0, 0] = 0.0
+        tmp_covptot[0, 0] = 0.0
+        tmp_paphd[0, 0] = 0.0
+        tmp_rainliq[0, 0] = True
+        tmp_trpaus[0, 0] = 0.0
+
+    with computation(FORWARD), interval(...):
+        # === 1: initial values for variables
+        # --- initialization of output tendencies
+        out_tnd_loc_t[0, 0, 0] = 0
+        out_tnd_loc_a[0, 0, 0] = 0
+        out_tnd_loc_ql[0, 0, 0] = 0
+        out_tnd_loc_qr[0, 0, 0] = 0
+        out_tnd_loc_qi[0, 0, 0] = 0
+        out_tnd_loc_qs[0, 0, 0] = 0
+        out_tnd_loc_qv[0, 0, 0] = 0
+
+        # --- non CLV initialization
+        t = in_t[0, 0, 0] + dt * in_tnd_tmp_t[0, 0, 0]
+        a = in_a[0, 0, 0] + dt * in_tnd_tmp_a[0, 0, 0]
+        a0 = a
+
+        # --- initialization for CLV family
+        ql = in_ql[0, 0, 0] + dt * in_tnd_tmp_ql[0, 0, 0]
+        out_ql0[0, 0, 0] = ql
+        qi = in_qi[0, 0, 0] + dt * in_tnd_tmp_qi[0, 0, 0]
+        out_qi0[0, 0, 0] = qi
+        qr = in_qr[0, 0, 0] + dt * in_tnd_tmp_qr[0, 0, 0]
+        out_qr0[0, 0, 0] = qr
+        qs = in_qs[0, 0, 0] + dt * in_tnd_tmp_qs[0, 0, 0]
+        out_qs0[0, 0, 0] = qs
+        qv = in_qv[0, 0, 0] + dt * in_tnd_tmp_qv[0, 0, 0]
+
+        # --- zero arrays
+        out_lneg_ql[0, 0, 0] = 0.0
+        out_lneg_qi[0, 0, 0] = 0.0
+        out_lneg_qr[0, 0, 0] = 0.0
+        out_lneg_qs[0, 0, 0] = 0.0
+
+        # --- tidy up very small cloud cover or total cloud water
+        expr1 = ql + qi
+        if expr1 < RLMIN or a < RAMIN:
+            # evaporate small cloud liquid water amounts
+            out_lneg_ql[0, 0, 0] += ql
+            qadj = ql / dt
+            out_tnd_loc_qv[0, 0, 0] += qadj
+            out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj
+            qv += ql
+            ql = 0.0
+
+            # evaporate small cloud ice water amounts
+            out_lneg_qi[0, 0, 0] += qi
+            qadj = qi / dt
+            out_tnd_loc_qv[0, 0, 0] += qadj
+            out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj
+            qv += qi
+            qi = 0.0
+
+            # set cloud cover to zero
+            a = 0.0
+
+        # --- tidy up small CLV variables: ql
+        if ql < RLMIN:
+            out_lneg_ql[0, 0, 0] += ql
+            qadj = ql / dt
+            out_tnd_loc_qv[0, 0, 0] += qadj
+            if __INLINED(PHASEQL == 1):
+                out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj
+            elif __INLINED(PHASEQL == 2):
+                out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj
+            qv += ql
+            ql = 0.0
+
+        # --- tidy up small CLV variables: qi
+        if qi < RLMIN:
+            out_lneg_qi[0, 0, 0] += qi
+            qadj = qi / dt
+            out_tnd_loc_qv[0, 0, 0] += qadj
+            if __INLINED(PHASEQI == 1):
+                out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj
+            elif __INLINED(PHASEQI == 2):
+                out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj
+            qv += qi
+            qi = 0.0
+
+        # --- tidy up small CLV variables: qr
+        if qr < RLMIN:
+            out_lneg_qr[0, 0, 0] += qr
+            qadj = qr / dt
+            out_tnd_loc_qv[0, 0, 0] += qadj
+            if __INLINED(PHASEQR == 1):
+                out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj
+            elif __INLINED(PHASEQR == 2):
+                out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj
+            qv += qr
+            qr = 0.0
+
+        # --- tidy up small CLV variables: qs
+        if qs < RLMIN:
+            out_lneg_qs[0, 0, 0] += qs
+            qadj = qs / dt
+            out_tnd_loc_qv[0, 0, 0] += qadj
+            if __INLINED(PHASEQS == 1):
+                out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj
+            elif __INLINED(PHASEQS == 2):
+                out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj
+            qv += qs
+            qs = 0.0
+
+        # --- define saturation values
+        # --- old *diagnostic* mixed phase saturation
+        foealfa = f_foealfa(t)
+        out_foealfa[0, 0, 0] = foealfa
+        foeewmt = min(f_foeewm(t) / in_ap[0, 0, 0], 0.5)
+        qsmix = foeewmt / (1 - RETV * foeewmt)
+
+        # --- ice saturation T < 273K
+        # --- liquid water saturation for T > 273K
+        alfa = f_foedelta(t)
+        foeew = min((alfa * f_foeeliq(t) + (1 - alfa) * f_foeeice(t)) / in_ap[0, 0, 0], 0.5)
+        qsice = foeew / (1 - RETV * foeew)
+
+        # --- liquid water saturation
+        foeeliqt = min(f_foeeliq(t) / in_ap[0, 0, 0], 0.5)
+        qsliq = foeeliqt / (1 - RETV * foeeliqt)
+
+        # --- ensure cloud fraction is between 0 and 1
+        a = max(0, min(1, a))
+
+        # --- calculate liq/ice fractions (no longer a diagnostic relationship)
+        li = ql + qi
+        if li > RLMIN:
+            liqfrac = ql / li
+            icefrac = 1 - liqfrac
+        else:
+            liqfrac = 0.0
+            icefrac = 0.0
+
+    # === 2: constants and parameters
+    # --- find tropopause level
+    with computation(FORWARD), interval(0, 1):
+        tmp_trpaus[0, 0] = 0.1
+        tmp_paphd[0, 0] = 1 / tmp_aph_s[0, 0]
+    with computation(FORWARD), interval(0, -1):
+        sig = in_ap[0, 0, 0] * tmp_paphd[0, 0]
+        if sig > 0.1 and sig < 0.4 and t[0, 0, 0] > t[0, 0, 1]:
+            tmp_trpaus[0, 0] = sig
+
+    # === 3: physics
+    # --- main vertical loop
+    with computation(FORWARD):
+        with interval(0, NCLDTOP - 1):
+            # --- initialize variables
+            out_lude[0, 0, 0] = in_lude[0, 0, 0]
+            out_pfplsl[0, 0, 0] = 0.0
+            out_pfplsi[0, 0, 0] = 0.0
+            out_pfplsr[0, 0, 0] = 0.0
+            out_pfplss[0, 0, 0] = 0.0
+            pfplsv = 0.0
+            out_qln[0, 0, 0] = 0.0
+            out_qin[0, 0, 0] = 0.0
+            out_qrn[0, 0, 0] = 0.0
+            out_qsn[0, 0, 0] = 0.0
+            qvn = 0.0
+            anew = 0.0
+        with interval(NCLDTOP - 1, None):
+            # *** 3.0: initialize variables
+            # --- first guess microphysics
+            qlfg = ql
+            qifg = qi
+            qrfg = qr
+            qsfg = qs
+            qvfg = qv
+
+            convsink_ql = 0.0
+            convsink_qi = 0.0
+            convsink_qr = 0.0
+            convsink_qs = 0.0
+            convsrce_ql = 0.0
+            convsrce_qi = 0.0
+            convsrce_qr = 0.0
+            convsrce_qs = 0.0
+            convsrce_qv = 0.0
+            fallsrce_ql = 0.0
+            fallsrce_qi = 0.0
+            fallsrce_qr = 0.0
+            fallsrce_qs = 0.0
+            index1_ql = True
+            index1_qi = True
+            index1_qr = True
+            index1_qs = True
+            index1_qv = True
+            index3_ql_ql = False
+            index3_ql_qi = False
+            index3_ql_qr = False
+            index3_ql_qs = False
+            index3_ql_qv = False
+            index3_qi_ql = False
+            index3_qi_qi = False
+            index3_qi_qr = False
+            index3_qi_qs = False
+            index3_qi_qv = False
+            index3_qr_ql = False
+            index3_qr_qi = False
+            index3_qr_qr = False
+            index3_qr_qs = False
+            index3_qr_qv = False
+            index3_qs_ql = False
+            index3_qs_qi = False
+            index3_qs_qr = False
+            index3_qs_qs = False
+            index3_qs_qv = False
+            index3_qv_ql = False
+            index3_qv_qi = False
+            index3_qv_qr = False
+            index3_qv_qs = False
+            index3_qv_qv = False
+            lcust_ql = 0.0
+            lcust_qi = 0.0
+            lcust_qr = 0.0
+            lcust_qs = 0.0
+            lcust_qv = 0.0
+            ldefr = 0.0
+            lfinalsum = 0.0
+            order_ql = -999
+            order_qi = -999
+            order_qr = -999
+            order_qs = -999
+            order_qv = -999
+            psupsatsrce_ql = 0.0
+            psupsatsrce_qi = 0.0
+            psupsatsrce_qr = 0.0
+            psupsatsrce_qs = 0.0
+            qpretot = 0.0
+            solab = 0.0
+            solac = 0.0
+            solqa_ql_ql = 0.0
+            solqa_ql_qi = 0.0
+            solqa_ql_qr = 0.0
+            solqa_ql_qs = 0.0
+            solqa_ql_qv = 0.0
+            solqa_qi_ql = 0.0
+            solqa_qi_qi = 0.0
+            solqa_qi_qr = 0.0
+            solqa_qi_qs = 0.0
+            solqa_qi_qv = 0.0
+            solqa_qr_ql = 0.0
+            solqa_qr_qi = 0.0
+            solqa_qr_qr = 0.0
+            solqa_qr_qs = 0.0
+            solqa_qr_qv = 0.0
+            solqa_qs_ql = 0.0
+            solqa_qs_qi = 0.0
+            solqa_qs_qr = 0.0
+            solqa_qs_qs = 0.0
+            solqa_qs_qv = 0.0
+            solqa_qv_ql = 0.0
+            solqa_qv_qi = 0.0
+            solqa_qv_qr = 0.0
+            solqa_qv_qs = 0.0
+            solqa_qv_qv = 0.0
+            solqb_ql_ql = 0.0
+            solqb_ql_qi = 0.0
+            solqb_ql_qr = 0.0
+            solqb_ql_qs = 0.0
+            solqb_ql_qv = 0.0
+            solqb_qi_ql = 0.0
+            solqb_qi_qi = 0.0
+            solqb_qi_qr = 0.0
+            solqb_qi_qs = 0.0
+            solqb_qi_qv = 0.0
+            solqb_qr_ql = 0.0
+            solqb_qr_qi = 0.0
+            solqb_qr_qr = 0.0
+            solqb_qr_qs = 0.0
+            solqb_qr_qv = 0.0
+            solqb_qs_ql = 0.0
+            solqb_qs_qi = 0.0
+            solqb_qs_qr = 0.0
+            solqb_qs_qs = 0.0
+            solqb_qs_qv = 0.0
+            solqb_qv_ql = 0.0
+            solqb_qv_qi = 0.0
+            solqb_qv_qr = 0.0
+            solqb_qv_qs = 0.0
+            solqb_qv_qv = 0.0
+
+            # derived variables needed
+            dp = in_aph[0, 0, 1] - in_aph[0, 0, 0]
+            gdp = RG / dp
+            rho = in_ap[0, 0, 0] / (RD * t)
+            dtgdp = dt * gdp
+            rdtgdp = dp / (RG * dt)
+
+            # --- calculate dqs/dT correction factor
+            # liquid
+            facw = R5LES / (t - R4LES) ** 2
+            cor = 1 / (1 - RETV * foeeliqt)
+            dqsliqdt = facw * cor * qsliq
+            corqsliq = 1 + RALVDCP * dqsliqdt
+
+            # ice
+            faci = R5IES / (t - R4IES) ** 2
+            cor = 1 / (1 - RETV * foeew)
+            dqsicedt = faci * cor * qsice
+            corqsice = 1 + RALSDCP * dqsicedt
+
+            # diagnostic mixed
+            fac = out_foealfa[0, 0, 0] * facw + (1 - out_foealfa[0, 0, 0]) * faci
+            cor = 1 / (1 - RETV * foeewmt)
+            dqsmixdt = fac * cor * qsmix
+            corqsmix = 1 + f_foeldcpm(t) * dqsmixdt
+
+            # evaporation/sublimation limits
+            evaplimmix = max((qsmix - qv) / corqsmix, 0.0)
+            evaplimice = max((qsice - qv) / corqsice, 0.0)
+
+            # --- in-cloud condensate amount
+            tmpa = 1 / max(a, EPSEC)
+            liqcld = ql * tmpa
+            icecld = qi * tmpa
+            licld = liqcld + icecld
+
+            # --- evaporate very small amounts of liquid...
+            if ql < RLMIN:
+                solqa_qv_ql += ql
+                solqa_ql_qv -= ql
+
+            # --- ...and ice
+            if qi < RLMIN:
+                solqa_qv_qi += qi
+                solqa_qi_qv -= qi
+
+            # *** 3.1: ice supersaturation adjustment
+            # --- supersaturation limit (from Koop)
+            fokoop = f_fokoop(t)
+
+            if t >= RTT or NSSOPT == 0:
+                fac = 1.0
+                faci = 1.0
+            else:
+                fac = a + fokoop * (1 - a)
+                faci = dt / RKOOPTAU
+
+            # calculate supersaturation to add to cloud
+            if a > 1 - RAMIN:
+                supsat = max((qv - fac * qsice) / corqsice, 0.0)
+            else:
+                # calculate environmental humidity supersaturation
+                qp1env = (qv - a * qsice) / max(1 - a, EPSILON)
+                supsat = max((1 - a) * (qp1env - fac * qsice) / corqsice, 0.0)
+
+            # --- here the supersaturation is turned into liquid water (t > RTHOMO) or ice
+            if supsat > EPSEC:
+                if t > RTHOMO:
+                    # turn supersaturation into liquid water
+                    solqa_ql_qv += supsat
+                    solqa_qv_ql -= supsat
+                    # include liquid in first guess
+                    qlfg += supsat
+                else:
+                    # turn supersaturation into ice water
+                    solqa_qi_qv += supsat
+                    solqa_qv_qi -= supsat
+                    # add ice to first guess for deposition term
+                    qifg += supsat
+
+                # increase cloud amount using RKOOPTAU timescale
+                solac = (1 - a) * faci
+
+            # --- include supersaturation from previous timestep
+            if in_supsat[0, 0, 0] > EPSEC:
+                if t > RTHOMO:
+                    # turn supersaturation into liquid water
+                    solqa_ql_ql += in_supsat[0, 0, 0]
+                    psupsatsrce_ql = in_supsat[0, 0, 0]
+                    # add liquid to first guess for deposition term
+                    qlfg += in_supsat[0, 0, 0]
+                else:
+                    # turn supersaturation into ice water
+                    solqa_qi_qi += in_supsat[0, 0, 0]
+                    psupsatsrce_qi = in_supsat[0, 0, 0]
+                    # add ice to first guess for deposition term
+                    qifg += in_supsat[0, 0, 0]
+
+                # increase cloud amount using RKOOPTAU timescale
+                solac = (1 - a) * faci
+
+            # *** 3.2: detrainment from convection
+            if tmp_klevel[0] < NLEV - 1:
+                out_lude[0, 0, 0] = in_lude[0, 0, 0] * dtgdp
+
+                if in_convection_on[0, 0] and out_lude[0, 0, 0] > RLMIN and in_lu[0, 0, 1] > EPSEC:
+                    solac += out_lude[0, 0, 0] / in_lu[0, 0, 1]
+                    # diagnostic temperature split
+                    convsrce_ql = out_foealfa[0, 0, 0] * out_lude[0, 0, 0]
+                    convsrce_qi = (1 - out_foealfa[0, 0, 0]) * out_lude[0, 0, 0]
+                    solqa_ql_ql += convsrce_ql
+                    solqa_qi_qi += convsrce_qi
+                else:
+                    out_lude[0, 0, 0] = 0.0
+
+                # convective snow detrainment source
+                if in_convection_on[0, 0]:
+                    solqa_qs_qs += in_snde[0, 0, 0] * dtgdp
+            else:
+                out_lude[0, 0, 0] = in_lude[0, 0, 0]
+
+            # *** 3.3: subsidence compensating convective updraughts
+            # --- subsidence source from layer above and evaporation of cloud within the layer
+            if tmp_klevel[0] > NCLDTOP - 1:
+                mf = max(0.0, (in_mfu + in_mfd) * dtgdp)
+                acust = mf * anew[0, 0, -1]
+
+                if __INLINED(not FALLQL and PHASEQL > 0):
+                    lcust_ql = mf * out_qln[0, 0, -1]
+                    # record total flux for enthalpy budget
+                    convsrce_ql += lcust_ql
+
+                if __INLINED(not FALLQI and PHASEQI > 0):
+                    lcust_qi = mf * out_qin[0, 0, -1]
+                    # record total flux for enthalpy budget
+                    convsrce_qi += lcust_qi
+
+                if __INLINED(not FALLQR and PHASEQR > 0):
+                    lcust_qr = mf * out_qrn[0, 0, -1]
+                    # record total flux for enthalpy budget
+                    convsrce_qr += lcust_qr
+
+                if __INLINED(not FALLQS and PHASEQS > 0):
+                    lcust_qs = mf * out_qsn[0, 0, -1]
+                    # record total flux for enthalpy budget
+                    convsrce_qs += lcust_qs
+
+                if __INLINED(not FALLQV and PHASEQV > 0):
+                    lcust_qv = mf * qvn[0, 0, -1]
+                    # record total flux for enthalpy budget
+                    convsrce_qv += lcust_qv
+
+                # work out how much liquid evaporates at arrival point
+                dtdp = RDCP * 0.5 * (t[0, 0, -1] + t[0, 0, 0]) / in_aph[0, 0, 0]
+                dtforc = dtdp[0, 0, 0] * (in_ap[0, 0, 0] - in_ap[0, 0, -1])
+                dqs = anew[0, 0, -1] * dtforc * dqsmixdt
+
+                if __INLINED(not FALLQL and PHASEQL > 0):
+                    lfinal = max(0.0, lcust_ql - dqs)
+                    evap = min(lcust_ql - lfinal, evaplimmix)
+                    lfinal = lcust_ql - evap
+                    lfinalsum += lfinal
+                    solqa_ql_ql += lcust_ql
+                    solqa_qv_ql += evap
+                    solqa_ql_qv -= evap
+
+                if __INLINED(not FALLQI and PHASEQI > 0):
+                    lfinal = max(0.0, lcust_qi - dqs)
+                    evap = min(lcust_qi - lfinal, evaplimmix)
+                    lfinal = lcust_qi - evap
+                    lfinalsum += lfinal
+                    solqa_qi_qi += lcust_qi
+                    solqa_qv_qi += evap
+                    solqa_qi_qv -= evap
+
+                if __INLINED(not FALLQR and PHASEQR > 0):
+                    lfinal = max(0.0, lcust_qr - dqs)
+                    evap = min(lcust_qr - lfinal, evaplimmix)
+                    lfinal = lcust_qr - evap
+                    lfinalsum += lfinal
+                    solqa_qr_qr += lcust_qr
+                    solqa_qv_qr += evap
+                    solqa_qr_qv -= evap
+
+                if __INLINED(not FALLQS and PHASEQS > 0):
+                    lfinal = max(0.0, lcust_qs - dqs)
+                    evap = min(lcust_qs - lfinal, evaplimmix)
+                    lfinal = lcust_qs - evap
+                    lfinalsum += lfinal
+                    solqa_qs_qs += lcust_qs
+                    solqa_qv_qs += evap
+                    solqa_qs_qv -= evap
+
+                if __INLINED(not FALLQV and PHASEQV > 0):
+                    lfinal = max(0.0, lcust_qv - dqs)
+                    evap = min(lcust_qv - lfinal, evaplimmix)
+                    lfinal = lcust_qv - evap
+                    lfinalsum += lfinal
+                    solqa_qv_qv += lcust_qv
+
+                # reset the cloud contribution if no cloud water survives to this level
+                if lfinalsum < EPSEC:
+                    acust = 0.0
+                solac += acust
+
+            # --- subsidence sink of cloud to the layer below
+            if tmp_klevel[0] < NLEV - 1:
+                mfdn = max(0.0, (in_mfu[0, 0, 1] + in_mfd[0, 0, 1]) * dtgdp)
+                solab += mfdn
+                solqb_ql_ql += mfdn
+                solqb_qi_qi += mfdn
+
+                # record sink for cloud budget and enthalpy budget diagnostics
+                convsink_ql = mfdn
+                convsink_qi = mfdn
+
+            # *** 3.4: erosion of clouds by turbulent mixing
+            # --- define turbulent erosion rate
+            ldifdt = RCLDIFF * dt
+            if in_convection_type[0, 0] > 0 and out_lude[0, 0, 0] > EPSEC:
+                ldifdt *= RCLDIFF_CONVI
+
+            if li > EPSEC:
+                # calculate environmental humidity
+                e = ldifdt * max(qsmix - qv, 0.0)
+                leros = min(min(a * e, evaplimmix), li)
+                aeros = leros / licld
+
+                # erosion is -ve linear in L, A
+                solac -= aeros
+                solqa_qv_ql += liqfrac * leros
+                solqa_ql_qv -= liqfrac * leros
+                solqa_qv_qi += icefrac * leros
+                solqa_qi_qv -= icefrac * leros
+
+            # *** 3.5: condensation/evaporation due to dqsat/dT
+            dtdp = RDCP * t / in_ap[0, 0, 0]
+            dpmxdt = dp / dt
+            mfdn = in_mfu[0, 0, 1] + in_mfd[0, 0, 1] if tmp_klevel[0] < NLEV - 1 else 0.0
+            wtot = in_w[0, 0, 0] + 0.5 * RG * (in_mfu[0, 0, 0] + in_mfd[0, 0, 0] + mfdn)
+            wtot = min(dpmxdt, max(-dpmxdt, wtot))
+            zzdt = in_hrsw[0, 0, 0] + in_hrlw[0, 0, 0]
+            dtdiab = min(dpmxdt * dtdp, max(-dpmxdt * dtdp, zzdt)) * dt + RALFDCP * ldefr
+            dtforc = dtdp * wtot * dt + dtdiab
+            qold = qsmix
+            told = t
+            t = max(t + dtforc, 160.0)
+
+            qsmix, t = f_cuadjtq(in_ap, qsmix, t)
+
+            dqs = qsmix - qold
+            qsmix = qold
+            t = told
+
+            # *** 3.5a: evaporation of clouds
+            if dqs > 0:
+                levap = min(min(a * min(dqs, licld), evaplimmix), max(qsmix - qv, 0.0))
+                solqa_qv_ql += liqfrac * levap
+                solqa_ql_qv -= liqfrac * levap
+                solqa_qv_qi += icefrac * levap
+                solqa_qi_qv -= icefrac * levap
+
+            # *** 3.5b: formation of clouds
+            # increase of cloud water in existing clouds
+            if a > EPSEC and dqs <= -RLMIN:
+                lcond1 = max(-dqs, 0.0)
+
+                # old limiter
+                if a > 0.99:
+                    cor = 1 / (1 - RETV * qsmix)
+                    cdmax = (qv - qsmix) / (1 + cor * qsmix * f_foedem(t))
+                else:
+                    cdmax = (qv - a * qsmix) / a
+
+                lcond1 = a * max(min(lcond1, cdmax), 0.0)
+                if lcond1 < RLMIN:
+                    lcond1 = 0.0
+
+                # --- all increase goes into liquid unless so cold cloud homogeneously freezes
+                if t > RTHOMO:
+                    solqa_ql_qv += lcond1
+                    solqa_qv_ql -= lcond1
+                    qlfg += lcond1
+                else:
+                    solqa_qi_qv += lcond1
+                    solqa_qv_qi -= lcond1
+                    qifg += lcond1
+
+            # generation of new clouds (da/dt > 0)
+            if dqs <= -RLMIN and a < 1 - EPSEC:
+                # --- critical relative humidity
+                rhc = RAMID
+                sigk = in_ap[0, 0, 0] / tmp_aph_s[0, 0]
+                if sigk > 0.8:
+                    rhc += (1 - RAMID) * ((sigk - 0.8) / 0.2) ** 2
+
+                # --- supersaturation options
+                if __INLINED(NSSOPT == 0):
+                    # no scheme
+                    qe = max(0.0, (qv - a * qsice) / max(EPSEC, 1 - a))
+                elif __INLINED(NSSOPT == 1):
+                    # Tompkins
+                    qe = max(0.0, (qv - a * qsice) / max(EPSEC, 1 - a))
+                elif __INLINED(NSSOPT == 2):
+                    # Lohmann and Karcher
+                    qe = qv
+                else:
+                    # Gierens
+                    qe = qv + li
+
+                if t >= RTT or NSSOPT == 0:
+                    # no ice supersaturation allowed
+                    fac = 1.0
+                else:
+                    # ice supersaturation
+                    fac = fokoop
+
+                if qe >= rhc * qsice * fac and qe < qsice * fac:
+                    acond = -(1 - a) * fac * dqs / max(2 * (fac * qsice - qe), EPSEC)
+                    acond = min(acond, 1 - a)
+
+                    # linear term
+                    lcond2 = -fac * dqs * 0.5 * acond
+
+                    # new limiter formulation
+                    zdl = 2 * (fac * qsice - qe) / max(EPSEC, 1 - a)
+                    expr2 = fac * dqs
+                    if expr2 < -zdl:
+                        lcondlim = (a - 1) * expr2 - fac * qsice + qv
+                        lcond2 = min(lcond2, lcondlim)
+                    lcond2 = max(lcond2, 0.0)
+
+                    expr10 = 1 - a
+                    if lcond2 < RLMIN or expr10 < EPSEC:
+                        lcond2 = 0.0
+                        acond = 0.0
+                    if lcond2 == 0.0:
+                        acond = 0.0
+
+                    # large-scale generation is linear in A and linear in L
+                    solac += acond
+
+                    # --- all increase goes into liquid unless so cold cloud homogeneously freezes
+                    if t > RTHOMO:
+                        solqa_ql_qv += lcond2
+                        solqa_qv_ql -= lcond2
+                        qlfg += lcond2
+                    else:  # homogeneous freezing
+                        solqa_qi_qv += lcond2
+                        solqa_qv_qi -= lcond2
+                        qifg += lcond2
+
+            # *** 3.6: growth of ice by vapour deposition
+            if __INLINED(DEPICE == 1):  # --- ice deposition following Rotstayn et al. (2001)
+                # --- calculate distance from cloud top
+                if a[0, 0, -1] < RCLDTOPCF and a[0, 0, 0] >= RCLDTOPCF:
+                    tmp_cldtopdist[0, 0] = 0.0
+                else:
+                    tmp_cldtopdist[0, 0] += dp / (rho * RG)
+
+                # --- only treat depositional growth if liquid present
+                if t < RTT and qlfg > RLMIN:
+                    vpice = f_foeeice(t) * RV / RD
+                    vpliq = vpice * fokoop
+                    icenuclei = 1000 * exp(12.96 * (vpliq - vpice) / vpliq - 0.639)
+
+                    # --- 0.024 is conductivity of air
+                    # --- 8.87 = 700 ** (1/3) = cube root of the density of ice
+                    add = RLSTT * (RLSTT / (RV * t) - 1) / (0.024 * t)
+                    bdd = RV * t * in_ap[0, 0, 0] / (2.21 * vpice)
+                    cvds = (
+                        7.8
+                        * (icenuclei / rho) ** 0.666
+                        * (vpliq - vpice)
+                        / (8.87 * (add + bdd) * vpice)
+                    )
+
+                    # --- RICEINIT = 1e-12 is initial mass of ice particle
+                    ice0 = max(icecld, icenuclei * RICEINIT / rho)
+
+                    # --- new value of ice
+                    inew = (0.666 * cvds * dt + ice0**0.666) ** 1.5
+
+                    # --- grid-mean deposition rate
+                    depos = max(a * (inew - ice0), 0.0)
+
+                    # --- limit deposition to liquid water amount
+                    depos = min(depos, qlfg)
+
+                    # --- at top of cloud, reduce deposition rate near cloud top
+                    infactor = min(icenuclei / 15000, 1.0)
+                    depos *= min(
+                        infactor
+                        + (1 - infactor)
+                        * (RDEPLIQREFRATE + tmp_cldtopdist[0, 0] / RDEPLIQREFDEPTH),
+                        1.0,
+                    )
+
+                    # --- add to matrix
+                    solqa_qi_ql += depos
+                    solqa_ql_qi -= depos
+                    qifg += depos
+                    qlfg -= depos
+            elif __INLINED(DEPICE == 2):  # --- ice deposition assuming ice PSD
+                # --- calculate distance from cloud top
+                if a[0, 0, -1] < RCLDTOPCF and a[0, 0, 0] >= RCLDTOPCF:
+                    tmp_cldtopdist = 0.0
+                else:
+                    tmp_cldtopdist += dp / (rho * RG)
+
+                # --- only treat depositional growth if liquid present
+                if t < RTT and qlfg > RLMIN:
+                    vpice = f_foeeice(t) * RV / RD
+                    vpliq = vpice * fokoop
+                    icenuclei = 1000 * exp(12.96 * (vpliq - vpice) / vpliq - 0.639)
+
+                    # --- RICEINIT=1e-12 is the initial mass of ice particle
+                    ice0 = max(icecld, icenuclei * RICEINIT / rho)
+
+                    # particle size distribution
+                    tcg = 1
+                    facx1i = 1
+                    apb = RCL_APB1 * vpice - RCL_APB2 * vpice * t + in_ap * RCL_APB3 * t**3
+                    corrfac = (1 / rho) ** 0.5
+                    corrfac2 = ((t / 273) ** 1.5) * 393 / (t + 120)
+                    pr02 = rho * ice0 * RCL_CONST1I / (tcg * facx1i)
+                    term1 = (
+                        (vpliq - vpice)
+                        * t**2
+                        * vpice
+                        * corrfac2
+                        * tcg
+                        * RCL_CONST2I
+                        * facx1i
+                        / (rho * apb * vpice)
+                    )
+                    term2 = (
+                        0.65 * RCL_CONST6I * pr02**RCL_CONST4I
+                        + RCL_CONST3I
+                        * corrfac**0.5
+                        * rho**0.5
+                        * pr02**RCL_CONST5I
+                        / corrfac2**0.5
+                    )
+                    depos = max(a * term1 * term2 * dt, 0.0)
+
+                    # --- limit deposition to liquid water amount
+                    depos = min(depos, qlfg)
+
+                    # --- at top of cloud, reduce deposition rate near cloud top to account for
+                    # --- small scale turbulent processes
+                    infactor = min(icenuclei / 15000, 1.0)
+                    depos *= min(
+                        infactor
+                        + (1 - infactor) * (RDEPLIQREFRATE + tmp_cldtopdist / RDEPLIQREFDEPTH),
+                        1.0,
+                    )
+
+                    # --- add to matrix
+                    solqa_qi_ql += depos
+                    solqa_ql_qi -= depos
+                    qifg += depos
+                    qlfg -= depos
+
+            # === 4: precipitation processes
+            # --- revise in-cloud condensate amount
+            tmpa = 1 / max(a, EPSEC)
+            liqcld = qlfg * tmpa
+            icecld = qifg * tmpa
+
+            # *** 4.1a: sedimentation/falling of ql
+            if __INLINED(FALLQL):
+                # --- source from layer above
+                if tmp_klevel[0] > NCLDTOP - 1:
+                    fallsrce_ql = out_pfplsl[0, 0, -1] * dtgdp
+                    solqa_ql_ql += fallsrce_ql
+                    qlfg += fallsrce_ql
+                    # use first guess precip
+                    qpretot += qlfg
+
+                # --- sink to next layer, constant fall speed
+                fallsink_ql = dtgdp * VQL * rho
+            else:
+                fallsink_ql = 0.0
+
+            # *** 4.1b: sedimentation/falling of qi
+            # --- source from layer above
+            if tmp_klevel[0] > NCLDTOP - 1:
+                fallsrce_qi = out_pfplsi[0, 0, -1] * dtgdp
+                solqa_qi_qi += fallsrce_qi
+                qifg += fallsrce_qi
+                # use first guess precip
+                qpretot += qifg
+
+            # --- sink to next layer, constant fall speed
+            if __INLINED(LAERICESED):
+                vqi = 0.002 * in_re_ice[0, 0, 0]
+            else:
+                vqi = VQI
+            fallsink_qi = dtgdp * vqi * rho
+
+            # *** 4.1c: sedimentation/falling of qr
+            if __INLINED(FALLQR):
+                # --- source from layer above
+                if tmp_klevel[0] > NCLDTOP - 1:
+                    fallsrce_qr = out_pfplsr[0, 0, -1] * dtgdp
+                    solqa_qr_qr += fallsrce_qr
+                    qrfg += fallsrce_qr
+                    # use first guess precip
+                    qpretot += qrfg
+
+                # --- sink to next layer, constant fall speed
+                fallsink_qr = dtgdp * VQR * rho
+            else:
+                fallsink_qr = 0.0
+
+            # *** 4.1d: sedimentation/falling of qs
+            if __INLINED(FALLQS):
+                # --- source from layer above
+                if tmp_klevel[0] > NCLDTOP - 1:
+                    fallsrce_qs = out_pfplss[0, 0, -1] * dtgdp
+                    solqa_qs_qs += fallsrce_qs
+                    qsfg += fallsrce_qs
+                    # use first guess precip
+                    qpretot += qsfg
+
+                # --- sink to next layer, constant fall speed
+                fallsink_qs = dtgdp * VQS * rho
+            else:
+                fallsink_qs = 0.0
+
+            # *** 4.1e: sedimentation/falling of qv
+            if __INLINED(FALLQV):
+                # --- source from layer above
+                if tmp_klevel[0] > NCLDTOP - 1:
+                    fallsrce_qv = pfplsv[0, 0, -1] * dtgdp
+                    solqa_qv_qv += fallsrce_qv
+                    qvfg += fallsrce_qv
+                    # use first guess precip
+                    qpretot += qvfg
+
+                # --- sink to next layer, constant fall speed
+                fallsink_qv = dtgdp * VQV * rho
+            else:
+                fallsink_qv = 0.0
+
+            # --- precip cover overlap using MAX-RAN (maximum-random) overlap
+            if qpretot > EPSEC:
+                tmp_covptot[0, 0] = 1 - (
+                    (1 - tmp_covptot[0, 0])
+                    * (1 - max(a[0, 0, 0], a[0, 0, -1]))
+                    / (1 - min(a[0, 0, -1], 1 - 1e-6))
+                )
+                tmp_covptot[0, 0] = max(tmp_covptot[0, 0], RCOVPMIN)
+                covpclr = max(0.0, tmp_covptot[0, 0] - a)
+                raincld = qrfg / tmp_covptot[0, 0]
+                snowcld = qsfg / tmp_covptot[0, 0]
+                tmp_covpmax[0, 0] = max(tmp_covptot[0, 0], tmp_covpmax[0, 0])
+            else:
+                raincld = 0.0
+                snowcld = 0.0
+                tmp_covptot[0, 0] = 0.0
+                covpclr = 0.0
+                tmp_covpmax[0, 0] = 0.0
+
+            # *** 4.2a: autoconversion to snow
+            if t <= RTT:
+                # --- snow autoconversion rate follow Lin et al. 1983
+                if icecld > EPSEC:
+                    co = dt * RSNOWLIN1 * exp(RSNOWLIN2 * (t - RTT))
+
+                    if __INLINED(LAERICEAUTO):
+                        lcrit = in_icrit_aer[0, 0, 0]
+                        co *= (RNICE / in_nice[0, 0, 0]) ** 0.333
+                    else:
+                        lcrit = RLCRITSNOW
+
+                    snowaut = co * (1 - exp(-((icecld / lcrit) ** 2)))
+                    solqb_qs_qi += snowaut
+
+            # *** 4.2b: autoconversion warm clouds
+            if liqcld > EPSEC:
+                if __INLINED(WARMRAIN == 1):  # --- warm-rain process follow Sundqvist (1989)
+                    co = RKCONV * dt
+
+                    if __INLINED(LAERLIQAUTOLSP):
+                        lcrit = in_lcrit_aer[0, 0, 0]
+                        co *= (RCCN / in_ccn[0, 0, 0]) ** 0.333
+                    else:
+                        lcrit = RCLCRIT_LAND if in_lsm[0, 0] > 0.5 else RCLCRIT_SEA
+
+                    # --- parameters for cloud collection by rain and snow
+                    precip = (out_pfplss[0, 0, -1] + out_pfplsr[0, 0, -1]) / max(
+                        EPSEC, tmp_covptot[0, 0]
+                    )
+                    cfpr = 1 + RPRC1 * sqrt(max(precip, 0.0))
+                    if __INLINED(LAERLIQCOLL):
+                        cfpr *= (RCCN / in_ccn[0, 0, 0]) ** 0.333
+
+                    co *= cfpr
+                    lcrit /= max(cfpr, EPSEC)
+
+                    rainaut = co
+                    if liqcld / lcrit < 20:
+                        rainaut *= 1 - exp(-((liqcld / lcrit) ** 2))
+
+                    # rain freezes instantly
+                    if t <= RTT:
+                        solqb_qs_ql += rainaut
+                    else:
+                        solqb_qr_ql += rainaut
+                elif __INLINED(
+                    WARMRAIN == 2
+                ):  # --- warm-rain process follow Khairoutdinov and Kogan (2000)
+                    if in_lsm[0, 0] > 0.5:
+                        const = RCL_KK_cloud_num_land
+                        lcrit = RCLCRIT_LAND
+                    else:
+                        const = RCL_KK_cloud_num_sea
+                        lcrit = RCLCRIT_SEA
+
+                    if liqcld > lcrit:
+                        rainaut = (
+                            1.5 * a * dt * RCL_KKAau * liqcld**RCL_KKBauq * const**RCL_KKBaun
+                        )
+                        rainaut = min(rainaut, qlfg)
+                        if rainaut < EPSEC:
+                            rainaut = 0.0
+                        rainacc = 2 * a * dt * RCL_KKAac * (liqcld * raincld) ** RCL_KKBac
+                        rainacc = min(rainacc, qlfg)
+                        if rainacc < EPSEC:
+                            rainacc = 0.0
+                    else:
+                        rainaut = 0.0
+                        rainacc = 0.0
+
+                    expr3 = rainaut + rainacc
+                    if t <= RTT:
+                        solqa_qs_ql += expr3
+                        solqa_ql_qs -= expr3
+                    else:
+                        solqa_qr_ql += expr3
+                        solqa_ql_qr -= expr3
+
+            # --- riming - collection of cloud liquid drops by snow and ice
+            if __INLINED(WARMRAIN > 1):
+                if t <= RTT and liqcld > EPSEC:
+                    # fallspeed air density correction
+                    fallcorr = (RDENSREF / rho) ** 0.4
+
+                    # --- riming of snow by cloud water - implicit in lwc
+                    if snowcld > EPSEC and tmp_covptot[0, 0] > 0.01:
+                        # calculate riming term
+                        snowrime = (
+                            0.3
+                            * tmp_covptot[0, 0]
+                            * dt
+                            * RCL_CONST7S
+                            * fallcorr
+                            * (rho * snowcld * RCL_CONST1S) ** RCL_CONST8S
+                        )
+
+                        # limit snow riming term
+                        snowrime = min(snowrime, 1.0)
+
+                        solqb_qs_ql += snowrime
+
+            # *** 4.3a: melting of snow and ice
+            icetot = qifg + qsfg
+            meltmax = 0.0
+
+            # if there are frozen hydrometeors present and dry-bulb temperature > 0degC
+            if icetot > EPSEC and t > RTT:
+                # calculate subsaturation
+                subsat = max(qsice - qv, 0.0)
+
+                # calculate difference between dry-bulb and the temperature at which the wet-bulb=0degC
+                # using an approximation
+                tdmtw0 = t - RTT - subsat * (TW1 + TW2 * (in_ap[0, 0, 0] - TW3) - TW4 * (t - TW5))
+
+                # ensure cons1 is positive
+                cons1 = abs(dt * (1 + 0.5 * tdmtw0) / RTAUMEL)
+                meltmax = max(tdmtw0 * cons1 * RLDCP, 0.0)
+
+            if meltmax > EPSEC and icetot > EPSEC:
+                # apply melting in same proportion as frozen hydrometeor fractions
+                alfa_qi = qifg / icetot
+                melt_qi = min(qifg, alfa_qi * meltmax)
+                alfa_qs = qsfg / icetot
+                melt_qs = min(qsfg, alfa_qs * meltmax)
+
+                # needed in first guess
+                qifg -= melt_qi
+                qrfg += melt_qi + melt_qs
+                qsfg -= melt_qs
+                solqa_qi_qr -= melt_qi
+                solqa_qr_qi += melt_qi
+                solqa_qr_qs += melt_qs
+                solqa_qs_qr -= melt_qs
+
+            # *** 4.3b: freezing of rain
+            if qr > EPSEC:
+                if t[0, 0, 0] <= RTT and t[0, 0, -1] > RTT:
+                    # base of melting layer/top of refreezing layer so store rain/snow fraction for
+                    # precip type diagnosis
+                    qpretot = max(qs + qr, EPSEC)
+                    out_rainfrac_toprfz[0, 0] = qr / qpretot
+                    tmp_rainliq[0, 0] = out_rainfrac_toprfz[0, 0] > 0.8
+
+                if t < RTT:
+                    if tmp_rainliq[0, 0]:
+                        # majority of raindrops completely melted
+                        # slope of rain particle size distribution
+                        lambda_ = (RCL_FAC1 / (rho * qr)) ** RCL_FAC2
+
+                        # calculate freezing rate based on Bigg (1953) and Wisner (1972)
+                        temp = RCL_FZRAB * (t - RTT)
+                        frz = dt * (RCL_CONST5R / rho) * (exp(temp) - 1) * lambda_**RCL_CONST6R
+                        frzmax = max(frz, 0.0)
+                    else:
+                        # majority of raindrops only partially melted
+                        cons1 = abs(dt * (1 + 0.5 * (RTT - t)) / RTAUMEL)
+                        frzmax = max((RTT - t) * cons1 * RLDCP, 0.0)
+
+                    if frzmax > EPSEC:
+                        frz = min(qr, frzmax)
+                        solqa_qs_qr += frz
+                        solqa_qr_qs -= frz
+
+            # *** 4.3c: freezing of liquid
+            frzmax = max((RTHOMO - t) * RLDCP, 0.0)
+            if frzmax > EPSEC and qlfg > EPSEC:
+                frz = min(qlfg, frzmax)
+                solqa_qi_ql += frz
+                solqa_ql_qi -= frz
+
+            # *** 4.4: evaporation of rain/snow
+            if __INLINED(EVAPRAIN == 1):  # --- rain evaporation scheme from Sundquist
+                rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a)
+                rh = min(max(rh, RPRECRHMAX), 1.0)
+                qe = (qv - a * qsliq) / max(EPSEC, 1 - a)
+
+                # --- humidity in moistest covpclr part of domain
+                qe = max(0.0, min(qe, qsliq))
+                lo1 = covpclr > EPSEC and qrfg > EPSEC and qe < rh * qsliq
+                if lo1:
+                    # note: preclr is a rain flux
+                    expr4 = tmp_covptot[0, 0] * dtgdp
+                    expr5 = max(abs(expr4), EPSILON)
+                    expr6 = expr5 if expr4 > 0 else -expr5
+                    preclr = qrfg * covpclr / expr6
+
+                    # --- actual microphysics formula in beta
+                    beta1 = (
+                        sqrt(in_ap[0, 0, 0] / tmp_aph_s[0, 0])
+                        / RVRFACTOR
+                        * preclr
+                        / max(covpclr, EPSEC)
+                    )
+                    beta = RG * RPECONS * 0.5 * beta1**0.5777
+                    denom = 1 + beta * dt * corqsliq
+                    dpr = covpclr * beta * (qsliq - qe) / denom * dp / RG
+                    dpevap = dpr * dtgdp
+
+                    # --- add evaporation term to explicit sink
+                    evap = min(dpevap, qrfg)
+                    solqa_qv_qr += evap
+                    solqa_qr_qv -= evap
+
+                    # --- reduce the total precip coverage proportional to evaporation
+                    tmp_covptot[0, 0] = max(
+                        RCOVPMIN,
+                        tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qrfg),
+                    )
+
+                    # update fg field
+                    qrfg -= evap
+            elif __INLINED(
+                EVAPRAIN == 2
+            ):  # --- rain evaporation scheme based on Abel and Boutle (2013)
+                # --- calculate relative humidity limit for rain evaporation
+                # limit rh for rain evaporation dependent on precipitation fraction
+                rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a)
+                rh = min(max(rh, RPRECRHMAX), 1.0)
+
+                # further limit rh for rain evaporation to 80%
+                rh = min(0.8, rh)
+
+                qe = max(0.0, min(qv, qsliq))
+                lo1 = covpclr > EPSEC and qrfg > EPSEC and qe < rh * qsliq
+                if lo1:
+                    # --- Abel and Boutle (2012) evaporation
+                    # calculate local precipitation (kg/kg)
+                    preclr = qrfg / tmp_covptot[0, 0]
+
+                    # fallspeed air density correction
+                    fallcorr = (RDENSREF / rho) ** 0.4
+
+                    # saturation vapor pressure with respect to liquid phase
+                    esatliq = RV / RD * f_foeeliq(t)
+
+                    # slope of particle size distribution
+                    lambda_ = (RCL_FAC1 / (rho * preclr)) ** RCL_FAC2
+
+                    evap_denom = (
+                        RCL_CDENOM1 * esatliq
+                        - RCL_CDENOM2 * t * esatliq
+                        + RCL_CDENOM3 * t**3 * in_ap[0, 0, 0]
+                    )
+
+                    # temperature dependent conductivity
+                    corr2 = (t / 273) ** 1.5 * 393 / (t + 120)
+
+                    subsat = max(rh * qsliq - qe, 0.0)
+                    beta = (
+                        0.5
+                        / qsliq
+                        * t**2
+                        * esatliq
+                        * RCL_CONST1R
+                        * (corr2 / evap_denom)
+                        * (
+                            0.78 / lambda_**RCL_CONST4R
+                            + RCL_CONST2R
+                            * (rho * fallcorr) ** 0.5
+                            / (corr2**0.5 * lambda_**RCL_CONST3R)
+                        )
+                    )
+                    denom = 1 + beta * dt
+                    dpevap = covpclr * beta * dt * subsat / denom
+
+                    # --- add evaporation term to explicit sink
+                    evap = min(dpevap, qrfg)
+                    solqa_qv_qr += evap
+                    solqa_qr_qv -= evap
+
+                    # --- reduce the total precip coverage proportional to evaporation
+                    tmp_covptot[0, 0] = max(
+                        RCOVPMIN,
+                        tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qrfg),
+                    )
+
+                    # update fg field
+                    qrfg -= evap
+
+            # *** 4.5: evaporation of snow
+            if __INLINED(EVAPSNOW == 1):
+                rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a)
+                rh = min(max(rh, RPRECRHMAX), 1.0)
+                qe = (qv - a * qsice) / max(EPSEC, 1 - a)
+
+                # --- humidity in moistest covpclr part of domain
+                qe = max(0.0, min(qe, qsice))
+                lo1 = covpclr > EPSEC and qsfg > EPSEC and qe < rh * qsice
+                if lo1:
+                    expr7 = tmp_covptot[0, 0] * dtgdp
+                    expr8 = max(abs(expr7), EPSILON)
+                    expr9 = expr8 if expr7 > 0 else -expr8
+                    preclr = qsfg * covpclr / expr9
+
+                    # --- actual microphysics formula in beta
+                    beta1 = (
+                        sqrt(in_ap[0, 0, 0] / tmp_aph_s[0, 0])
+                        / RVRFACTOR
+                        * preclr
+                        / max(covpclr, EPSEC)
+                    )
+                    beta = RG * RPECONS * beta1**0.5777
+                    denom = 1 + beta * dt * corqsice
+                    dpr = covpclr * beta * (qsice - qe) / denom * dp / RG
+                    dpevap = dpr * dtgdp
+
+                    # --- add evaporation term to explicit sink
+                    evap = min(dpevap, qsfg)
+                    solqa_qv_qs += evap
+                    solqa_qs_qv -= evap
+
+                    # --- reduce the total precip coverage proportional to evaporation
+                    tmp_covptot[0, 0] = max(
+                        RCOVPMIN,
+                        tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qsfg),
+                    )
+
+                    # update first guess field
+                    qsfg -= evap
+            elif __INLINED(EVAPSNOW == 2):
+                # --- calculate relative humidity limit for snow evaporation
+                rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a)
+                rh = min(max(rh, RPRECRHMAX), 1.0)
+                qe = (qv - a * qsice) / max(EPSEC, 1 - a)
+
+                # --- humidity in moistest covpclr part of domain
+                qe = max(0.0, min(qe, qsice))
+                lo1 = covpclr > EPSEC and qs > EPSEC and qe < rh * qsice
+                if lo1:
+                    # calculate local precipitation (kg/kg)
+                    preclr = qsfg / tmp_covptot[0, 0]
+                    vpice = f_foeeice(t) * RV / RD
+
+                    # particle size distribution
+                    tcg = 1.0
+                    facx1s = 1.0
+                    apb = (
+                        RCL_APB1 * vpice - RCL_APB2 * vpice * t + in_ap[0, 0, 0] * RCL_APB3 * t**3
+                    )
+                    corrfac = (1 / rho) ** 0.5
+                    corrfac2 = ((t / 273) ** 1.5) * 393 / (t + 120)
+                    pr02 = rho * preclr * RCL_CONST1S / (tcg * facx1s)
+                    term1 = (
+                        (qsice - qe)
+                        * t**2
+                        * vpice
+                        * corrfac2
+                        * tcg
+                        * RCL_CONST2S
+                        * facx1s
+                        / (rho * apb * qsice)
+                    )
+                    term2 = (
+                        0.65 * RCL_CONST6S * pr02**RCL_CONST4S
+                        + RCL_CONST3S
+                        * corrfac**0.5
+                        * rho**0.5
+                        * pr02**RCL_CONST5S
+                        / corrfac2**0.5
+                    )
+                    dpevap = max(covpclr * term1 * term2 * dt, 0.0)
+
+                    # --- limit evaporation to snow amount
+                    evap = min(min(dpevap, evaplimice), qs)
+                    solqa_qv_qs += evap
+                    solqa_qs_qv -= evap
+
+                    # --- reduce the total precip coverage proportional to evaporation
+                    tmp_covptot[0, 0] = max(
+                        RCOVPMIN, tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qs)
+                    )
+
+                    # update first guess field
+                    qsfg -= evap
+
+            # --- evaporate small precipitation amounts
+            if __INLINED(FALLQL):
+                if qlfg < RLMIN:
+                    solqa_qv_ql += qlfg
+                    solqa_ql_qv -= qlfg
+            if __INLINED(FALLQI):
+                if qifg < RLMIN:
+                    solqa_qv_qi += qifg
+                    solqa_qi_qv -= qifg
+            if __INLINED(FALLQR):
+                if qrfg < RLMIN:
+                    solqa_qv_qr += qrfg
+                    solqa_qr_qv -= qrfg
+            if __INLINED(FALLQS):
+                if qsfg < RLMIN:
+                    solqa_qv_qs += qsfg
+                    solqa_qs_qv -= qsfg
+
+            # === 5: solvers for A and L
+            # *** 5.1: solver for cloud cover
+            anew = min((a + solac) / (1 + solab), 1.0)
+            if anew < RAMIN:
+                anew = 0.0
+            da = anew - a0
+
+            # *** 5.2: solver for the microphysics
+            # --- collect sink terms and mark
+            sinksum_ql = -(solqa_ql_ql + solqa_ql_qi + solqa_ql_qr + solqa_ql_qs + solqa_ql_qv)
+            sinksum_qi = -(solqa_qi_ql + solqa_qi_qi + solqa_qi_qr + solqa_qi_qs + solqa_qi_qv)
+            sinksum_qr = -(solqa_qr_ql + solqa_qr_qi + solqa_qr_qr + solqa_qr_qs + solqa_qr_qv)
+            sinksum_qs = -(solqa_qs_ql + solqa_qs_qi + solqa_qs_qr + solqa_qs_qs + solqa_qs_qv)
+            sinksum_qv = -(solqa_qv_ql + solqa_qv_qi + solqa_qv_qr + solqa_qv_qs + solqa_qv_qv)
+
+            # --- calculate overshoot and scaling factor
+            max_ql = max(ql, EPSEC)
+            rat_ql = max(sinksum_ql, max_ql)
+            ratio_ql = max_ql / rat_ql
+            max_qi = max(qi, EPSEC)
+            rat_qi = max(sinksum_qi, max_qi)
+            ratio_qi = max_qi / rat_qi
+            max_qr = max(qr, EPSEC)
+            rat_qr = max(sinksum_qr, max_qr)
+            ratio_qr = max_qr / rat_qr
+            max_qs = max(qs, EPSEC)
+            rat_qs = max(sinksum_qs, max_qs)
+            ratio_qs = max_qs / rat_qs
+            max_qv = max(qv, EPSEC)
+            rat_qv = max(sinksum_qv, max_qv)
+            ratio_qv = max_qv / rat_qv
+
+            # --- now sort ratio to find out which species run out first
+            order_ql, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0(
+                order_ql,
+                index1_ql,
+                index1_qi,
+                index1_qr,
+                index1_qs,
+                index1_qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+            )
+            order_qi, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0(
+                order_qi,
+                index1_ql,
+                index1_qi,
+                index1_qr,
+                index1_qs,
+                index1_qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+            )
+            order_qr, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0(
+                order_qr,
+                index1_ql,
+                index1_qi,
+                index1_qr,
+                index1_qs,
+                index1_qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+            )
+            order_qs, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0(
+                order_qs,
+                index1_ql,
+                index1_qi,
+                index1_qr,
+                index1_qs,
+                index1_qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+            )
+            order_qv, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0(
+                order_qv,
+                index1_ql,
+                index1_qi,
+                index1_qr,
+                index1_qs,
+                index1_qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+            )
+
+            # scale the sink terms, in the correct order, recalculating the scale factor each time
+            sinksum_ql = 0.0
+            sinksum_qi = 0.0
+            sinksum_qr = 0.0
+            sinksum_qs = 0.0
+            sinksum_qv = 0.0
+
+            # --- recalculate sum and scaling factor, and then scale
+            ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1(
+                order_ql,
+                index3_ql_ql,
+                index3_ql_qi,
+                index3_ql_qr,
+                index3_ql_qs,
+                index3_ql_qv,
+                index3_qi_ql,
+                index3_qi_qi,
+                index3_qi_qr,
+                index3_qi_qs,
+                index3_qi_qv,
+                index3_qr_ql,
+                index3_qr_qi,
+                index3_qr_qr,
+                index3_qr_qs,
+                index3_qr_qv,
+                index3_qs_ql,
+                index3_qs_qi,
+                index3_qs_qr,
+                index3_qs_qs,
+                index3_qs_qv,
+                index3_qv_ql,
+                index3_qv_qi,
+                index3_qv_qr,
+                index3_qv_qs,
+                index3_qv_qv,
+                ql,
+                qi,
+                qr,
+                qs,
+                qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+                sinksum_ql,
+                sinksum_qi,
+                sinksum_qr,
+                sinksum_qs,
+                sinksum_qv,
+                solqa_ql_ql,
+                solqa_ql_qi,
+                solqa_ql_qr,
+                solqa_ql_qs,
+                solqa_ql_qv,
+                solqa_qi_ql,
+                solqa_qi_qi,
+                solqa_qi_qr,
+                solqa_qi_qs,
+                solqa_qi_qv,
+                solqa_qr_ql,
+                solqa_qr_qi,
+                solqa_qr_qr,
+                solqa_qr_qs,
+                solqa_qr_qv,
+                solqa_qs_ql,
+                solqa_qs_qi,
+                solqa_qs_qr,
+                solqa_qs_qs,
+                solqa_qs_qv,
+                solqa_qv_ql,
+                solqa_qv_qi,
+                solqa_qv_qr,
+                solqa_qv_qs,
+                solqa_qv_qv,
+            )
+            ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1(
+                order_qi,
+                index3_ql_ql,
+                index3_ql_qi,
+                index3_ql_qr,
+                index3_ql_qs,
+                index3_ql_qv,
+                index3_qi_ql,
+                index3_qi_qi,
+                index3_qi_qr,
+                index3_qi_qs,
+                index3_qi_qv,
+                index3_qr_ql,
+                index3_qr_qi,
+                index3_qr_qr,
+                index3_qr_qs,
+                index3_qr_qv,
+                index3_qs_ql,
+                index3_qs_qi,
+                index3_qs_qr,
+                index3_qs_qs,
+                index3_qs_qv,
+                index3_qv_ql,
+                index3_qv_qi,
+                index3_qv_qr,
+                index3_qv_qs,
+                index3_qv_qv,
+                ql,
+                qi,
+                qr,
+                qs,
+                qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+                sinksum_ql,
+                sinksum_qi,
+                sinksum_qr,
+                sinksum_qs,
+                sinksum_qv,
+                solqa_ql_ql,
+                solqa_ql_qi,
+                solqa_ql_qr,
+                solqa_ql_qs,
+                solqa_ql_qv,
+                solqa_qi_ql,
+                solqa_qi_qi,
+                solqa_qi_qr,
+                solqa_qi_qs,
+                solqa_qi_qv,
+                solqa_qr_ql,
+                solqa_qr_qi,
+                solqa_qr_qr,
+                solqa_qr_qs,
+                solqa_qr_qv,
+                solqa_qs_ql,
+                solqa_qs_qi,
+                solqa_qs_qr,
+                solqa_qs_qs,
+                solqa_qs_qv,
+                solqa_qv_ql,
+                solqa_qv_qi,
+                solqa_qv_qr,
+                solqa_qv_qs,
+                solqa_qv_qv,
+            )
+            ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1(
+                order_qr,
+                index3_ql_ql,
+                index3_ql_qi,
+                index3_ql_qr,
+                index3_ql_qs,
+                index3_ql_qv,
+                index3_qi_ql,
+                index3_qi_qi,
+                index3_qi_qr,
+                index3_qi_qs,
+                index3_qi_qv,
+                index3_qr_ql,
+                index3_qr_qi,
+                index3_qr_qr,
+                index3_qr_qs,
+                index3_qr_qv,
+                index3_qs_ql,
+                index3_qs_qi,
+                index3_qs_qr,
+                index3_qs_qs,
+                index3_qs_qv,
+                index3_qv_ql,
+                index3_qv_qi,
+                index3_qv_qr,
+                index3_qv_qs,
+                index3_qv_qv,
+                ql,
+                qi,
+                qr,
+                qs,
+                qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+                sinksum_ql,
+                sinksum_qi,
+                sinksum_qr,
+                sinksum_qs,
+                sinksum_qv,
+                solqa_ql_ql,
+                solqa_ql_qi,
+                solqa_ql_qr,
+                solqa_ql_qs,
+                solqa_ql_qv,
+                solqa_qi_ql,
+                solqa_qi_qi,
+                solqa_qi_qr,
+                solqa_qi_qs,
+                solqa_qi_qv,
+                solqa_qr_ql,
+                solqa_qr_qi,
+                solqa_qr_qr,
+                solqa_qr_qs,
+                solqa_qr_qv,
+                solqa_qs_ql,
+                solqa_qs_qi,
+                solqa_qs_qr,
+                solqa_qs_qs,
+                solqa_qs_qv,
+                solqa_qv_ql,
+                solqa_qv_qi,
+                solqa_qv_qr,
+                solqa_qv_qs,
+                solqa_qv_qv,
+            )
+            ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1(
+                order_qs,
+                index3_ql_ql,
+                index3_ql_qi,
+                index3_ql_qr,
+                index3_ql_qs,
+                index3_ql_qv,
+                index3_qi_ql,
+                index3_qi_qi,
+                index3_qi_qr,
+                index3_qi_qs,
+                index3_qi_qv,
+                index3_qr_ql,
+                index3_qr_qi,
+                index3_qr_qr,
+                index3_qr_qs,
+                index3_qr_qv,
+                index3_qs_ql,
+                index3_qs_qi,
+                index3_qs_qr,
+                index3_qs_qs,
+                index3_qs_qv,
+                index3_qv_ql,
+                index3_qv_qi,
+                index3_qv_qr,
+                index3_qv_qs,
+                index3_qv_qv,
+                ql,
+                qi,
+                qr,
+                qs,
+                qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+                sinksum_ql,
+                sinksum_qi,
+                sinksum_qr,
+                sinksum_qs,
+                sinksum_qv,
+                solqa_ql_ql,
+                solqa_ql_qi,
+                solqa_ql_qr,
+                solqa_ql_qs,
+                solqa_ql_qv,
+                solqa_qi_ql,
+                solqa_qi_qi,
+                solqa_qi_qr,
+                solqa_qi_qs,
+                solqa_qi_qv,
+                solqa_qr_ql,
+                solqa_qr_qi,
+                solqa_qr_qr,
+                solqa_qr_qs,
+                solqa_qr_qv,
+                solqa_qs_ql,
+                solqa_qs_qi,
+                solqa_qs_qr,
+                solqa_qs_qs,
+                solqa_qs_qv,
+                solqa_qv_ql,
+                solqa_qv_qi,
+                solqa_qv_qr,
+                solqa_qv_qs,
+                solqa_qv_qv,
+            )
+            ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1(
+                order_qv,
+                index3_ql_ql,
+                index3_ql_qi,
+                index3_ql_qr,
+                index3_ql_qs,
+                index3_ql_qv,
+                index3_qi_ql,
+                index3_qi_qi,
+                index3_qi_qr,
+                index3_qi_qs,
+                index3_qi_qv,
+                index3_qr_ql,
+                index3_qr_qi,
+                index3_qr_qr,
+                index3_qr_qs,
+                index3_qr_qv,
+                index3_qs_ql,
+                index3_qs_qi,
+                index3_qs_qr,
+                index3_qs_qs,
+                index3_qs_qv,
+                index3_qv_ql,
+                index3_qv_qi,
+                index3_qv_qr,
+                index3_qv_qs,
+                index3_qv_qv,
+                ql,
+                qi,
+                qr,
+                qs,
+                qv,
+                ratio_ql,
+                ratio_qi,
+                ratio_qr,
+                ratio_qs,
+                ratio_qv,
+                sinksum_ql,
+                sinksum_qi,
+                sinksum_qr,
+                sinksum_qs,
+                sinksum_qv,
+                solqa_ql_ql,
+                solqa_ql_qi,
+                solqa_ql_qr,
+                solqa_ql_qs,
+                solqa_ql_qv,
+                solqa_qi_ql,
+                solqa_qi_qi,
+                solqa_qi_qr,
+                solqa_qi_qs,
+                solqa_qi_qv,
+                solqa_qr_ql,
+                solqa_qr_qi,
+                solqa_qr_qr,
+                solqa_qr_qs,
+                solqa_qr_qv,
+                solqa_qs_ql,
+                solqa_qs_qi,
+                solqa_qs_qr,
+                solqa_qs_qs,
+                solqa_qs_qv,
+                solqa_qv_ql,
+                solqa_qv_qi,
+                solqa_qv_qr,
+                solqa_qv_qs,
+                solqa_qv_qv,
+            )
+
+            # *** 5.2.2: solver
+            # --- set the lhs of equation
+            # --- diagonals: microphysical sink terms + transport
+            lhs_ql_ql = (
+                1
+                + fallsink_ql
+                + solqb_qv_ql
+                + solqb_ql_ql
+                + solqb_qi_ql
+                + solqb_qr_ql
+                + solqb_qs_ql
+            )
+            lhs_qi_qi = (
+                1
+                + fallsink_qi
+                + solqb_qv_qi
+                + solqb_ql_qi
+                + solqb_qi_qi
+                + solqb_qr_qi
+                + solqb_qs_qi
+            )
+            lhs_qr_qr = (
+                1
+                + fallsink_qr
+                + solqb_qv_qr
+                + solqb_ql_qr
+                + solqb_qi_qr
+                + solqb_qr_qr
+                + solqb_qs_qr
+            )
+            lhs_qs_qs = (
+                1
+                + fallsink_qs
+                + solqb_qv_qs
+                + solqb_ql_qs
+                + solqb_qi_qs
+                + solqb_qr_qs
+                + solqb_qs_qs
+            )
+            lhs_qv_qv = (
+                1
+                + fallsink_qv
+                + solqb_qv_qv
+                + solqb_ql_qv
+                + solqb_qi_qv
+                + solqb_qr_qv
+                + solqb_qs_qv
+            )
+
+            # --- non-diagonals: microphysical source terms
+            lhs_ql_qi = -solqb_ql_qi
+            lhs_ql_qr = -solqb_ql_qr
+            lhs_ql_qs = -solqb_ql_qs
+            lhs_ql_qv = -solqb_ql_qv
+            lhs_qi_ql = -solqb_qi_ql
+            lhs_qi_qr = -solqb_qi_qr
+            lhs_qi_qs = -solqb_qi_qs
+            lhs_qi_qv = -solqb_qi_qv
+            lhs_qr_ql = -solqb_qr_ql
+            lhs_qr_qi = -solqb_qr_qi
+            lhs_qr_qs = -solqb_qr_qs
+            lhs_qr_qv = -solqb_qr_qv
+            lhs_qs_ql = -solqb_qs_ql
+            lhs_qs_qi = -solqb_qs_qi
+            lhs_qs_qr = -solqb_qs_qr
+            lhs_qs_qv = -solqb_qs_qv
+            lhs_qv_ql = -solqb_qv_ql
+            lhs_qv_qi = -solqb_qv_qi
+            lhs_qv_qr = -solqb_qv_qr
+            lhs_qv_qs = -solqb_qv_qs
+
+            # --- set the rhs of equation
+            # --- sum the explicit source and sink
+            out_qln[0, 0, 0] = (
+                ql + solqa_ql_ql + solqa_ql_qi + solqa_ql_qr + solqa_ql_qs + solqa_ql_qv
+            )
+            out_qin[0, 0, 0] = (
+                qi + solqa_qi_ql + solqa_qi_qi + solqa_qi_qr + solqa_qi_qs + solqa_qi_qv
+            )
+            out_qrn[0, 0, 0] = (
+                qr + solqa_qr_ql + solqa_qr_qi + solqa_qr_qr + solqa_qr_qs + solqa_qr_qv
+            )
+            out_qsn[0, 0, 0] = (
+                qs + solqa_qs_ql + solqa_qs_qi + solqa_qs_qr + solqa_qs_qs + solqa_qs_qv
+            )
+            qvn = qv + solqa_qv_ql + solqa_qv_qi + solqa_qv_qr + solqa_qv_qs + solqa_qv_qv
+
+            # --- solve by LU decomposition
+            # non pivoting recursive factorization
+            lhs_qi_ql /= lhs_ql_ql  #             JN=1, JM=2
+            lhs_qi_qi -= lhs_qi_ql * lhs_ql_qi  # JN=1, JM=2, IK=2
+            lhs_qi_qr -= lhs_qi_ql * lhs_ql_qr  # JN=1, JM=2, IK=3
+            lhs_qi_qs -= lhs_qi_ql * lhs_ql_qs  # JN=1, JM=2, IK=4
+            lhs_qi_qv -= lhs_qi_ql * lhs_ql_qv  # JN=1, JM=2, IK=0
+            lhs_qr_ql /= lhs_ql_ql  #             JN=1, JM=3
+            lhs_qr_qi -= lhs_qr_ql * lhs_ql_qi  # JN=1, JM=3, IK=2
+            lhs_qr_qr -= lhs_qr_ql * lhs_ql_qr  # JN=1, JM=3, IK=3
+            lhs_qr_qs -= lhs_qr_ql * lhs_ql_qs  # JN=1, JM=3, IK=4
+            lhs_qr_qv -= lhs_qr_ql * lhs_ql_qv  # JN=1, JM=3, IK=0
+            lhs_qs_ql /= lhs_ql_ql  #             JN=1, JM=4
+            lhs_qs_qi -= lhs_qs_ql * lhs_ql_qi  # JN=1, JM=4, IK=2
+            lhs_qs_qr -= lhs_qs_ql * lhs_ql_qr  # JN=1, JM=4, IK=3
+            lhs_qs_qs -= lhs_qs_ql * lhs_ql_qs  # JN=1, JM=4, IK=4
+            lhs_qs_qv -= lhs_qs_ql * lhs_ql_qv  # JN=1, JM=4, IK=0
+            lhs_qv_ql /= lhs_ql_ql  #             JN=1, JM=0
+            lhs_qv_qi -= lhs_qv_ql * lhs_ql_qi  # JN=1, JM=0, IK=2
+            lhs_qv_qr -= lhs_qv_ql * lhs_ql_qr  # JN=1, JM=0, IK=3
+            lhs_qv_qs -= lhs_qv_ql * lhs_ql_qs  # JN=1, JM=0, IK=4
+            lhs_qv_qv -= lhs_qv_ql * lhs_ql_qv  # JN=1, JM=0, IK=0
+            lhs_qr_qi /= lhs_qi_qi  #             JN=2, JM=3
+            lhs_qr_qr -= lhs_qr_qi * lhs_qi_qr  # JN=2, JM=3, IK=3
+            lhs_qr_qs -= lhs_qr_qi * lhs_qi_qs  # JN=2, JM=3, IK=4
+            lhs_qr_qv -= lhs_qr_qi * lhs_qi_qv  # JN=2, JM=3, IK=0
+            lhs_qs_qi /= lhs_qi_qi  #             JN=2, JM=4
+            lhs_qs_qr -= lhs_qs_qi * lhs_qi_qr  # JN=2, JM=4, IK=3
+            lhs_qs_qs -= lhs_qs_qi * lhs_qi_qs  # JN=2, JM=4, IK=4
+            lhs_qs_qv -= lhs_qs_qi * lhs_qi_qv  # JN=2, JM=4, IK=0
+            lhs_qv_qi /= lhs_qi_qi  #             JN=2, JM=0
+            lhs_qv_qr -= lhs_qv_qi * lhs_qi_qr  # JN=2, JM=0, IK=3
+            lhs_qv_qs -= lhs_qv_qi * lhs_qi_qs  # JN=2, JM=0, IK=4
+            lhs_qv_qv -= lhs_qv_qi * lhs_qi_qv  # JN=2, JM=0, IK=0
+            lhs_qs_qr /= lhs_qr_qr  #             JN=3, JM=4
+            lhs_qs_qs -= lhs_qs_qr * lhs_qr_qs  # JN=3, JM=4, IK=4
+            lhs_qs_qv -= lhs_qs_qr * lhs_qr_qv  # JN=3, JM=4, IK=0
+            lhs_qv_qr /= lhs_qr_qr  #             JN=3, JM=0
+            lhs_qv_qs -= lhs_qv_qr * lhs_qr_qs  # JN=3, JM=0, IK=4
+            lhs_qv_qv -= lhs_qv_qr * lhs_qr_qv  # JN=3, JM=0, IK=0
+            lhs_qv_qs /= lhs_qs_qs  #             JN=4, JM=0
+            lhs_qv_qv -= lhs_qv_qs * lhs_qs_qv  # JN=4, JM=0, IK=0
+
+            # backsubstitution: step 1
+            out_qin[0, 0, 0] -= lhs_qi_ql * out_qln[0, 0, 0]
+            out_qrn[0, 0, 0] -= lhs_qr_ql * out_qln[0, 0, 0] + lhs_qr_qi * out_qin[0, 0, 0]
+            out_qsn[0, 0, 0] -= (
+                lhs_qs_ql * out_qln[0, 0, 0]
+                + lhs_qs_qi * out_qin[0, 0, 0]
+                + lhs_qs_qr * out_qrn[0, 0, 0]
+            )
+            qvn -= (
+                lhs_qv_ql * out_qln[0, 0, 0]
+                + lhs_qv_qi * out_qin[0, 0, 0]
+                + lhs_qv_qr * out_qrn[0, 0, 0]
+                + lhs_qv_qs * out_qsn[0, 0, 0]
+            )
+
+            # backsubstitution: step 2
+            qvn /= lhs_qv_qv
+            out_qsn[0, 0, 0] -= lhs_qs_qv * qvn
+            out_qsn[0, 0, 0] /= lhs_qs_qs
+            out_qrn[0, 0, 0] -= lhs_qr_qs * out_qsn[0, 0, 0] + lhs_qr_qv * qvn
+            out_qrn[0, 0, 0] /= lhs_qr_qr
+            out_qin[0, 0, 0] -= (
+                lhs_qi_qr * out_qrn[0, 0, 0] + lhs_qi_qs * out_qsn[0, 0, 0] + lhs_qi_qv * qvn
+            )
+            out_qin[0, 0, 0] /= lhs_qi_qi
+            out_qln[0, 0, 0] -= (
+                lhs_ql_qi * out_qin[0, 0, 0]
+                + lhs_ql_qr * out_qrn[0, 0, 0]
+                + lhs_ql_qs * out_qsn[0, 0, 0]
+                + lhs_ql_qv * qvn
+            )
+            out_qln[0, 0, 0] /= lhs_ql_ql
+
+            # ensure no small values (including negatives) remain in cloud variables
+            # nor precipitation rates
+            if out_qln[0, 0, 0] < EPSEC:
+                qvn += out_qln[0, 0, 0]
+                out_qln[0, 0, 0] = 0.0
+            if out_qin[0, 0, 0] < EPSEC:
+                qvn += out_qin[0, 0, 0]
+                out_qin[0, 0, 0] = 0.0
+            if out_qrn[0, 0, 0] < EPSEC:
+                qvn += out_qrn[0, 0, 0]
+                out_qrn[0, 0, 0] = 0.0
+            if out_qsn[0, 0, 0] < EPSEC:
+                qvn += out_qsn[0, 0, 0]
+                out_qsn[0, 0, 0] = 0.0
+
+            # *** 5.3: precipitation/sedimentation fluxes to next level diagnostic precipitation fluxes
+            out_pfplsl[0, 0, 0] = fallsink_ql * out_qln[0, 0, 0] * rdtgdp
+            out_pfplsi[0, 0, 0] = fallsink_qi * out_qin[0, 0, 0] * rdtgdp
+            out_pfplsr[0, 0, 0] = fallsink_qr * out_qrn[0, 0, 0] * rdtgdp
+            out_pfplss[0, 0, 0] = fallsink_qs * out_qsn[0, 0, 0] * rdtgdp
+            pfplsv = fallsink_qv * qvn * rdtgdp
+
+            # ensure precipitation fraction is zero if no precipitation
+            qpretot = out_pfplss[0, 0, 0] + out_pfplsr[0, 0, 0]
+            if qpretot < EPSEC:
+                tmp_covptot[0, 0] = 0.0
+
+            # === 6: update tendencies
+            # *** 6.1: temperature and CLV budgets
+            flux_ql = (
+                psupsatsrce_ql
+                + convsrce_ql
+                + fallsrce_ql
+                - (fallsink_ql + convsink_ql) * out_qln[0, 0, 0]
+            )
+            if __INLINED(PHASEQL == 1):
+                out_tnd_loc_t[0, 0, 0] += RALVDCP * (out_qln[0, 0, 0] - ql - flux_ql) / dt
+            if __INLINED(PHASEQL == 2):
+                out_tnd_loc_t[0, 0, 0] += RALSDCP * (out_qln[0, 0, 0] - ql - flux_ql) / dt
+            out_tnd_loc_ql[0, 0, 0] += (out_qln[0, 0, 0] - out_ql0[0, 0, 0]) / dt
+
+            flux_qi = (
+                psupsatsrce_qi
+                + convsrce_qi
+                + fallsrce_qi
+                - (fallsink_qi + convsink_qi) * out_qin[0, 0, 0]
+            )
+            if __INLINED(PHASEQI == 1):
+                out_tnd_loc_t[0, 0, 0] += RALVDCP * (out_qin[0, 0, 0] - qi - flux_qi) / dt
+            if __INLINED(PHASEQI == 2):
+                out_tnd_loc_t[0, 0, 0] += RALSDCP * (out_qin[0, 0, 0] - qi - flux_qi) / dt
+            out_tnd_loc_qi[0, 0, 0] += (out_qin[0, 0, 0] - out_qi0[0, 0, 0]) / dt
+
+            flux_qr = (
+                psupsatsrce_qr
+                + convsrce_qr
+                + fallsrce_qr
+                - (fallsink_qr + convsink_qr) * out_qrn[0, 0, 0]
+            )
+            if __INLINED(PHASEQR == 1):
+                out_tnd_loc_t[0, 0, 0] += RALVDCP * (out_qrn[0, 0, 0] - qr - flux_qr) / dt
+            if __INLINED(PHASEQR == 2):
+                out_tnd_loc_t[0, 0, 0] += RALSDCP * (out_qrn[0, 0, 0] - qr - flux_qr) / dt
+            out_tnd_loc_qr[0, 0, 0] += (out_qrn[0, 0, 0] - out_qr0[0, 0, 0]) / dt
+
+            flux_qs = (
+                psupsatsrce_qs
+                + convsrce_qs
+                + fallsrce_qs
+                - (fallsink_qs + convsink_qs) * out_qsn[0, 0, 0]
+            )
+            if __INLINED(PHASEQS == 1):
+                out_tnd_loc_t[0, 0, 0] += RALVDCP * (out_qsn[0, 0, 0] - qs - flux_qs) / dt
+            if __INLINED(PHASEQS == 2):
+                out_tnd_loc_t[0, 0, 0] += RALSDCP * (out_qsn[0, 0, 0] - qs - flux_qs) / dt
+            out_tnd_loc_qs[0, 0, 0] += (out_qsn[0, 0, 0] - out_qs0[0, 0, 0]) / dt
+
+            # *** 6.2: humidity budget
+            out_tnd_loc_qv[0, 0, 0] += (qvn - qv) / dt
+
+            # *** 6.3: cloud cover
+            out_tnd_loc_a[0, 0, 0] += da / dt
+
+            # --- copy precipitation fraction into output variable
+            out_covptot[0, 0, 0] = tmp_covptot[0, 0]
+
+
+@stencil_collection("cloudsc_fluxes")
+def cloudsc_fluxes(
+    in_aph: Field["float"],  # staggered
+    in_foealfa: Field["float"],
+    in_lneg_qi: Field["float"],
+    in_lneg_ql: Field["float"],
+    in_lneg_qr: Field["float"],
+    in_lneg_qs: Field["float"],
+    in_lude: Field["float"],
+    in_pfplsi: Field["float"],
+    in_pfplsl: Field["float"],
+    in_pfplsr: Field["float"],
+    in_pfplss: Field["float"],
+    in_qi0: Field["float"],
+    in_qin: Field["float"],
+    in_ql0: Field["float"],
+    in_qln: Field["float"],
+    in_qr0: Field["float"],
+    in_qrn: Field["float"],
+    in_qs0: Field["float"],
+    in_qsn: Field["float"],
+    in_vfi: Field["float"],
+    in_vfl: Field["float"],
+    out_fcqlng: Field["float"],  # staggered
+    out_fcqnng: Field["float"],  # staggered
+    out_fcqrng: Field["float"],  # staggered
+    out_fcqsng: Field["float"],  # staggered
+    out_fhpsl: Field["float"],  # staggered
+    out_fhpsn: Field["float"],  # staggered
+    out_fplsl: Field["float"],  # staggered
+    out_fplsn: Field["float"],  # staggered
+    out_fsqif: Field["float"],  # staggered
+    out_fsqitur: Field["float"],  # staggered
+    out_fsqlf: Field["float"],  # staggered
+    out_fsqltur: Field["float"],  # staggered
+    out_fsqrf: Field["float"],  # staggered
+    out_fsqsf: Field["float"],  # staggered
+    *,
+    dt: "float",  # scalar, keyword-only (declared after the bare *)
+):
+    from __externals__ import RG, RLSTT, RLVTT
+    # Fills the out_* fields one level at a time: zeros at k=0, then values built from level k-1.
+    # === 7: flux/diagnostics computations
+    with computation(FORWARD):  # sequential in k: level k reads outputs written at k-1
+        with interval(0, 1):  # first level: every output field starts at zero
+            out_fplsl[0, 0, 0] = 0.0
+            out_fplsn[0, 0, 0] = 0.0
+            out_fhpsl[0, 0, 0] = 0.0
+            out_fhpsn[0, 0, 0] = 0.0
+            out_fsqlf[0, 0, 0] = 0.0
+            out_fsqif[0, 0, 0] = 0.0
+            out_fsqrf[0, 0, 0] = 0.0
+            out_fsqsf[0, 0, 0] = 0.0
+            out_fcqlng[0, 0, 0] = 0.0
+            out_fcqnng[0, 0, 0] = 0.0
+            out_fcqrng[0, 0, 0] = 0.0
+            out_fcqsng[0, 0, 0] = 0.0
+            out_fsqltur[0, 0, 0] = 0.0
+            out_fsqitur[0, 0, 0] = 0.0
+
+        with interval(1, None):  # remaining levels: all in_* reads use the level above (k-1)
+            # --- copy general precip arrays back into PFP arrays for GRIB archiving
+            out_fplsl[0, 0, 0] = in_pfplsr[0, 0, -1] + in_pfplsl[0, 0, -1]
+            out_fplsn[0, 0, 0] = in_pfplss[0, 0, -1] + in_pfplsi[0, 0, -1]
+
+            # --- enthalpy flux due to precipitation
+            out_fhpsl[0, 0, 0] = -RLVTT * out_fplsl[0, 0, 0]
+            out_fhpsn[0, 0, 0] = -RLSTT * out_fplsn[0, 0, 0]
+            # --- common factor of the increments below: -(aph[k] - aph[k-1]) / (RG * dt)
+            gdph_r = -(in_aph[0, 0, 0] - in_aph[0, 0, -1]) / (RG * dt)
+            out_fsqlf[0, 0, 0] = out_fsqlf[0, 0, -1]
+            out_fsqif[0, 0, 0] = out_fsqif[0, 0, -1]
+            out_fsqrf[0, 0, 0] = out_fsqlf[0, 0, -1]  # NOTE(review): from fsqlf — confirm
+            out_fsqsf[0, 0, 0] = out_fsqif[0, 0, -1]  # NOTE(review): from fsqif — confirm
+            out_fcqlng[0, 0, 0] = out_fcqlng[0, 0, -1]
+            out_fcqnng[0, 0, 0] = out_fcqnng[0, 0, -1]
+            out_fcqrng[0, 0, 0] = out_fcqlng[0, 0, -1]  # NOTE(review): from fcqlng — confirm
+            out_fcqsng[0, 0, 0] = out_fcqnng[0, 0, -1]  # NOTE(review): from fcqnng — confirm
+            out_fsqltur[0, 0, 0] = out_fsqltur[0, 0, -1]
+            out_fsqitur[0, 0, 0] = out_fsqitur[0, 0, -1]
+
+            # liquid, LS scheme minus detrainment
+            out_fsqlf[0, 0, 0] += (
+                in_qln[0, 0, -1]
+                - in_ql0[0, 0, -1]
+                + in_vfl[0, 0, -1] * dt
+                - in_foealfa[0, 0, -1] * in_lude[0, 0, -1]
+            ) * gdph_r
+            # liquid, negative numbers
+            out_fcqlng[0, 0, 0] += in_lneg_ql[0, 0, -1] * gdph_r
+            # liquid, vertical diffusion
+            out_fsqltur[0, 0, 0] += in_vfl[0, 0, -1] * dt * gdph_r
+
+            # rain, LS scheme
+            out_fsqrf[0, 0, 0] += (in_qrn[0, 0, -1] - in_qr0[0, 0, -1]) * gdph_r
+            # rain, negative numbers
+            out_fcqrng[0, 0, 0] += in_lneg_qr[0, 0, -1] * gdph_r
+
+            # ice, LS scheme minus detrainment
+            out_fsqif[0, 0, 0] += (
+                in_qin[0, 0, -1]
+                - in_qi0[0, 0, -1]
+                + in_vfi[0, 0, -1] * dt
+                - (1 - in_foealfa[0, 0, -1]) * in_lude[0, 0, -1]
+            ) * gdph_r
+            # ice, negative numbers
+            out_fcqnng[0, 0, 0] += in_lneg_qi[0, 0, -1] * gdph_r
+            # ice, vertical diffusion
+            out_fsqitur[0, 0, 0] += in_vfi[0, 0, -1] * dt * gdph_r
+
+            # snow, LS scheme
+            out_fsqsf[0, 0, 0] += (in_qsn[0, 0, -1] - in_qs0[0, 0, -1]) * gdph_r
+            # snow, negative numbers
+            out_fcqsng[0, 0, 0] += in_lneg_qs[0, 0, -1] * gdph_r
diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cuadjtq.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cuadjtq.py
new file mode 100644
index 00000000..5b358fe1
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cuadjtq.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from gt4py import gtscript
+
+from cloudsc4py.framework.stencil import function_collection
+from cloudsc4py.physics._stencils.fcttre import f_foedem, f_foeewm, f_foeldcpm
+from cloudsc4py.utils.f2py import ported_function
+
+
+@function_collection("f_cuadjtq_5")
+@gtscript.function
+def f_cuadjtq_5(qp, qsmix, t):
+    from __externals__ import RETV
+    # One adjustment step: returns (qsmix - cond, t + f_foeldcpm(t) * cond).
+    qsat = min(f_foeewm(t) * qp, 0.5)  # capped at 0.5; presumably a saturation value — confirm
+    cor = 1 / (1 - RETV * qsat)
+    qsat *= cor
+    cond = (qsmix - qsat) / (1 + qsat * cor * f_foedem(t))  # scaled gap between qsmix and qsat
+    t += f_foeldcpm(t) * cond  # f_foeldcpm is evaluated at t before this update
+    qsmix -= cond
+    return qsmix, t
+
+
+@ported_function(from_file="cloudsc_fortran/cloudsc2.F90", from_line=1297, to_line=1314)
+@function_collection("f_cuadjtq")
+@gtscript.function
+def f_cuadjtq(ap, qsmix, t):
+    qp = 1 / ap
+    qsmix, t = f_cuadjtq_5(qp, qsmix, t)
+    qsmix, t = f_cuadjtq_5(qp, qsmix, t)
+    return qsmix, t
diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fccld.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fccld.py
new file mode 100644
index 00000000..9be00540
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fccld.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from gt4py import gtscript
+
+from cloudsc4py.framework.stencil import function_collection
+from cloudsc4py.physics._stencils.fcttre import f_foeeice, f_foeeliq
+from cloudsc4py.utils.f2py import ported_function
+
+
+@ported_function(from_file="common/include/fccld.func.h", from_line=26, to_line=27)
+@function_collection("f_fokoop")
+@gtscript.function
+def f_fokoop(t):
+    from __externals__ import RKOOP1, RKOOP2
+    # Smaller of the linear term RKOOP1 - RKOOP2 * t and the ratio f_foeeliq(t) / f_foeeice(t).
+    return min(RKOOP1 - RKOOP2 * t, f_foeeliq(t) / f_foeeice(t))
diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fcttre.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fcttre.py
new file mode 100644
index 00000000..0a304421
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fcttre.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from gt4py import gtscript
+
+from cloudsc4py.framework.stencil import function_collection
+from cloudsc4py.utils.f2py import ported_function
+
+
+@ported_function(from_file="common/include/fcttre.func.h", from_line=39, to_line=41)
+@function_collection("f_foedelta")
+@gtscript.function
+def f_foedelta(t):
+    from __externals__ import RTT
+    # Step function in t: 1 strictly above RTT, 0 at or below it.
+    return 1 if t > RTT else 0  # NOTE(review): t == RTT yields 0; confirm vs. ported formula
+
+
+@ported_function(from_file="common/include/fcttre.func.h", from_line=82, to_line=84)
+@function_collection("f_foealfa")
+@gtscript.function
+def f_foealfa(t):
+    from __externals__ import RTICE, RTWAT, RTWAT_RTICE_R
+    # Limit t to at most RTWAT and at least RTICE, rescale (t - RTICE), square, cap at 1.0.
+    return min(1.0, ((max(RTICE, min(RTWAT, t)) - RTICE) * RTWAT_RTICE_R) ** 2)
+
+
+@ported_function(from_file="common/include/fcttre.func.h", from_line=89, to_line=92)
+@function_collection("f_foeewm")
+@gtscript.function
+def f_foeewm(t):
+    from __externals__ import R2ES, R3IES, R3LES, R4IES, R4LES, RTT
+    # R2ES times the f_foealfa(t)-weighted sum of the R*LES and R*IES exponential terms.
+    return R2ES * (
+        f_foealfa(t) * exp(R3LES * (t - RTT) / (t - R4LES))
+        + (1 - f_foealfa(t)) * (exp(R3IES * (t - RTT) / (t - R4IES)))
+    )
+
+
+@ported_function(from_file="common/include/fcttre.func.h", from_line=100, to_line=101)
+@function_collection("f_foedem")
+@gtscript.function
+def f_foedem(t):
+    from __externals__ import R4IES, R4LES, R5ALSCP, R5ALVCP
+    # f_foealfa(t)-weighted sum of R5ALVCP / (t - R4LES)**2 and R5ALSCP / (t - R4IES)**2.
+    return f_foealfa(t) * R5ALVCP * (1 / (t - R4LES) ** 2) + (1 - f_foealfa(t)) * R5ALSCP * (
+        1 / (t - R4IES) ** 2
+    )
+
+
+@ported_function(from_file="common/include/fcttre.func.h", from_line=103, to_line=104)
+@function_collection("f_foeldcpm")
+@gtscript.function
+def f_foeldcpm(t):
+    from __externals__ import RALSDCP, RALVDCP
+    # Blend of RALVDCP and RALSDCP with weights f_foealfa(t) and 1 - f_foealfa(t).
+    return f_foealfa(t) * RALVDCP + (1 - f_foealfa(t)) * RALSDCP
+
+
+@ported_function(from_file="common/include/fcttre.func.h", from_line=161, to_line=164)
+@function_collection("f_foeeliq")
+@gtscript.function
+def f_foeeliq(t):
+    from __externals__ import R2ES, R3LES, R4LES, RTT
+    # Exponential curve in t built from the R*LES constants and scaled by R2ES.
+    return R2ES * exp(R3LES * (t - RTT) / (t - R4LES))  # quotient is singular at t == R4LES
+
+
+@ported_function(from_file="common/include/fcttre.func.h", from_line=161, to_line=164)
+@function_collection("f_foeeice")
+@gtscript.function
+def f_foeeice(t):
+    from __externals__ import R2ES, R3IES, R4IES, RTT
+    # NOTE(review): the from_line/to_line range above equals f_foeeliq's (161-164) — confirm.
+    return R2ES * exp(R3IES * (t - RTT) / (t - R4IES))  # quotient is singular at t == R4IES
diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/helpers.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/helpers.py
new file mode 100644
index 00000000..defc4c63
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/helpers.py
@@ -0,0 +1,269 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from gt4py import gtscript
+
+from cloudsc4py.framework.stencil import function_collection
+
+
+@function_collection("f_helper_0")
+@gtscript.function
+def f_helper_0(
+    order,
+    index1_ql,
+    index1_qi,
+    index1_qr,
+    index1_qs,
+    index1_qv,
+    ratio_ql,
+    ratio_qi,
+    ratio_qr,
+    ratio_qs,
+    ratio_qv,
+):
+    minimum = 1e32
+
+    if index1_ql and ratio_ql < minimum:
+        order = 1
+        minimum = ratio_ql
+    if index1_qi and ratio_qi < minimum:
+        order = 2
+        minimum = ratio_qi
+    if index1_qr and ratio_qr < minimum:
+        order = 3
+        minimum = ratio_qr
+    if index1_qs and ratio_qs < minimum:
+        order = 4
+        minimum = ratio_qs
+    if index1_qv and ratio_qv < minimum:
+        order = 0
+
+    if order == 1:
+        index1_ql = False
+    if order == 2:
+        index1_qi = False
+    if order == 3:
+        index1_qr = False
+    if order == 4:
+        index1_qs = False
+    if order == 0:
+        index1_qv = False
+
+    return order, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv
+
+
+@function_collection("f_helper_1")
+@gtscript.function
+def f_helper_1(
+    order,
+    index3_ql_ql,
+    index3_ql_qi,
+    index3_ql_qr,
+    index3_ql_qs,
+    index3_ql_qv,
+    index3_qi_ql,
+    index3_qi_qi,
+    index3_qi_qr,
+    index3_qi_qs,
+    index3_qi_qv,
+    index3_qr_ql,
+    index3_qr_qi,
+    index3_qr_qr,
+    index3_qr_qs,
+    index3_qr_qv,
+    index3_qs_ql,
+    index3_qs_qi,
+    index3_qs_qr,
+    index3_qs_qs,
+    index3_qs_qv,
+    index3_qv_ql,
+    index3_qv_qi,
+    index3_qv_qr,
+    index3_qv_qs,
+    index3_qv_qv,
+    ql,
+    qi,
+    qr,
+    qs,
+    qv,
+    ratio_ql,
+    ratio_qi,
+    ratio_qr,
+    ratio_qs,
+    ratio_qv,
+    sinksum_ql,
+    sinksum_qi,
+    sinksum_qr,
+    sinksum_qs,
+    sinksum_qv,
+    solqa_ql_ql,
+    solqa_ql_qi,
+    solqa_ql_qr,
+    solqa_ql_qs,
+    solqa_ql_qv,
+    solqa_qi_ql,
+    solqa_qi_qi,
+    solqa_qi_qr,
+    solqa_qi_qs,
+    solqa_qi_qv,
+    solqa_qr_ql,
+    solqa_qr_qi,
+    solqa_qr_qr,
+    solqa_qr_qs,
+    solqa_qr_qv,
+    solqa_qs_ql,
+    solqa_qs_qi,
+    solqa_qs_qr,
+    solqa_qs_qs,
+    solqa_qs_qv,
+    solqa_qv_ql,
+    solqa_qv_qi,
+    solqa_qv_qr,
+    solqa_qv_qs,
+    solqa_qv_qv,
+):
+    from __externals__ import EPSEC
+    # Works on the species selected by order: 1=ql, 2=qi, 3=qr, 4=qs, 0=qv; other values do nothing.
+    # recalculate sum and scaling factor: flag negative solqa entries, update sinksum, set ratio
+    if order == 1:
+        index3_ql_ql = solqa_ql_ql < 0.0
+        index3_ql_qi = solqa_ql_qi < 0.0
+        index3_ql_qr = solqa_ql_qr < 0.0
+        index3_ql_qs = solqa_ql_qs < 0.0
+        index3_ql_qv = solqa_ql_qv < 0.0
+        sinksum_ql -= solqa_ql_ql + solqa_ql_qi + solqa_ql_qr + solqa_ql_qs + solqa_ql_qv
+        mm = max(ql, EPSEC)
+        rr = max(sinksum_ql, mm)
+        ratio_ql = mm / rr
+    elif order == 2:
+        index3_qi_ql = solqa_qi_ql < 0.0
+        index3_qi_qi = solqa_qi_qi < 0.0
+        index3_qi_qr = solqa_qi_qr < 0.0
+        index3_qi_qs = solqa_qi_qs < 0.0
+        index3_qi_qv = solqa_qi_qv < 0.0
+        sinksum_qi -= solqa_qi_ql + solqa_qi_qi + solqa_qi_qr + solqa_qi_qs + solqa_qi_qv
+        mm = max(qi, EPSEC)
+        rr = max(sinksum_qi, mm)
+        ratio_qi = mm / rr
+    elif order == 3:
+        index3_qr_ql = solqa_qr_ql < 0.0
+        index3_qr_qi = solqa_qr_qi < 0.0
+        index3_qr_qr = solqa_qr_qr < 0.0
+        index3_qr_qs = solqa_qr_qs < 0.0
+        index3_qr_qv = solqa_qr_qv < 0.0
+        sinksum_qr -= solqa_qr_ql + solqa_qr_qi + solqa_qr_qr + solqa_qr_qs + solqa_qr_qv
+        mm = max(qr, EPSEC)
+        rr = max(sinksum_qr, mm)
+        ratio_qr = mm / rr
+    elif order == 4:
+        index3_qs_ql = solqa_qs_ql < 0.0
+        index3_qs_qi = solqa_qs_qi < 0.0
+        index3_qs_qr = solqa_qs_qr < 0.0
+        index3_qs_qs = solqa_qs_qs < 0.0
+        index3_qs_qv = solqa_qs_qv < 0.0
+        sinksum_qs -= solqa_qs_ql + solqa_qs_qi + solqa_qs_qr + solqa_qs_qs + solqa_qs_qv
+        mm = max(qs, EPSEC)
+        rr = max(sinksum_qs, mm)
+        ratio_qs = mm / rr
+    elif order == 0:
+        index3_qv_ql = solqa_qv_ql < 0.0
+        index3_qv_qi = solqa_qv_qi < 0.0
+        index3_qv_qr = solqa_qv_qr < 0.0
+        index3_qv_qs = solqa_qv_qs < 0.0
+        index3_qv_qv = solqa_qv_qv < 0.0
+        sinksum_qv -= solqa_qv_ql + solqa_qv_qi + solqa_qv_qr + solqa_qv_qs + solqa_qv_qv
+        mm = max(qv, EPSEC)
+        rr = max(sinksum_qv, mm)
+        ratio_qv = mm / rr
+    # each flagged solqa_<sel>_<n> and its mirror solqa_<n>_<sel> are scaled; the diagonal twice
+    # scale
+    if order == 1:
+        if index3_ql_ql:
+            solqa_ql_ql *= ratio_ql
+            solqa_ql_ql *= ratio_ql
+        if index3_ql_qi:
+            solqa_ql_qi *= ratio_ql
+            solqa_qi_ql *= ratio_ql
+        if index3_ql_qr:
+            solqa_ql_qr *= ratio_ql
+            solqa_qr_ql *= ratio_ql
+        if index3_ql_qs:
+            solqa_ql_qs *= ratio_ql
+            solqa_qs_ql *= ratio_ql
+        if index3_ql_qv:
+            solqa_ql_qv *= ratio_ql
+            solqa_qv_ql *= ratio_ql
+    elif order == 2:
+        if index3_qi_ql:
+            solqa_qi_ql *= ratio_qi
+            solqa_ql_qi *= ratio_qi
+        if index3_qi_qi:
+            solqa_qi_qi *= ratio_qi
+            solqa_qi_qi *= ratio_qi
+        if index3_qi_qr:
+            solqa_qi_qr *= ratio_qi
+            solqa_qr_qi *= ratio_qi
+        if index3_qi_qs:
+            solqa_qi_qs *= ratio_qi
+            solqa_qs_qi *= ratio_qi
+        if index3_qi_qv:
+            solqa_qi_qv *= ratio_qi
+            solqa_qv_qi *= ratio_qi
+    elif order == 3:
+        if index3_qr_ql:
+            solqa_qr_ql *= ratio_qr
+            solqa_ql_qr *= ratio_qr
+        if index3_qr_qi:
+            solqa_qr_qi *= ratio_qr
+            solqa_qi_qr *= ratio_qr
+        if index3_qr_qr:
+            solqa_qr_qr *= ratio_qr
+            solqa_qr_qr *= ratio_qr
+        if index3_qr_qs:
+            solqa_qr_qs *= ratio_qr
+            solqa_qs_qr *= ratio_qr
+        if index3_qr_qv:
+            solqa_qr_qv *= ratio_qr
+            solqa_qv_qr *= ratio_qr
+    elif order == 4:
+        if index3_qs_ql:
+            solqa_qs_ql *= ratio_qs
+            solqa_ql_qs *= ratio_qs
+        if index3_qs_qi:
+            solqa_qs_qi *= ratio_qs
+            solqa_qi_qs *= ratio_qs
+        if index3_qs_qr:
+            solqa_qs_qr *= ratio_qs
+            solqa_qr_qs *= ratio_qs
+        if index3_qs_qs:
+            solqa_qs_qs *= ratio_qs
+            solqa_qs_qs *= ratio_qs
+        if index3_qs_qv:
+            solqa_qs_qv *= ratio_qs
+            solqa_qv_qs *= ratio_qs
+    elif order == 0:
+        if index3_qv_ql:
+            solqa_qv_ql *= ratio_qv
+            solqa_ql_qv *= ratio_qv
+        if index3_qv_qi:
+            solqa_qv_qi *= ratio_qv
+            solqa_qi_qv *= ratio_qv
+        if index3_qv_qr:
+            solqa_qv_qr *= ratio_qv
+            solqa_qr_qv *= ratio_qv
+        if index3_qv_qs:
+            solqa_qv_qs *= ratio_qv
+            solqa_qs_qv *= ratio_qv
+        if index3_qv_qv:
+            solqa_qv_qv *= ratio_qv
+            solqa_qv_qv *= ratio_qv
+    # NOTE(review): only ratio_* is returned; confirm callers need no index3/sinksum/solqa updates.
+    return ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv
diff --git a/src/cloudsc_python/src/cloudsc4py/physics/cloudsc.py b/src/cloudsc_python/src/cloudsc4py/physics/cloudsc.py
new file mode 100644
index 00000000..f941c5bc
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/physics/cloudsc.py
@@ -0,0 +1,227 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+from functools import cached_property
+from itertools import repeat
+import numpy as np
+import sys
+from typing import TYPE_CHECKING
+
+from cloudsc4py.framework.components import ImplicitTendencyComponent
+from cloudsc4py.framework.grid import I, J, K
+from cloudsc4py.framework.storage import managed_temporary_storage
+from cloudsc4py.utils.numpyx import assign
+
+if TYPE_CHECKING:
+    from datetime import timedelta
+    from typing import Dict
+
+    from sympl._core.typingx import PropertyDict
+
+    from cloudsc4py.framework.config import GT4PyConfig
+    from cloudsc4py.framework.grid import ComputationalGrid
+    from cloudsc4py.utils.iox import (
+        YoecldpParameters,
+        YoethfParameters,
+        YomcstParameters,
+        YrecldpParameters,
+    )
+    from cloudsc4py.utils.typingx import StorageDict
+
+
+class Cloudsc(ImplicitTendencyComponent):
+    def __init__(
+        self,
+        computational_grid: ComputationalGrid,
+        yoecldp_parameters: YoecldpParameters,
+        yoethf_parameters: YoethfParameters,
+        yomcst_parameters: YomcstParameters,
+        yrecldp_parameters: YrecldpParameters,
+        *,
+        enable_checks: bool = True,
+        gt4py_config: GT4PyConfig,
+    ) -> None:
+        """Collect the stencil externals and compile the `cloudsc` stencil with them."""
+        super().__init__(computational_grid, enable_checks=enable_checks, gt4py_config=gt4py_config)
+
+        self.nlev = self.computational_grid.grids[I, J, K].shape[2]
+        # on a key clash the later entry wins: parameter sets first, local constants last
+        externals = {
+            **yoecldp_parameters.dict(),
+            **yoethf_parameters.dict(),
+            **yomcst_parameters.dict(),
+            **yrecldp_parameters.dict(),
+            "DEPICE": 1,
+            "EPSEC": 1e-14,
+            "EPSILON": 100 * sys.float_info.epsilon,
+            "EVAPRAIN": 2,
+            "EVAPSNOW": 1,
+            "FALLQV": False,
+            "FALLQL": False,
+            "FALLQI": False,
+            "FALLQR": True,
+            "FALLQS": True,
+            "MELTQV": -99,
+            "MELTQL": yoecldp_parameters.NCLDQI,
+            "MELTQI": yoecldp_parameters.NCLDQR,
+            "MELTQR": yoecldp_parameters.NCLDQS,
+            "MELTQS": yoecldp_parameters.NCLDQR,
+            "NLEV": self.nlev,
+            "PHASEQV": 0,
+            "PHASEQL": 1,
+            "PHASEQI": 2,
+            "PHASEQR": 1,
+            "PHASEQS": 2,
+            "RDCP": yomcst_parameters.RD / yomcst_parameters.RCPD,
+            "RLDCP": 1 / (yoethf_parameters.RALSDCP - yoethf_parameters.RALVDCP),
+            "TW1": 1329.31,
+            "TW2": 0.0074615,
+            "TW3": 0.85e5,
+            "TW4": 40.637,
+            "TW5": 275.0,
+            "VQV": 0.0,
+            "VQL": 0.0,
+            "VQI": yrecldp_parameters.RVICE,
+            "VQR": yrecldp_parameters.RVRAIN,
+            "VQS": yrecldp_parameters.RVSNOW,
+            "WARMRAIN": 2,
+        }
+
+        # the merged dictionary is handed to the stencil compiler as its externals
+        self.cloudsc = self.compile_stencil("cloudsc", externals)
+
+    @cached_property
+    def _input_properties(self) -> PropertyDict:
+        # grid and units declared for each input field (units still blank); todo(stubbiali): sort out units
+        return {
+            "b_convection_on": {"grid": (I, J), "units": ""},
+            "f_a": {"grid": (I, J, K), "units": ""},
+            "f_ap": {"grid": (I, J, K), "units": ""},
+            "f_aph": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_ccn": {"grid": (I, J, K), "units": ""},
+            "f_hrlw": {"grid": (I, J, K), "units": ""},
+            "f_hrsw": {"grid": (I, J, K), "units": ""},
+            "f_icrit_aer": {"grid": (I, J, K), "units": ""},
+            "f_lcrit_aer": {"grid": (I, J, K), "units": ""},
+            "f_lsm": {"grid": (I, J), "units": ""},
+            "f_lu": {"grid": (I, J, K), "units": ""},
+            "f_lude": {"grid": (I, J, K), "units": ""},
+            "f_mfd": {"grid": (I, J, K), "units": ""},
+            "f_mfu": {"grid": (I, J, K), "units": ""},
+            "f_nice": {"grid": (I, J, K), "units": ""},
+            "f_qi": {"grid": (I, J, K), "units": ""},
+            "f_ql": {"grid": (I, J, K), "units": ""},
+            "f_qr": {"grid": (I, J, K), "units": ""},
+            "f_qs": {"grid": (I, J, K), "units": ""},
+            "f_qv": {"grid": (I, J, K), "units": ""},
+            "f_re_ice": {"grid": (I, J, K), "units": ""},
+            "f_snde": {"grid": (I, J, K), "units": ""},
+            "f_supsat": {"grid": (I, J, K), "units": ""},
+            "f_t": {"grid": (I, J, K), "units": ""},
+            "f_tnd_tmp_a": {"grid": (I, J, K), "units": ""},
+            "f_tnd_tmp_qi": {"grid": (I, J, K), "units": ""},
+            "f_tnd_tmp_ql": {"grid": (I, J, K), "units": ""},
+            "f_tnd_tmp_qr": {"grid": (I, J, K), "units": ""},
+            "f_tnd_tmp_qs": {"grid": (I, J, K), "units": ""},
+            "f_tnd_tmp_qv": {"grid": (I, J, K), "units": ""},
+            "f_tnd_tmp_t": {"grid": (I, J, K), "units": ""},
+            "f_vfi": {"grid": (I, J, K), "units": ""},
+            "f_vfl": {"grid": (I, J, K), "units": ""},
+            "f_w": {"grid": (I, J, K), "units": ""},
+            "i_convection_type": {"grid": (I, J), "units": ""},
+        }
+
+    @cached_property
+    def _tendency_properties(self) -> PropertyDict:
+        # grid and units declared for each tendency field; todo(stubbiali): sort out units
+        return {
+            "f_a": {"grid": (I, J, K), "units": "s^-1"},
+            "f_t": {"grid": (I, J, K), "units": "s^-1"},
+            "f_qv": {"grid": (I, J, K), "units": "s^-1"},
+            "f_ql": {"grid": (I, J, K), "units": "s^-1"},
+            "f_qi": {"grid": (I, J, K), "units": "s^-1"},
+            "f_qr": {"grid": (I, J, K), "units": "s^-1"},
+            "f_qs": {"grid": (I, J, K), "units": "s^-1"},
+        }
+
+    @cached_property
+    def _diagnostic_properties(self) -> PropertyDict:
+        # grid and units declared for each diagnostic field (units blank); todo(stubbiali): sort out units
+        return {
+            "f_covptot": {"grid": (I, J, K), "units": ""},
+            "f_fcqlng": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fcqnng": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fcqrng": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fcqsng": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fhpsl": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fhpsn": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fplsl": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fplsn": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fsqif": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fsqitur": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fsqlf": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fsqltur": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fsqrf": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fsqsf": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_rainfrac_toprfz": {"grid": (I, J), "units": ""},
+        }
+
+    def array_call(
+        self,
+        state: StorageDict,
+        timestep: timedelta,
+        out_tendencies: StorageDict,
+        out_diagnostics: StorageDict,
+        overwrite_tendencies: Dict[str, bool],
+    ) -> None:
+        """Run the compiled `cloudsc` stencil once; `overwrite_tendencies` is not read here."""
+        with managed_temporary_storage(
+            self.computational_grid,
+            *repeat(((I, J), "float"), 6),
+            ((I, J), "bool"),
+            ((K,), "int"),
+            gt4py_config=self.gt4py_config,
+        ) as (aph_s, cldtopdist, covpmax, covptot, paphd, trpaus, rainliq, klevel):
+            def suffix(field_name):  # text after the first underscore, e.g. "f_qv" -> "qv"
+                return field_name.split("_", maxsplit=1)[1]
+            inputs = {"in_" + suffix(key): state[key] for key in self.input_properties}
+            tendencies = {
+                "out_tnd_loc_" + suffix(key): out_tendencies[key]
+                for key in self.tendency_properties
+            }
+            diagnostics = {
+                "out_" + suffix(key): out_diagnostics[key]
+                for key in self.diagnostic_properties
+            }
+            temporaries = dict(
+                tmp_aph_s=aph_s,
+                tmp_cldtopdist=cldtopdist,
+                tmp_covpmax=covpmax,
+                tmp_covptot=covptot,
+                tmp_klevel=klevel,
+                tmp_paphd=paphd,
+                tmp_rainliq=rainliq,
+                tmp_trpaus=trpaus,
+            )
+            aph_s[...] = state["f_aph"][..., self.nlev]
+            assign(klevel, np.arange(self.nlev + 1))
+            self.cloudsc(
+                **inputs,
+                **tendencies,
+                **diagnostics,
+                **temporaries,
+                dt=timestep.total_seconds(),
+                origin=(0, 0, 0),
+                domain=self.computational_grid.grids[I, J, K - 1 / 2].shape,
+                validate_args=self.gt4py_config.validate_args,
+                exec_info=self.gt4py_config.exec_info,
+            )
diff --git a/src/cloudsc_python/src/cloudsc4py/physics/cloudsc_split.py b/src/cloudsc_python/src/cloudsc4py/physics/cloudsc_split.py
new file mode 100644
index 00000000..d29ea96d
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/physics/cloudsc_split.py
@@ -0,0 +1,318 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+from functools import cached_property
+from itertools import repeat
+import numpy as np
+import sys
+from typing import TYPE_CHECKING
+
+from cloudsc4py.framework.components import ImplicitTendencyComponent
+from cloudsc4py.framework.grid import I, J, K
+from cloudsc4py.framework.storage import managed_temporary_storage
+from cloudsc4py.utils.numpyx import assign
+
+if TYPE_CHECKING:
+    from datetime import timedelta
+    from typing import Dict
+
+    from sympl._core.typingx import PropertyDict
+
+    from cloudsc4py.framework.config import GT4PyConfig
+    from cloudsc4py.framework.grid import ComputationalGrid
+    from cloudsc4py.utils.iox import (
+        YoecldpParameters,
+        YoethfParameters,
+        YomcstParameters,
+        YrecldpParameters,
+    )
+    from cloudsc4py.utils.typingx import StorageDict
+
+
+class Cloudsc(ImplicitTendencyComponent):
+    def __init__(
+        self,
+        computational_grid: ComputationalGrid,
+        yoecldp_parameters: YoecldpParameters,
+        yoethf_parameters: YoethfParameters,
+        yomcst_parameters: YomcstParameters,
+        yrecldp_parameters: YrecldpParameters,
+        *,
+        enable_checks: bool = True,
+        gt4py_config: GT4PyConfig,
+    ) -> None:
+        """Collect the stencil externals and compile the tendencies and fluxes stencils."""
+        super().__init__(computational_grid, enable_checks=enable_checks, gt4py_config=gt4py_config)
+
+        self.nlev = self.computational_grid.grids[I, J, K].shape[2]
+        # on a key clash the later entry wins: parameter sets first, local constants last
+        externals = {
+            **yoecldp_parameters.dict(),
+            **yoethf_parameters.dict(),
+            **yomcst_parameters.dict(),
+            **yrecldp_parameters.dict(),
+            "DEPICE": 1,
+            "EPSEC": 1e-14,
+            "EPSILON": 100 * sys.float_info.epsilon,
+            "EVAPRAIN": 2,
+            "EVAPSNOW": 1,
+            "FALLQV": False,
+            "FALLQL": False,
+            "FALLQI": False,
+            "FALLQR": True,
+            "FALLQS": True,
+            "MELTQV": -99,
+            "MELTQL": yoecldp_parameters.NCLDQI,
+            "MELTQI": yoecldp_parameters.NCLDQR,
+            "MELTQR": yoecldp_parameters.NCLDQS,
+            "MELTQS": yoecldp_parameters.NCLDQR,
+            "NLEV": self.nlev,
+            "PHASEQV": 0,
+            "PHASEQL": 1,
+            "PHASEQI": 2,
+            "PHASEQR": 1,
+            "PHASEQS": 2,
+            "RDCP": yomcst_parameters.RD / yomcst_parameters.RCPD,
+            "RLDCP": 1 / (yoethf_parameters.RALSDCP - yoethf_parameters.RALVDCP),
+            "TW1": 1329.31,
+            "TW2": 0.0074615,
+            "TW3": 0.85e5,
+            "TW4": 40.637,
+            "TW5": 275.0,
+            "VQV": 0.0,
+            "VQL": 0.0,
+            "VQI": yrecldp_parameters.RVICE,
+            "VQR": yrecldp_parameters.RVRAIN,
+            "VQS": yrecldp_parameters.RVSNOW,
+            "WARMRAIN": 2,
+        }
+
+        # both stencils are compiled against the same merged externals
+        self.cloudsc_tendencies = self.compile_stencil("cloudsc_tendencies", externals)
+        self.cloudsc_fluxes = self.compile_stencil("cloudsc_fluxes", externals)
+
+    @cached_property
+    def _input_properties(self) -> PropertyDict:
+        # grid and units declared for each input field (units still blank); todo(stubbiali): sort out units
+        return {
+            "b_convection_on": {"grid": (I, J), "units": ""},
+            "f_a": {"grid": (I, J, K), "units": ""},
+            "f_ap": {"grid": (I, J, K), "units": ""},
+            "f_aph": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_ccn": {"grid": (I, J, K), "units": ""},
+            "f_hrlw": {"grid": (I, J, K), "units": ""},
+            "f_hrsw": {"grid": (I, J, K), "units": ""},
+            "f_icrit_aer": {"grid": (I, J, K), "units": ""},
+            "f_lcrit_aer": {"grid": (I, J, K), "units": ""},
+            "f_lsm": {"grid": (I, J), "units": ""},
+            "f_lu": {"grid": (I, J, K), "units": ""},
+            "f_lude": {"grid": (I, J, K), "units": ""},
+            "f_mfd": {"grid": (I, J, K), "units": ""},
+            "f_mfu": {"grid": (I, J, K), "units": ""},
+            "f_nice": {"grid": (I, J, K), "units": ""},
+            "f_qi": {"grid": (I, J, K), "units": ""},
+            "f_ql": {"grid": (I, J, K), "units": ""},
+            "f_qr": {"grid": (I, J, K), "units": ""},
+            "f_qs": {"grid": (I, J, K), "units": ""},
+            "f_qv": {"grid": (I, J, K), "units": ""},
+            "f_re_ice": {"grid": (I, J, K), "units": ""},
+            "f_snde": {"grid": (I, J, K), "units": ""},
+            "f_supsat": {"grid": (I, J, K), "units": ""},
+            "f_t": {"grid": (I, J, K), "units": ""},
+            "f_tnd_tmp_a": {"grid": (I, J, K), "units": ""},
+            "f_tnd_tmp_qi": {"grid": (I, J, K), "units": ""},
+            "f_tnd_tmp_ql": {"grid": (I, J, K), "units": ""},
+            "f_tnd_tmp_qr": {"grid": (I, J, K), "units": ""},
+            "f_tnd_tmp_qs": {"grid": (I, J, K), "units": ""},
+            "f_tnd_tmp_qv": {"grid": (I, J, K), "units": ""},
+            "f_tnd_tmp_t": {"grid": (I, J, K), "units": ""},
+            "f_vfi": {"grid": (I, J, K), "units": ""},
+            "f_vfl": {"grid": (I, J, K), "units": ""},
+            "f_w": {"grid": (I, J, K), "units": ""},
+            "i_convection_type": {"grid": (I, J), "units": ""},
+        }
+
+    @cached_property
+    def _tendency_properties(self) -> PropertyDict:
+        # grid and units declared for each tendency field; todo(stubbiali): sort out units
+        return {
+            "f_a": {"grid": (I, J, K), "units": "s^-1"},
+            "f_t": {"grid": (I, J, K), "units": "s^-1"},
+            "f_qv": {"grid": (I, J, K), "units": "s^-1"},
+            "f_ql": {"grid": (I, J, K), "units": "s^-1"},
+            "f_qi": {"grid": (I, J, K), "units": "s^-1"},
+            "f_qr": {"grid": (I, J, K), "units": "s^-1"},
+            "f_qs": {"grid": (I, J, K), "units": "s^-1"},
+        }
+
+    @cached_property
+    def _diagnostic_properties(self) -> PropertyDict:
+        # grid and units declared for each diagnostic field (units blank); todo(stubbiali): sort out units
+        return {
+            "f_covptot": {"grid": (I, J, K), "units": ""},
+            "f_fcqlng": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fcqnng": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fcqrng": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fcqsng": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fhpsl": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fhpsn": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fplsl": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fplsn": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fsqif": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fsqitur": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fsqlf": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fsqltur": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fsqrf": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_fsqsf": {"grid": (I, J, K - 1 / 2), "units": ""},
+            "f_rainfrac_toprfz": {"grid": (I, J), "units": ""},
+        }
+
+    def array_call(
+        self,
+        state: StorageDict,
+        timestep: timedelta,
+        out_tendencies: StorageDict,
+        out_diagnostics: StorageDict,
+        overwrite_tendencies: Dict[str, bool],
+    ) -> None:
+        """Run `cloudsc_tendencies`, then `cloudsc_fluxes`; `overwrite_tendencies` is unused."""
+        with managed_temporary_storage(
+            self.computational_grid,
+            *repeat(((I, J), "float"), 6),
+            ((I, J), "bool"),
+            ((K,), "int"),
+            *repeat(((I, J, K), "float"), 18),
+            gt4py_config=self.gt4py_config,
+        ) as (
+            aph_s,
+            cldtopdist,
+            covpmax,
+            covptot,
+            paphd,
+            trpaus,
+            rainliq,
+            klevel,
+            foealfa,
+            lneg_qi,
+            lneg_ql,
+            lneg_qr,
+            lneg_qs,
+            lude,
+            pfplsi,
+            pfplsl,
+            pfplsr,
+            pfplss,
+            qi0,
+            qin,
+            ql0,
+            qln,
+            qr0,
+            qrn,
+            qs0,
+            qsn,
+        ):
+            def suffix(field_name):  # text after the first underscore, e.g. "f_qv" -> "qv"
+                return field_name.split("_", maxsplit=1)[1]
+            inputs = {"in_" + suffix(key): state[key] for key in self.input_properties}
+            tendencies = {
+                "out_tnd_loc_" + suffix(key): out_tendencies[key]
+                for key in self.tendency_properties
+            }
+            diagnostics = {
+                "out_" + suffix(key): out_diagnostics[key]
+                for key in self.diagnostic_properties
+            }
+            temporaries = dict(
+                tmp_aph_s=aph_s,
+                tmp_cldtopdist=cldtopdist,
+                tmp_covpmax=covpmax,
+                tmp_covptot=covptot,
+                tmp_klevel=klevel,
+                tmp_paphd=paphd,
+                tmp_rainliq=rainliq,
+                tmp_trpaus=trpaus,
+            )
+            aph_s[...] = state["f_aph"][..., self.nlev]
+            assign(klevel, np.arange(self.nlev + 1))
+
+            tendency_inputs = inputs.copy()
+            vfi = tendency_inputs.pop("in_vfi")
+            vfl = tendency_inputs.pop("in_vfl")
+            tendency_outputs = {
+                "out_covptot": diagnostics["out_covptot"],
+                "out_foealfa": foealfa,
+                "out_lneg_qi": lneg_qi,
+                "out_lneg_ql": lneg_ql,
+                "out_lneg_qr": lneg_qr,
+                "out_lneg_qs": lneg_qs,
+                "out_lude": lude,
+                "out_pfplsi": pfplsi,
+                "out_pfplsl": pfplsl,
+                "out_pfplsr": pfplsr,
+                "out_pfplss": pfplss,
+                "out_qi0": qi0,
+                "out_qin": qin,
+                "out_ql0": ql0,
+                "out_qln": qln,
+                "out_qr0": qr0,
+                "out_qrn": qrn,
+                "out_qs0": qs0,
+                "out_qsn": qsn,
+                "out_rainfrac_toprfz": diagnostics["out_rainfrac_toprfz"],
+            }
+            self.cloudsc_tendencies(
+                **tendency_inputs,
+                **tendencies,
+                **tendency_outputs,
+                **temporaries,
+                dt=timestep.total_seconds(),
+                origin=(0, 0, 0),
+                domain=self.computational_grid.grids[I, J, K].shape,
+                validate_args=self.gt4py_config.validate_args,
+                exec_info=self.gt4py_config.exec_info,
+            )
+
+            flux_inputs = {
+                "in_aph": inputs["in_aph"],
+                "in_foealfa": foealfa,
+                "in_lneg_qi": lneg_qi,
+                "in_lneg_ql": lneg_ql,
+                "in_lneg_qr": lneg_qr,
+                "in_lneg_qs": lneg_qs,
+                "in_lude": lude,
+                "in_pfplsi": pfplsi,
+                "in_pfplsl": pfplsl,
+                "in_pfplsr": pfplsr,
+                "in_pfplss": pfplss,
+                "in_qi0": qi0,
+                "in_qin": qin,
+                "in_ql0": ql0,
+                "in_qln": qln,
+                "in_qr0": qr0,
+                "in_qrn": qrn,
+                "in_qs0": qs0,
+                "in_qsn": qsn,
+                "in_vfi": vfi,
+                "in_vfl": vfl,
+            }
+            flux_outputs = diagnostics.copy()
+            flux_outputs.pop("out_covptot")
+            flux_outputs.pop("out_rainfrac_toprfz")
+            self.cloudsc_fluxes(
+                **flux_inputs,
+                **flux_outputs,
+                dt=timestep.total_seconds(),
+                origin=(0, 0, 0),
+                domain=self.computational_grid.grids[I, J, K - 1 / 2].shape,
+                validate_args=self.gt4py_config.validate_args,
+                exec_info=self.gt4py_config.exec_info,
+            )
diff --git a/src/cloudsc_python/src/cloudsc4py/utils/__init__.py b/src/cloudsc_python/src/cloudsc4py/utils/__init__.py
new file mode 100644
index 00000000..95e3c8ad
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/utils/__init__.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
diff --git a/src/cloudsc_python/src/cloudsc4py/utils/f2py.py b/src/cloudsc_python/src/cloudsc4py/utils/f2py.py
new file mode 100644
index 00000000..f8ad6b9d
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/utils/f2py.py
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from collections.abc import Callable, Sequence
+    from typing import Optional, Union
+
+
+PORTED_OBJECTS = {}  # filled by ported_object; keyed by obj.__name__, so equal names overwrite
+
+
+def ported_object(
+    handle: Optional[Callable] = None,
+    from_file: Optional[Union[str, Sequence[str]]] = None,
+    from_line: Optional[int] = None,
+    to_line: Optional[int] = None,
+) -> Callable:
+    """Record an object in `PORTED_OBJECTS` and tag it with `from_file`/`from_line`/`to_line`.
+
+    Called with `handle` it decorates directly; without it, it returns the decorator."""
+    if not (from_line is None or to_line is None):
+        assert from_line <= to_line
+
+    def register(target):
+        PORTED_OBJECTS[target.__name__] = target
+        target.from_file = from_file
+        target.from_line = from_line
+        target.to_line = to_line
+        return target
+
+    return register if handle is None else register(handle)
+
+
+# aliases of `ported_object` that name the kind of object being decorated (readability only)
+ported_class = ported_object
+ported_function = ported_object
+ported_method = ported_object
diff --git a/src/cloudsc_python/src/cloudsc4py/utils/iox.py b/src/cloudsc_python/src/cloudsc4py/utils/iox.py
new file mode 100644
index 00000000..be1efd68
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/utils/iox.py
@@ -0,0 +1,328 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+from datetime import timedelta
+from functools import lru_cache
+import h5py
+import numpy as np
+from pydantic import BaseModel
+from typing import TYPE_CHECKING
+
+from cloudsc4py.utils.f2py import ported_method
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+    from typing import Optional, Type
+
+    from cloudsc4py.framework.config import DataTypes
+
+
+class YoecldpParameters(BaseModel):  # filled with fixed values by get_yoecldp_parameters
+    NCLDQI: int
+    NCLDQL: int
+    NCLDQR: int
+    NCLDQS: int
+    NCLDQV: int
+    NCLV: int
+
+
+class YoethfParameters(BaseModel):  # read from the file by HDF5Reader.get_yoethf_parameters
+    R2ES: float
+    R3IES: float
+    R3LES: float
+    R4IES: float
+    R4LES: float
+    R5ALSCP: float
+    R5ALVCP: float
+    R5IES: float
+    R5LES: float
+    RALFDCP: float
+    RALSDCP: float
+    RALVDCP: float
+    RKOOP1: float
+    RKOOP2: float
+    RTICE: float
+    RTICECU: float
+    RTWAT: float
+    RTWAT_RTICECU_R: float
+    RTWAT_RTICE_R: float
+
+
+class YomcstParameters(BaseModel):  # read from the file by HDF5Reader.get_yomcst_parameters
+    RCPD: float
+    RD: float
+    RETV: float
+    RG: float
+    RLMLT: float
+    RLSTT: float
+    RLVTT: float
+    RTT: float
+    RV: float
+
+
+class YrecldpParameters(BaseModel):  # each field X is read from the dataset "YRECLDP_X"
+    LAERICEAUTO: bool
+    LAERICESED: bool
+    LAERLIQAUTOCP: bool
+    LAERLIQAUTOCPB: bool
+    LAERLIQAUTOLSP: bool
+    LAERLIQCOLL: bool
+    LCLDBUDGET: bool
+    LCLDEXTRA: bool
+    NAECLBC: int
+    NAECLDU: int
+    NAECLOM: int
+    NAECLSS: int
+    NAECLSU: int
+    NAERCLD: int
+    NBETA: int
+    NCLDDIAG: int
+    NCLDTOP: int
+    NSHAPEP: int
+    NSHAPEQ: int
+    NSSOPT: int
+    RAMID: float
+    RAMIN: float
+    RCCN: float
+    RCCNOM: float
+    RCCNSS: float
+    RCCNSU: float
+    RCLCRIT: float
+    RCLCRIT_LAND: float
+    RCLCRIT_SEA: float
+    RCLDIFF: float
+    RCLDIFF_CONVI: float
+    RCLDMAX: float
+    RCLDTOPCF: float
+    RCLDTOPP: float
+    RCL_AI: float
+    RCL_APB1: float
+    RCL_APB2: float
+    RCL_APB3: float
+    RCL_AR: float
+    RCL_AS: float
+    RCL_BI: float
+    RCL_BR: float
+    RCL_BS: float
+    RCL_CDENOM1: float
+    RCL_CDENOM2: float
+    RCL_CDENOM3: float
+    RCL_CI: float
+    RCL_CONST1I: float
+    RCL_CONST1R: float
+    RCL_CONST1S: float
+    RCL_CONST2I: float
+    RCL_CONST2R: float
+    RCL_CONST2S: float
+    RCL_CONST3I: float
+    RCL_CONST3R: float
+    RCL_CONST3S: float
+    RCL_CONST4I: float
+    RCL_CONST4R: float
+    RCL_CONST4S: float
+    RCL_CONST5I: float
+    RCL_CONST5R: float
+    RCL_CONST5S: float
+    RCL_CONST6I: float
+    RCL_CONST6R: float
+    RCL_CONST6S: float
+    RCL_CONST7S: float
+    RCL_CONST8S: float
+    RCL_CR: float
+    RCL_CS: float
+    RCL_DI: float
+    RCL_DR: float
+    RCL_DS: float
+    RCL_DYNVISC: float
+    RCL_FAC1: float
+    RCL_FAC2: float
+    RCL_FZRAB: float
+    RCL_FZRBB: float
+    RCL_KA273: float
+    RCL_KKAac: float
+    RCL_KKAau: float
+    RCL_KKBac: float
+    RCL_KKBaun: float
+    RCL_KKBauq: float
+    RCL_KK_cloud_num_land: float
+    RCL_KK_cloud_num_sea: float
+    RCL_SCHMIDT: float
+    RCL_X1I: float
+    RCL_X1R: float
+    RCL_X1S: float
+    RCL_X2I: float
+    RCL_X2R: float
+    RCL_X2S: float
+    RCL_X3I: float
+    RCL_X3S: float
+    RCL_X41: float
+    RCL_X4R: float
+    RCL_X4S: float
+    RCOVPMIN: float
+    RDENSREF: float
+    RDENSWAT: float
+    RDEPLIQREFDEPTH: float
+    RDEPLIQREFRATE: float
+    RICEHI1: float
+    RICEHI2: float
+    RICEINIT: float
+    RKCONV: float
+    RKOOPTAU: float
+    RLCRITSNOW: float
+    RLMIN: float
+    RNICE: float
+    RPECONS: float
+    RPRC1: float
+    RPRC2: float
+    RPRECRHMAX: float
+    RSNOWLIN1: float
+    RSNOWLIN2: float
+    RTAUMEL: float
+    RTHOMO: float
+    RVICE: float
+    RVRAIN: float
+    RVRFACTOR: float
+    RVSNOW: float
+
+
+class HDF5Reader:
+    f: h5py.File
+    data_types: DataTypes
+
+    def __init__(self, filename: str, data_types: DataTypes) -> None:  # open `filename` for reading
+        self.f = h5py.File(filename, "r")  # explicit mode: h5py < 3 could pick a writable one
+        self.data_types = data_types
+
+    def __del__(self) -> None:
+        self.f.close()  # NOTE(review): AttributeError here if __init__ failed before setting `f`
+
+    def get_field(self, name: str) -> np.ndarray:
+        """Read the dataset `name` from the file, dispatching on its number of dimensions.
+
+        Raises `RuntimeError` if the dataset is missing or is not 1-, 2- or 3-dimensional."""
+        dataset = self.f.get(name, None)
+        if dataset is None:
+            raise RuntimeError(f"Unknown field `{name}`.")
+
+        readers = {1: self._get_field_1d, 2: self._get_field_2d, 3: self._get_field_3d}
+        reader = readers.get(dataset.ndim)
+        if reader is None:
+            raise RuntimeError(f"The field `{name}` has unexpected shape {dataset.shape}.")
+        return reader(dataset, name)
+
+    # no lru_cache here: caching a method would keep `self` and its open file alive
+    def get_nlev(self) -> int:
+        return self.f["KLEV"][0]
+
+    # no lru_cache here: caching a method would keep `self` and its open file alive
+    def get_nlon(self) -> int:
+        return self.f["KLON"][0]
+
+    def get_timestep(self) -> timedelta:
+        return timedelta(seconds=self._get_parameter_f("PTSPHY"))  # value is taken as seconds
+
+    @ported_method(from_file="common/module/yoecldp.F90", from_line=86, to_line=91)
+    def get_yoecldp_parameters(self) -> YoecldpParameters:
+        # fixed values; unlike the other parameter sets nothing is read from the file
+        constants = dict(NCLV=5, NCLDQL=1, NCLDQI=2, NCLDQR=3, NCLDQS=4, NCLDQV=5)
+        return YoecldpParameters(**constants)
+
+    @ported_method(from_file="common/module/yoethf.F90", from_line=79, to_line=99)
+    def get_yoethf_parameters(self) -> YoethfParameters:
+        return self._initialize_parameters(YoethfParameters)  # datasets named like the fields
+
+    @ported_method(from_file="common/module/yomcst.F90", from_line=167, to_line=177)
+    def get_yomcst_parameters(self) -> YomcstParameters:
+        return self._initialize_parameters(YomcstParameters)  # datasets named like the fields
+
+    @ported_method(from_file="common/module/yoecldp.F90", from_line=242, to_line=370)
+    def get_yrecldp_parameters(self) -> YrecldpParameters:
+        def dataset_name(attr_name):  # field `X` is looked up as dataset `YRECLDP_X`
+            return "YRECLDP_" + attr_name
+        return self._initialize_parameters(YrecldpParameters, get_parameter_name=dataset_name)
+
+    def _get_field_1d(self, ds: h5py.Dataset, name: str) -> np.ndarray:
+        """Return `ds[:]` if its length is nlon, nlon + 1, nlev or nlev + 1, else raise."""
+        nlon, nlev = self.get_nlon(), self.get_nlev()
+        length = ds.shape[0]
+        if not (nlon <= length <= nlon + 1 or nlev <= length <= nlev + 1):
+            raise RuntimeError(
+                f"The field `{name}` is expected to have shape ({nlon}(+1),) or "
+                f"({nlev}(+1),), but has shape {ds.shape}."
+            )
+        return ds[:]
+
+    def _get_field_2d(self, ds, name):
+        """Return `ds` with the lon axis first and the lev axis second, transposing if needed."""
+        nlon, nlev = self.get_nlon(), self.get_nlev()
+        rows, cols = ds.shape
+        if nlon <= rows <= nlon + 1 and nlev <= cols <= nlev + 1:
+            return ds[...]
+        if nlon <= cols <= nlon + 1 and nlev <= rows <= nlev + 1:
+            return np.transpose(ds[...])
+        raise RuntimeError(
+            f"The field `{name}` is expected to have shape "
+            f"({nlon}(+1), {nlev}(+1)) or ({nlev}(+1), {nlon}(+1)), "
+            f"but has shape {ds.shape}."
+        )
+
+    def _get_field_3d(self, ds, name):
+        """Return `ds` transposed so that its axes are ordered (lon, lev, remaining axis).
+
+        The lon axis is the first one of length nlon (else nlon + 1); the lev axis is found the
+        same way among the axes still unused, so a shape in which both lengths coincide
+        no longer produces a repeated axis. Raises `RuntimeError` if either axis is missing.
+        """
+        shape = ds.shape
+        axes = []
+        for size in (self.get_nlon(), self.get_nlev()):
+            unused = [axis for axis in range(len(shape)) if axis not in axes]
+            # an exact match is preferred over the one-longer (`size + 1`) variant
+            exact = [axis for axis in unused if shape[axis] == size]
+            longer = [axis for axis in unused if shape[axis] == size + 1]
+            if not (exact or longer):
+                raise RuntimeError(f"The field `{name}` has unexpected shape {ds.shape}.")
+            axes.append((exact or longer)[0])
+
+        # whatever axis is left goes last
+        axes += [axis for axis in range(len(shape)) if axis not in axes]
+        return np.transpose(ds[...], axes=axes)
+
+    def _initialize_parameters(
+        self,
+        parameter_cls: Type[BaseModel],
+        get_parameter_name: Optional[Callable[[str], str]] = None,
+    ):
+        """Build `parameter_cls` by reading every schema property with the getter for its type."""
+        getters = {
+            "boolean": self._get_parameter_b,
+            "number": self._get_parameter_f,
+            "integer": self._get_parameter_i,
+        }
+        values = {}
+        for field, metadata in parameter_cls.schema()["properties"].items():
+            # resolve the dataset name first, then pick the getter from the schema type
+            dataset_name = field if get_parameter_name is None else get_parameter_name(field)
+            kind = metadata["type"]
+            if kind not in getters:
+                raise ValueError(f"Invalid parameter type `{kind}`.")
+            values[field] = getters[kind](dataset_name)
+        return parameter_cls(**values)
+
+    def _get_parameter_b(self, name: str) -> bool:
+        return self.data_types.bool(self.f.get(name, [True])[0])  # missing dataset -> True
+
+    def _get_parameter_f(self, name: str) -> float:
+        return self.data_types.float(self.f.get(name, [0.0])[0])  # missing dataset -> 0.0
+
+    def _get_parameter_i(self, name: str) -> int:
+        return self.data_types.int(self.f.get(name, [0])[0])  # missing dataset -> 0
diff --git a/src/cloudsc_python/src/cloudsc4py/utils/numpyx.py b/src/cloudsc_python/src/cloudsc4py/utils/numpyx.py
new file mode 100644
index 00000000..0e263962
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/utils/numpyx.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+import numpy as np
+from typing import TYPE_CHECKING
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = np
+
+if TYPE_CHECKING:
+    from cloudsc4py.utils.typingx import Storage
+
+
+def to_numpy(storage: Storage) -> np.ndarray:
+    try:  # objects without a usable `.get()` attribute are returned untouched
+        return storage.get()
+    except AttributeError:
+        return storage
+
+
+def assign(lhs: Storage, rhs: Storage) -> None:
+    # Convert `rhs` to the array library of `lhs` when they differ, then copy in place.
+    if isinstance(lhs, cp.ndarray) and isinstance(rhs, np.ndarray):
+        rhs = cp.asarray(rhs)
+    elif isinstance(lhs, np.ndarray) and isinstance(rhs, cp.ndarray):
+        rhs = rhs.get()
+    lhs[...] = rhs
diff --git a/src/cloudsc_python/src/cloudsc4py/utils/timing.py b/src/cloudsc_python/src/cloudsc4py/utils/timing.py
new file mode 100644
index 00000000..65d3cd9a
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/utils/timing.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+from typing import TYPE_CHECKING
+
+from sympl._core.time import Timer
+
+if TYPE_CHECKING:
+    from typing import Type
+
+
+class timing:  # context manager: Timer.start(label) on entry, Timer.stop() on exit
+    def __init__(self, label: str) -> None:
+        self.label = label
+
+    def __enter__(self) -> Type[Timer]:
+        Timer.start(self.label)
+        return Timer  # the Timer class itself (not an instance) is bound by `with ... as`
+
+    def __exit__(self, exc_type, exc_value, exc_tb) -> None:
+        Timer.stop()  # returns None, so exceptions raised in the body propagate
diff --git a/src/cloudsc_python/src/cloudsc4py/utils/typingx.py b/src/cloudsc_python/src/cloudsc4py/utils/typingx.py
new file mode 100644
index 00000000..a0a2cfdc
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/utils/typingx.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import numpy as np
+from typing import Dict, TypeVar, Union
+
+from sympl import DataArray as SymplDataArray
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = np
+
+
+DataArray = SymplDataArray  # sympl's DataArray, re-exported under a local name
+DataArrayDict = Dict[str, DataArray]
+ParameterDict = Dict[str, Union[bool, float, int]]
+Storage = Union[np.ndarray, cp.ndarray]  # `cp` is numpy itself when cupy cannot be imported
+StorageDict = Dict[str, Storage]
+Range = TypeVar("Range")
diff --git a/src/cloudsc_python/src/cloudsc4py/utils/validation.py b/src/cloudsc_python/src/cloudsc4py/utils/validation.py
new file mode 100644
index 00000000..ab8b7aa1
--- /dev/null
+++ b/src/cloudsc_python/src/cloudsc4py/utils/validation.py
@@ -0,0 +1,58 @@
+# -*- coding: utf-8 -*-
+
+# (C) Copyright 2018- ECMWF.
+# (C) Copyright 2022- ETH Zurich.
+
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from __future__ import annotations
+import numpy as np
+from typing import TYPE_CHECKING
+
+from cloudsc4py.utils.numpyx import to_numpy
+
+if TYPE_CHECKING:
+    from typing import Tuple
+
+    from sympl._core.data_array import DataArray
+    from sympl._core.typingx import DataArrayDict
+
+    from cloudsc4py.utils.typingx import Storage
+
+
+def validate_storage_2d(src: Storage, trg: Storage) -> bool:
+    lhs, rhs = to_numpy(src), to_numpy(trg)
+    # Compare only the region common to both arrays along each of the two axes.
+    ni = min(lhs.shape[0], rhs.shape[0])
+    nj = min(lhs.shape[1], rhs.shape[1])
+    return np.allclose(lhs[:ni, :nj], rhs[:ni, :nj], atol=1e-18, rtol=1e-12)
+
+
+def validate_storage_3d(src: Storage, trg: Storage) -> bool:
+    lhs, rhs = to_numpy(src), to_numpy(trg)
+    # Compare only the region common to both arrays along each of the three axes.
+    ni = min(lhs.shape[0], rhs.shape[0])
+    nj = min(lhs.shape[1], rhs.shape[1])
+    nk = min(lhs.shape[2], rhs.shape[2])
+    return np.allclose(lhs[:ni, :nj, :nk], rhs[:ni, :nj, :nk], atol=1e-18, rtol=1e-12)
+
+
+def validate_field(src: DataArray, trg: DataArray) -> bool:
+    # Dispatch on the rank of `src`; `trg` is handed over without its own rank check.
+    if src.ndim == 2:
+        return validate_storage_2d(src.data, trg.data)
+    if src.ndim == 3:
+        return validate_storage_3d(src.data, trg.data)
+    raise ValueError("The field to validate must be either 2-d or 3-d.")
+
+
+def validate(src: DataArrayDict, trg: DataArrayDict) -> Tuple[str, ...]:
+    return tuple(  # names of the fields of `src` whose counterpart in `trg` fails validation
+        name
+        for name in src
+        if name in trg and name != "time" and not validate_field(src[name], trg[name])
+    )
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index d2aaff84..14395f6e 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -8,98 +8,87 @@
 
 ecbuild_enable_fortran(REQUIRED MODULE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/module)
 
-list(APPEND CLOUDSC_COMMON_SOURCES
-  module/parkind1.F90
-  module/yoecldp.F90
-  module/yomcst.F90
-  module/yoethf.F90
-  module/yoephli.F90
-  module/yomphyder.F90
-  module/fcttre_mod.F90
-  module/fccld_mod.F90
-  module/routines.F90
-  module/abor1.F90
-  module/timer_mod.F90
-  module/mycpu.c
-  module/ec_pmon_mod.F90
-  module/file_io_mod.F90
-  module/expand_mod.F90
-  module/validate_mod.F90
-  module/cloudsc_global_state_mod.F90
-  module/cloudsc_mpi_mod.F90
-)
-
 
-ecbuild_add_library( TARGET cloudsc-common-lib
-  SOURCES ${CLOUDSC_COMMON_SOURCES}
-  TYPE SHARED
-  DEFINITIONS ${CLOUDSC_DEFINITIONS}
+list(APPEND CLOUDSC_COMMON_SOURCES
+    module/parkind1.F90
+    module/yoecldp.F90
+    module/yomcst.F90
+    module/yoethf.F90
+    module/yoephli.F90
+    module/yomphyder.F90
+    module/routines.F90
+    module/abor1.F90
+    module/timer_mod.F90
+    module/mycpu.c
+    module/ec_pmon_mod.F90
+    module/file_io_mod.F90
+    module/expand_mod.F90
+    module/validate_mod.F90
+    module/cloudsc_global_state_mod.F90
+    module/cloudsc_mpi_mod.F90
 )
-target_include_directories( cloudsc-common-lib PUBLIC $<INSTALL_INTERFACE:include> )
-target_include_directories( cloudsc-common-lib PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
-target_include_directories( cloudsc-common-lib PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/module>)
 
-if( HAVE_OMP AND TARGET OpenMP::OpenMP_Fortran )
-  target_link_libraries( cloudsc-common-lib PRIVATE OpenMP::OpenMP_Fortran )
+if(NOT ENABLE_USE_STMT_FUNC)
+    list(APPEND CLOUDSC_COMMON_SOURCES
+        module/fcttre_mod.F90
+        module/fccld_mod.F90
+    )
 endif()
 
-if( HAVE_MPI AND TARGET MPI::MPI_Fortran )
-  target_sources( cloudsc-common-lib PRIVATE module/cloudsc_mpif.F90 )
-  target_link_libraries( cloudsc-common-lib PRIVATE MPI::MPI_Fortran )
-endif()
-
-if ( HAVE_HDF5 )
-  target_sources( cloudsc-common-lib PRIVATE module/hdf5_file_mod.F90 )
-  target_include_directories( cloudsc-common-lib PRIVATE ${HDF5_Fortran_INCLUDE_DIRS} )
-  target_link_libraries( cloudsc-common-lib PRIVATE ${HDF5_LIBRARIES} )
-endif()
-
-if( HAVE_ACC AND TARGET OpenACC::OpenACC_Fortran )
-  target_link_libraries( cloudsc-common-lib PRIVATE OpenACC::OpenACC_Fortran )
-endif()
+list(APPEND CLOUDSC_CUDA_SOURCES
+    module/fccld_mod.cuf.F90
+    module/fcttre_mod.cuf.F90
+    module/yoethf.cuf.F90
+    module/yomcst.cuf.F90
+)
 
-if( TARGET Serialbox::Serialbox_Fortran )
-  target_link_libraries( cloudsc-common-lib PRIVATE Serialbox::Serialbox_Fortran )
+if( HAVE_CUDA )
+    # ========================================================================
+    # Compile the CUDA Fortran files with -Mcuda.
+    #
+    # This is necessary since CMake's CUDA language support does not natively
+    # understand CUDA Fortran (.cuf) yet. So we simply emulate .cuf with
+    # .cuf.F90 files and force the compile flags here.
+    # ========================================================================
+    cloudsc_add_compile_options(
+        SOURCES ${CLOUDSC_CUDA_SOURCES}
+        FLAGS "-Mcuda=maxregcount:128"
+    )
+
+    # Add the CUDA Fortran sources to the common library sources if enabled
+    list(APPEND CLOUDSC_COMMON_SOURCES ${CLOUDSC_CUDA_SOURCES} )
 endif()
 
 
 if( HAVE_ACC )
-  # ========================================================================
-  # For OpenACC runs we can ensure global module variables from our utility
-  # modules are copied to devices via `!$acc declare copyin(<module_vars>)`
-  # However, this requries static linking as per this thread:
-  # https://www.pgroup.com/userforum/viewtopic.php?t=7296
-  # ========================================================================
+    # ========================================================================
+    # For OpenACC runs we can ensure global module variables from our utility
+    # modules are copied to devices via `!$acc declare copyin(<module_vars>)`
+    # However, this requires static linking as per this thread:
+    # https://www.pgroup.com/userforum/viewtopic.php?t=7296
+    # ========================================================================
+    set( LIBRARY_TYPE STATIC )
+else()
+    set( LIBRARY_TYPE SHARED )
+endif()
 
-  ecbuild_add_library( TARGET cloudsc-common-lib-static
-    SOURCES ${CLOUDSC_COMMON_SOURCES}
-    TYPE STATIC
+ecbuild_add_library( TARGET cloudsc-common-lib  # one library; LIBRARY_TYPE is set above
+    TYPE ${LIBRARY_TYPE}
     DEFINITIONS ${CLOUDSC_DEFINITIONS}
-  )
-  target_include_directories( cloudsc-common-lib-static PUBLIC $<INSTALL_INTERFACE:include> )
-  target_include_directories( cloudsc-common-lib-static PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
-  target_include_directories( cloudsc-common-lib-static PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/module>)
-
-  if( HAVE_OMP AND TARGET OpenMP::OpenMP_Fortran )
-    target_link_libraries( cloudsc-common-lib-static PRIVATE OpenMP::OpenMP_Fortran )
-  endif()
-
-  if( HAVE_MPI AND TARGET MPI::MPI_Fortran )
-    target_sources( cloudsc-common-lib-static PRIVATE module/cloudsc_mpif.F90 )
-    target_link_libraries( cloudsc-common-lib-static PRIVATE MPI::MPI_Fortran )
-  endif()
-
-  if ( HAVE_HDF5 )
-    target_sources( cloudsc-common-lib-static PRIVATE module/hdf5_file_mod.F90 )
-    target_include_directories( cloudsc-common-lib-static PRIVATE ${HDF5_Fortran_INCLUDE_DIRS} )
-    target_link_libraries( cloudsc-common-lib-static PRIVATE ${HDF5_LIBRARIES} )
-  endif()
-
-  if( HAVE_ACC AND TARGET OpenACC::OpenACC_Fortran )
-    target_link_libraries( cloudsc-common-lib-static PRIVATE OpenACC::OpenACC_Fortran )
-  endif()
-
-  if( TARGET Serialbox::Serialbox_Fortran )
-    target_link_libraries( cloudsc-common-lib-static PRIVATE Serialbox::Serialbox_Fortran )
-  endif()
-endif()
+    SOURCES  # $<BOOL:...> keeps the conditions valid for any truthy/falsy/unset flag value
+        ${CLOUDSC_COMMON_SOURCES}
+        $<$<BOOL:${HAVE_MPI}>:module/cloudsc_mpif.F90>
+        $<$<BOOL:${HAVE_HDF5}>:module/hdf5_file_mod.F90>
+    PRIVATE_INCLUDES
+        $<$<BOOL:${HAVE_HDF5}>:${HDF5_Fortran_INCLUDE_DIRS}>
+    PUBLIC_INCLUDES
+        $<INSTALL_INTERFACE:include>
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/module>
+    PUBLIC_LIBS  # each optional dependency is linked only when its feature flag is true
+        $<$<BOOL:${HAVE_ACC}>:OpenACC::OpenACC_Fortran>
+        $<$<BOOL:${HAVE_OMP}>:OpenMP::OpenMP_Fortran>
+        $<$<BOOL:${HAVE_MPI}>:MPI::MPI_Fortran>
+        $<$<BOOL:${HAVE_HDF5}>:hdf5::hdf5_fortran>
+        $<$<BOOL:${HAVE_SERIALBOX}>:Serialbox::Serialbox_Fortran>
+)
diff --git a/src/common/module/expand_mod.F90 b/src/common/module/expand_mod.F90
index 6ff13169..109dd169 100644
--- a/src/common/module/expand_mod.F90
+++ b/src/common/module/expand_mod.F90
@@ -181,8 +181,8 @@ subroutine expand_l1(buffer, field, nlon, nproma, ngptot, nblocks)
        bsize = min(nproma, ngptot - gidx + 1)  ! Size of the field block
 
        ! First read, might not be aligned
-       bidx = mod(gidx,nlon)
-       bend = min(nlon, mod(gidx, nlon)+bsize-1)
+       bidx = mod(gidx-1,nlon)+1
+       bend = min(nlon,bidx+bsize-1)
        fidx = 1
        fend = bend - bidx + 1
        field(fidx:fend,b) = buffer(bidx:bend)
@@ -207,14 +207,14 @@ subroutine expand_i1(buffer, field, nlon, nproma, ngptot, nblocks)
     integer(kind=jpim), intent(in) :: nlon, nproma, ngptot, nblocks
     integer :: b, gidx, bsize, fidx, fend, bidx, bend
 
-!$omp parallel do default(shared) private(b, gidx, bsize, fidx, fend, bidx, bend) schedule(runtime) 
+!$omp parallel do default(shared) private(b, gidx, bsize, fidx, fend, bidx, bend) schedule(runtime)
     do b=1, nblocks
        gidx = (b-1)*nproma + 1  ! Global starting index of the block in the general domain
        bsize = min(nproma, ngptot - gidx + 1)  ! Size of the field block
 
        ! First read, might not be aligned
-       bidx = mod(gidx,nlon)
-       bend = min(nlon, mod(gidx, nlon)+bsize-1)
+       bidx = mod(gidx-1,nlon)+1
+       bend = min(nlon,bidx+bsize-1)
        fidx = 1
        fend = bend - bidx + 1
        field(fidx:fend,b) = buffer(bidx:bend)
@@ -240,14 +240,14 @@ subroutine expand_r1(buffer, field, nlon, nproma, ngptot, nblocks)
     integer(kind=jpim), intent(in) :: nlon, nproma, ngptot, nblocks
     integer :: b, gidx, bsize, fidx, fend, bidx, bend
 
-!$omp parallel do default(shared) private(b, gidx, bsize, fidx, fend, bidx, bend) schedule(runtime) 
+!$omp parallel do default(shared) private(b, gidx, bsize, fidx, fend, bidx, bend) schedule(runtime)
     do b=1, nblocks
        gidx = (b-1)*nproma + 1  ! Global starting index of the block in the general domain
        bsize = min(nproma, ngptot - gidx + 1)  ! Size of the field block
 
        ! First read, might not be aligned
-       bidx = mod(gidx,nlon)
-       bend = min(nlon, mod(gidx, nlon)+bsize-1)
+       bidx = mod(gidx-1,nlon)+1
+       bend = min(nlon,bidx+bsize-1)
        fidx = 1
        fend = bend - bidx + 1
        field(fidx:fend,b) = buffer(bidx:bend)
@@ -264,7 +264,7 @@ subroutine expand_r1(buffer, field, nlon, nproma, ngptot, nblocks)
        ! Zero out the remainder of last block
        field(bsize+1:nproma,b) = 0.0_JPRB
     end do
-!$omp end parallel do    
+!$omp end parallel do
   end subroutine expand_r1
 
   subroutine expand_r2(buffer, field, nlon, nproma, nlev, ngptot, nblocks)
@@ -280,8 +280,8 @@ subroutine expand_r2(buffer, field, nlon, nproma, nlev, ngptot, nblocks)
        bsize = min(nproma, ngptot - gidx + 1)  ! Size of the field block
 
        ! First read, might not be aligned
-       bidx = mod(gidx,nlon)
-       bend = min(nlon, mod(gidx, nlon)+bsize-1)
+       bidx = mod(gidx-1,nlon)+1
+       bend = min(nlon,bidx+bsize-1)
        fidx = 1
        fend = bend - bidx + 1
        field(fidx:fend,:,b) = buffer(bidx:bend,:)
@@ -307,14 +307,14 @@ subroutine expand_r3(buffer, field, nlon, nproma, nlev, ndim, ngptot, nblocks)
     integer(kind=jpim), intent(in) :: nlon, nlev, ndim, nproma, ngptot, nblocks
     integer :: b, gidx, bsize, fidx, fend, bidx, bend
 
-!$omp parallel do default(shared) private(b, gidx, bsize, fidx, fend, bidx, bend) schedule(runtime) 
+!$omp parallel do default(shared) private(b, gidx, bsize, fidx, fend, bidx, bend) schedule(runtime)
     do b=1, nblocks
        gidx = (b-1)*nproma + 1  ! Global starting index of the block in the general domain
        bsize = min(nproma, ngptot - gidx + 1)  ! Size of the field block
 
        ! First read, might not be aligned
-       bidx = mod(gidx,nlon)
-       bend = min(nlon, mod(gidx, nlon)+bsize-1)
+       bidx = mod(gidx-1,nlon)+1
+       bend = min(nlon,bidx+bsize-1)
        fidx = 1
        fend = bend - bidx + 1
        field(fidx:fend,:,:,b) = buffer(bidx:bend,:,:)
diff --git a/src/common/module/fccld_mod.F90 b/src/common/module/fccld_mod.F90
index 09c69a22..f0b76d64 100644
--- a/src/common/module/fccld_mod.F90
+++ b/src/common/module/fccld_mod.F90
@@ -34,7 +34,6 @@ MODULE FCCLD_MOD
   PURE ELEMENTAL FUNCTION FOKOOP(PTARE)
     REAL(KIND=JPRB) :: FOKOOP
     REAL(KIND=JPRB), VALUE :: PTARE
-    !$acc routine seq
 
     FOKOOP = MIN(RKOOP1-RKOOP2*PTARE,FOEELIQ(PTARE)/FOEEICE(PTARE))
   END FUNCTION FOKOOP
diff --git a/src/common/module/fccld_mod.cuf.F90 b/src/common/module/fccld_mod.cuf.F90
new file mode 100644
index 00000000..9522d2e7
--- /dev/null
+++ b/src/common/module/fccld_mod.cuf.F90
@@ -0,0 +1,42 @@
+! (C) Copyright 1988- ECMWF.
+!
+! This software is licensed under the terms of the Apache Licence Version 2.0
+! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+!
+! In applying this licence, ECMWF does not waive the privileges and immunities
+! granted to it by virtue of its status as an intergovernmental organisation
+! nor does it submit to any jurisdiction.
+
+
+MODULE FCCLD_CUF_MOD
+  !*
+  !     ------------------------------------------------------------------
+  !     This COMDECK defines functions to be used in the cloud scheme
+  !       other than the standard saturation vapour pressure
+  !
+  !       FOKOOP modifies the ice saturation mixing ratio for homogeneous
+  !       nucleation
+  !
+  !     note: PTARE is temperature and is defined in frttre.h
+  !           which MUST be included before this function block
+  !
+  !     **********************************************
+  !     KOOP formula for homogeneous nucleation of ice 
+  !     **********************************************
+  !
+  !               INPUT : PTARE = TEMPERATURE 
+  USE PARKIND1,       ONLY : JPIM, JPRB
+  USE YOETHF_CUF,     RKOOP1=>RKOOP1_D, RKOOP2=>RKOOP2_D
+  USE FCTTRE_CUF_MOD, ONLY : FOEELIQ, FOEEICE
+
+  IMPLICIT NONE
+  CONTAINS
+
+  ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOKOOP(PTARE)
+    REAL(KIND=JPRB) :: FOKOOP
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOKOOP = MIN(RKOOP1-RKOOP2*PTARE,FOEELIQ(PTARE)/FOEEICE(PTARE))
+  END FUNCTION FOKOOP
+
+END MODULE FCCLD_CUF_MOD
diff --git a/src/common/module/fcttre_mod.F90 b/src/common/module/fcttre_mod.F90
index cbf74835..81c0c1cb 100644
--- a/src/common/module/fcttre_mod.F90
+++ b/src/common/module/fcttre_mod.F90
@@ -284,9 +284,9 @@ PURE ELEMENTAL FUNCTION FOELSON(PTARE)
     !$acc routine seq
 
     FOELSON = EXP( -6096.9385_JPRB/PTARE + 21.2409642_JPRB &
-	             - 2.711193E-2_JPRB * PTARE    &
+                     - 2.711193E-2_JPRB * PTARE    &
                      + 1.673952E-5_JPRB * PTARE**2 &
-		     + 2.433502_JPRB * LOG(PTARE))
+                     + 2.433502_JPRB * LOG(PTARE))
   END FUNCTION FOELSON
 
   PURE ELEMENTAL FUNCTION FOELES_V(PTARE)
diff --git a/src/common/module/fcttre_mod.cuf.F90 b/src/common/module/fcttre_mod.cuf.F90
new file mode 100644
index 00000000..3b59eb1d
--- /dev/null
+++ b/src/common/module/fcttre_mod.cuf.F90
@@ -0,0 +1,302 @@
+! (C) Copyright 1988- ECMWF.
+!
+! This software is licensed under the terms of the Apache Licence Version 2.0
+! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+!
+! In applying this licence, ECMWF does not waive the privileges and immunities
+! granted to it by virtue of its status as an intergovernmental organisation
+! nor does it submit to any jurisdiction.
+
+
+MODULE FCTTRE_CUF_MOD
+!     This COMDECK includes the Thermodynamical functions for the cy39
+!       ECMWF Physics package.
+!       Consistent with YOMCST Basic physics constants, assuming the
+!       partial pressure of water vapour is given by a first order
+!       Taylor expansion of Qs(T) w.r.t. to Temperature, using constants
+!       in YOETHF
+!       Two sets of functions are available. In the first set only the
+!       cases water or ice are distinguished by temperature.  This set 
+!       consists of the functions FOEDELTA,FOEEW,FOEDE and FOELH.
+!       The second set considers, besides the two cases water and ice 
+!       also a mix of both for the temperature range RTICE < T < RTWAT.
+!       This set contains FOEALFA,FOEEWM,FOEDEM,FOELDCPM and FOELHM.
+!       FKOOP modifies the ice saturation mixing ratio for homogeneous 
+!       nucleation. FOE_DEWM_DT provides an approximate first derivative
+!       of FOEEWM.
+
+!       Depending on the consideration of mixed phases either the first 
+!       set (e.g. surface, post-processing) or the second set 
+!       (e.g. clouds, condensation, convection) should be used.
+
+!     ------------------------------------------------------------------
+
+  USE PARKIND1, ONLY : JPIM, JPRB
+
+  USE YOMCST_CUF, ONLY: RG=>RG_D, RD=>RD_D, RCPD=>RCPD_D, RETV=>RETV_D, &
+   & RLVTT=>RLVTT_D, RLSTT=>RLSTT_D, RLMLT=>RLMLT_D, RTT=>RTT_D, RV=>RV_D
+  USE YOETHF_CUF, ONLY : &  ! every name is bound to its `_D`-suffixed variant
+   & R2ES=>R2ES_D, R3LES=>R3LES_D, R3IES=>R3IES_D, R4LES=>R4LES_D,  &
+   & R4IES=>R4IES_D, R5LES=>R5LES_D, R5IES=>R5IES_D,  &
+   & R5ALVCP=>R5ALVCP_D, R5ALSCP=>R5ALSCP_D, RALVDCP=>RALVDCP_D,  &
+   & RALSDCP=>RALSDCP_D, RALFDCP=>RALFDCP_D, RTWAT=>RTWAT_D, RTICE=>RTICE_D, RTICECU=>RTICECU_D,  &
+   & RTWAT_RTICE_R=>RTWAT_RTICE_R_D, RTWAT_RTICECU_R=>RTWAT_RTICECU_R_D, RKOOP1=>RKOOP1_D, RKOOP2=>RKOOP2_D
+
+  IMPLICIT NONE
+  CONTAINS
+
+  !     *****************************************************************
+
+  !                NO CONSIDERATION OF MIXED PHASES
+
+  !     *****************************************************************
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEDELTA(PTARE)
+    REAL(KIND=JPRB) :: FOEDELTA
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOEDELTA = MAX (0.0_JPRB,SIGN(1.0_JPRB,PTARE-RTT))
+  END FUNCTION FOEDELTA
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEEW(PTARE)
+    REAL(KIND=JPRB) :: FOEEW
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOEEW = R2ES*EXP (&
+     &(R3LES*FOEDELTA(PTARE)+R3IES*(1.0_JPRB-FOEDELTA(PTARE)))*(PTARE-RTT)&
+     &/ (PTARE-(R4LES*FOEDELTA(PTARE)+R4IES*(1.0_JPRB-FOEDELTA(PTARE)))))
+  END FUNCTION FOEEW
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEDE(PTARE)
+    REAL(KIND=JPRB) :: FOEDE
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOEDE = (FOEDELTA(PTARE)*R5ALVCP+(1.0_JPRB-FOEDELTA(PTARE))*R5ALSCP)&
+     &/ (PTARE-(R4LES*FOEDELTA(PTARE)+R4IES*(1.0_JPRB-FOEDELTA(PTARE))))**2
+  END FUNCTION FOEDE
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEDESU(PTARE)
+    REAL(KIND=JPRB) :: FOEDESU
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOEDESU = (FOEDELTA(PTARE)*R5LES+(1.0_JPRB-FOEDELTA(PTARE))*R5IES)&
+     &/ (PTARE-(R4LES*FOEDELTA(PTARE)+R4IES*(1.0_JPRB-FOEDELTA(PTARE))))**2
+  END FUNCTION FOEDESU
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOELH(PTARE)
+    REAL(KIND=JPRB) :: FOELH
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOELH = FOEDELTA(PTARE)*RLVTT + (1.0_JPRB-FOEDELTA(PTARE))*RLSTT
+  END FUNCTION FOELH
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOELDCP(PTARE)
+    REAL(KIND=JPRB) :: FOELDCP
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOELDCP = FOEDELTA(PTARE)*RALVDCP + (1.0_JPRB-FOEDELTA(PTARE))*RALSDCP
+  END FUNCTION FOELDCP
+
+  !     *****************************************************************
+
+  !           CONSIDERATION OF MIXED PHASES
+
+  !     *****************************************************************
+
+  !     FOEALFA is calculated to distinguish the three cases:
+
+  !                       FOEALFA=1            water phase
+  !                       FOEALFA=0            ice phase
+  !                       0 < FOEALFA < 1      mixed phase
+
+  !               INPUT : PTARE = TEMPERATURE
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEALFA(PTARE)
+    REAL(KIND=JPRB) :: FOEALFA
+    REAL(KIND=JPRB), VALUE  :: PTARE
+
+    FOEALFA = MIN(1.0_JPRB,((MAX(RTICE,MIN(RTWAT,PTARE))-RTICE)&
+     &*RTWAT_RTICE_R)**2) 
+  END FUNCTION FOEALFA
+
+  !     Pressure of water vapour at saturation
+  !        INPUT : PTARE = TEMPERATURE
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEEWM(PTARE)
+    REAL(KIND=JPRB) :: FOEEWM
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOEEWM = R2ES *&
+     &(FOEALFA(PTARE)*EXP(R3LES*(PTARE-RTT)/(PTARE-R4LES))+&
+     &(1.0_JPRB-FOEALFA(PTARE))*EXP(R3IES*(PTARE-RTT)/(PTARE-R4IES)))
+  END FUNCTION FOEEWM
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOE_DEWM_DT(PTARE)
+    REAL(KIND=JPRB) :: FOE_DEWM_DT
+    REAL(KIND=JPRB), VALUE :: PTARE
+    ! Approximate first derivative of FOEEWM w.r.t. temperature; INPUT : PTARE = TEMPERATURE
+    FOE_DEWM_DT = R2ES * ( &
+     & R3LES*FOEALFA(PTARE)*EXP(R3LES*(PTARE-RTT)/(PTARE-R4LES)) &
+     &    *(RTT-R4LES)/(PTARE-R4LES)**2 + &
+     & R3IES*(1.0_JPRB-FOEALFA(PTARE))*EXP(R3IES*(PTARE-RTT)/(PTARE-R4IES)) &
+     &    *(RTT-R4IES)/(PTARE-R4IES)**2)
+  END FUNCTION FOE_DEWM_DT
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEDEM(PTARE)
+    REAL(KIND=JPRB) :: FOEDEM
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOEDEM = FOEALFA(PTARE)*R5ALVCP*(1.0_JPRB/(PTARE-R4LES)**2)+&
+             &(1.0_JPRB-FOEALFA(PTARE))*R5ALSCP*(1.0_JPRB/(PTARE-R4IES)**2)
+  END FUNCTION FOEDEM
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOELDCPM(PTARE)
+    REAL(KIND=JPRB) :: FOELDCPM
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOELDCPM = FOEALFA(PTARE)*RALVDCP+(1.0_JPRB-FOEALFA(PTARE))*RALSDCP
+  END FUNCTION FOELDCPM
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOELHM(PTARE)
+    REAL(KIND=JPRB) :: FOELHM
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOELHM = FOEALFA(PTARE)*RLVTT+(1.0_JPRB-FOEALFA(PTARE))*RLSTT
+  END FUNCTION FOELHM
+
+  !     Temperature normalization for humidity background change of variable
+  !        INPUT : PTARE = TEMPERATURE
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOETB(PTARE)
+    REAL(KIND=JPRB) :: FOETB
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOETB = FOEALFA(PTARE)*R3LES*(RTT-R4LES)*(1.0_JPRB/(PTARE-R4LES)**2)+&
+     &(1.0_JPRB-FOEALFA(PTARE))*R3IES*(RTT-R4IES)*(1.0_JPRB/(PTARE-R4IES)**2)
+  END FUNCTION FOETB
+
+  !     ------------------------------------------------------------------
+  !     *****************************************************************
+
+  !           CONSIDERATION OF DIFFERENT MIXED PHASE FOR CONV
+
+  !     *****************************************************************
+
+  !     FOEALFCU is calculated to distinguish the three cases:
+
+  !                       FOEALFCU=1            water phase
+  !                       FOEALFCU=0            ice phase
+  !                       0 < FOEALFCU < 1      mixed phase
+
+  !               INPUT : PTARE = TEMPERATURE
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEALFCU(PTARE)
+    REAL(KIND=JPRB) :: FOEALFCU
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOEALFCU = MIN(1.0_JPRB,((MAX(RTICECU,MIN(RTWAT,PTARE))&
+     &-RTICECU)*RTWAT_RTICECU_R)**2) 
+  END FUNCTION FOEALFCU
+
+  !     Pressure of water vapour at saturation
+  !        INPUT : PTARE = TEMPERATURE
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEEWMCU(PTARE)
+    REAL(KIND=JPRB) :: FOEEWMCU
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOEEWMCU = R2ES *&
+     &(FOEALFCU(PTARE)*EXP(R3LES*(PTARE-RTT)/(PTARE-R4LES))+&
+     &(1.0_JPRB-FOEALFCU(PTARE))*EXP(R3IES*(PTARE-RTT)/(PTARE-R4IES)))
+  END FUNCTION FOEEWMCU
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEDEMCU(PTARE)
+    REAL(KIND=JPRB) :: FOEDEMCU
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOEDEMCU = FOEALFCU(PTARE)*R5ALVCP*(1.0_JPRB/(PTARE-R4LES)**2)+&
+     &(1.0_JPRB-FOEALFCU(PTARE))*R5ALSCP*(1.0_JPRB/(PTARE-R4IES)**2)
+  END FUNCTION FOEDEMCU
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOELDCPMCU(PTARE)
+    REAL(KIND=JPRB) :: FOELDCPMCU
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOELDCPMCU = FOEALFCU(PTARE)*RALVDCP+(1.0_JPRB-FOEALFCU(PTARE))*RALSDCP
+  END FUNCTION FOELDCPMCU
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOELHMCU(PTARE)
+    REAL(KIND=JPRB) :: FOELHMCU
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOELHMCU = FOEALFCU(PTARE)*RLVTT+(1.0_JPRB-FOEALFCU(PTARE))*RLSTT
+  END FUNCTION FOELHMCU
+
+  !     ------------------------------------------------------------------
+
+  !     Pressure of water vapour at saturation
+  !     This one is for the WMO definition of saturation, i.e. always
+  !     with respect to water.
+  !     
+  !     Duplicate to FOEELIQ and FOEEICE for separate ice variable
+  !     FOEELIQ always respect to water 
+  !     FOEEICE always respect to ice 
+  !     (could use FOEEW and FOEEWMO, but naming convention unclear)
+  !     FOELSON returns e wrt liquid water using D Sonntag (1994, Met. Zeit.)
+  !      - now recommended for use with radiosonde data (WMO CIMO guide, 2014)
+  !      unlike the FOEE functions does not include 1/(RETV+1.0_JPRB) factor
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEEWMO(PTARE)
+    REAL(KIND=JPRB) :: FOEEWMO
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOEEWMO = R2ES*EXP(R3LES*(PTARE-RTT)/(PTARE-R4LES))
+  END FUNCTION FOEEWMO
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEELIQ(PTARE)
+    REAL(KIND=JPRB) :: FOEELIQ
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOEELIQ = R2ES*EXP(R3LES*(PTARE-RTT)/(PTARE-R4LES))
+  END FUNCTION FOEELIQ
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEEICE(PTARE)
+    REAL(KIND=JPRB) :: FOEEICE
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOEEICE = R2ES*EXP(R3IES*(PTARE-RTT)/(PTARE-R4IES))
+  END FUNCTION FOEEICE
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOELSON(PTARE)
+    REAL(KIND=JPRB) :: FOELSON
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOELSON = EXP( -6096.9385_JPRB/PTARE + 21.2409642_JPRB &
+                     - 2.711193E-2_JPRB * PTARE    &
+                     + 1.673952E-5_JPRB * PTARE**2 &
+                     + 2.433502_JPRB * LOG(PTARE))
+  END FUNCTION FOELSON
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOELES_V(PTARE)
+    REAL(KIND=JPRB) :: FOELES_V
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOELES_V=R3LES*(PTARE-RTT)/(PTARE-R4LES)
+  END FUNCTION FOELES_V
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEIES_V(PTARE)
+    REAL(KIND=JPRB) :: FOEIES_V
+    REAL(KIND=JPRB), VALUE :: PTARE
+
+    FOEIES_V=R3IES*(PTARE-RTT)/(PTARE-R4IES)
+  END FUNCTION FOEIES_V
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEEWM_V(PTARE,EXP1,EXP2)
+    REAL(KIND=JPRB) :: FOEEWM_V
+    REAL(KIND=JPRB), VALUE :: PTARE, EXP1, EXP2
+
+    FOEEWM_V=R2ES*(FOEALFA(PTARE)*EXP1+(1.0_JPRB-FOEALFA(PTARE))*EXP2)
+  END FUNCTION FOEEWM_V
+
+ATTRIBUTES(DEVICE)  PURE ELEMENTAL FUNCTION FOEEWMCU_V(PTARE,EXP1,EXP2)
+    REAL(KIND=JPRB) :: FOEEWMCU_V
+    REAL(KIND=JPRB), VALUE :: PTARE, EXP1, EXP2
+
+    FOEEWMCU_V = R2ES*(FOEALFCU(PTARE)*EXP1+(1.0_JPRB-FOEALFCU(PTARE))*EXP2)
+  END FUNCTION FOEEWMCU_V
+
+END MODULE FCTTRE_CUF_MOD
diff --git a/src/common/module/mycpu.c b/src/common/module/mycpu.c
index 1aed0e4d..8c6e8506 100644
--- a/src/common/module/mycpu.c
+++ b/src/common/module/mycpu.c
@@ -8,9 +8,11 @@
  * nor does it submit to any jurisdiction.
  */
 
-//#define _GNU_SOURCE
-
+#if defined(__APPLE__)
+static int sched_getcpu() { return 0; }
+#else
 #include <sched.h>
+#endif
 
 /*
  * Find the core the thread belongs to
@@ -20,7 +22,10 @@ int mycpu_ ()
 {
   /* int sched_getcpu(void); */
   int cpu;
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wimplicit-function-declaration"
   cpu = sched_getcpu();
+#pragma clang diagnostic pop
   return cpu;
 }
 int mycpu() { return mycpu_(); }
diff --git a/src/common/module/timer_mod.F90 b/src/common/module/timer_mod.F90
index 642c7e60..f0045c35 100644
--- a/src/common/module/timer_mod.F90
+++ b/src/common/module/timer_mod.F90
@@ -129,7 +129,7 @@ SUBROUTINE PERFORMANCE_TIMER_PRINT_PERFORMANCE(SELF, NPROMA, NGPBLKS, NGPTOT)
 1000  format(1x,5a10,1x,a4,' : ',2a10)
 1001  format(1x,5i10,1x,i4,' : ',2i10,:,' @ rank#',i0,':core#',i0)
 1002  format(1x,5i10,1x,i4,' : ',2i10,  ' : TOTAL @ rank#', i0)
-1003  format(1x,i6,' x',i2,4i10,1x,i4,' : ',2i10,' : TOTAL')
+1003  format(1x,i4,' x',i4,4i10,1x,i4,' : ',2i10,' : TOTAL')
       DO TID=0, SELF%NUMOMP-1
          TLOC = SELF%TTHREAD(TID)
          ZFRAC = REAL(SELF%IGPC(TID),JPRB)/REAL(NGPTOT,JPRB)
diff --git a/src/common/module/yoethf.F90 b/src/common/module/yoethf.F90
index 0561e5ec..c3b56253 100644
--- a/src/common/module/yoethf.F90
+++ b/src/common/module/yoethf.F90
@@ -72,6 +72,10 @@ MODULE YOETHF
 !$acc   r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, rtwat, rtice, rticecu, &
 !$acc   rtwat_rtice_r, rtwat_rticecu_r, rkoop1, rkoop2)
 
+!$omp declare target(r2es, r3les, r3ies, r4les, r4ies, r5les, r5ies)
+!$omp declare target(  r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, rtwat, rtice, rticecu)
+!$omp declare target(  rtwat_rtice_r, rtwat_rticecu_r, rkoop1, rkoop2)
+
 !       ----------------------------------------------------------------
 
 CONTAINS
@@ -96,6 +100,10 @@ SUBROUTINE YOETHF_LOAD_PARAMETERS()
     CALL LOAD_SCALAR('RTWAT_RTICECU_R', RTWAT_RTICECU_R)
     CALL LOAD_SCALAR('RKOOP1', RKOOP1)
     CALL LOAD_SCALAR('RKOOP2', RKOOP2)
+
+!$acc update device(r2es, r3les, r3ies, r4les, r4ies, r5les, r5ies, &
+!$acc   r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, rtwat, rtice, rticecu, &
+!$acc   rtwat_rtice_r, rtwat_rticecu_r, rkoop1, rkoop2)
   END SUBROUTINE YOETHF_LOAD_PARAMETERS
 
 END MODULE YOETHF
diff --git a/src/common/module/yoethf.cuf.F90 b/src/common/module/yoethf.cuf.F90
new file mode 100644
index 00000000..eac79ad3
--- /dev/null
+++ b/src/common/module/yoethf.cuf.F90
@@ -0,0 +1,93 @@
+! (C) Copyright 1988- ECMWF.
+!
+! This software is licensed under the terms of the Apache Licence Version 2.0
+! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+!
+! In applying this licence, ECMWF does not waive the privileges and immunities
+! granted to it by virtue of its status as an intergovernmental organisation
+! nor does it submit to any jurisdiction.
+
+MODULE YOETHF_CUF
+
+!     ------------------------------------------------------------------
+!*     *YOETHF_CUF* "_D" COPIES OF THE *YOETHF* DERIVED CONSTANTS
+!                   (ECMWF THERMODYNAMICS), DECLARED WITH THE CUDA
+!                   FORTRAN *CONSTANT* ATTRIBUTE
+!     ------------------------------------------------------------------
+
+USE PARKIND1, ONLY : JPIM, JPRB
+
+USE YOETHF, ONLY : R2ES, R3LES, R3IES, R4LES, R4IES, R5LES, R5IES, &
+ & R5ALVCP, R5ALSCP, RALVDCP, RALSDCP, RALFDCP, &
+ & RTWAT, RTICE, RTICECU, RTWAT_RTICE_R, RTWAT_RTICECU_R, &
+ & RKOOP1, RKOOP2, RVTMP2, RHOH2O, RTBER, RTBERCU
+
+IMPLICIT NONE
+
+SAVE
+
+! One "_D" variable per YOETHF constant; the name table below describes each base name.
+REAL(KIND=JPRB), CONSTANT :: R2ES_D, R3LES_D, R3IES_D, R4LES_D, R4IES_D, R5LES_D, R5IES_D
+REAL(KIND=JPRB), CONSTANT :: RVTMP2_D, RHOH2O_D
+REAL(KIND=JPRB), CONSTANT :: R5ALVCP_D, R5ALSCP_D, RALVDCP_D, RALSDCP_D, RALFDCP_D
+REAL(KIND=JPRB), CONSTANT :: RTWAT_D, RTBER_D, RTBERCU_D, RTICE_D, RTICECU_D
+REAL(KIND=JPRB), CONSTANT :: RTWAT_RTICE_R_D, RTWAT_RTICECU_R_D
+REAL(KIND=JPRB), CONSTANT :: RKOOP1_D, RKOOP2_D
+
+!     J.-J. MORCRETTE                   91/07/14  ADAPTED TO I.F.S.
+
+!      NAME     TYPE      PURPOSE
+!      ----     ----      -------
+
+!     *R__ES*   REAL      *CONSTANTS USED FOR COMPUTATION OF SATURATION
+!                         MIXING RATIO OVER LIQUID WATER(*R_LES*) OR
+!                         ICE(*R_IES*).
+!     *RVTMP2*  REAL      *RVTMP2=RCPV/RCPD-1.
+!     *RHOH2O*  REAL      *DENSITY OF LIQUID WATER.   (RATM/100.)
+!     *R5ALVCP* REAL      *R5LES*RLVTT/RCPD
+!     *R5ALSCP* REAL      *R5IES*RLSTT/RCPD
+!     *RALVDCP* REAL      *RLVTT/RCPD
+!     *RALSDCP* REAL      *RLSTT/RCPD
+!     *RALFDCP* REAL      *RLMLT/RCPD
+!     *RTWAT*   REAL      *RTWAT=RTT
+!     *RTBER*   REAL      *RTBER=RTT-0.05
+!     *RTBERCU* REAL      *RTBERCU=RTT-5.0
+!     *RTICE*   REAL      *RTICE=RTT-0.1
+!     *RTICECU* REAL      *RTICECU=RTT-23.0
+!     *RKOOP?*  REAL      *CONSTANTS TO DESCRIBE KOOP FORM FOR NUCLEATION
+!     *RTWAT_RTICE_R*   REAL      *RTWAT_RTICE_R=1./(RTWAT-RTICE)
+!     *RTWAT_RTICECU_R* REAL      *RTWAT_RTICECU_R=1./(RTWAT-RTICECU)
+!     EACH *<NAME>_D* VARIABLE DECLARED ABOVE IS THE COPY OF *<NAME>*.
+
+!       ----------------------------------------------------------------
+
+CONTAINS
+
+! Host routine: assigns every "_D" constant from its YOETHF counterpart.
+ATTRIBUTES(HOST) SUBROUTINE YOETHF_UPDATE_DEVICE()
+  R2ES_D            = R2ES
+  R3LES_D           = R3LES
+  R3IES_D           = R3IES
+  R4LES_D           = R4LES
+  R4IES_D           = R4IES
+  R5LES_D           = R5LES
+  R5IES_D           = R5IES
+  RVTMP2_D          = RVTMP2
+  RHOH2O_D          = RHOH2O
+  R5ALVCP_D         = R5ALVCP
+  R5ALSCP_D         = R5ALSCP
+  RALVDCP_D         = RALVDCP
+  RALSDCP_D         = RALSDCP
+  RALFDCP_D         = RALFDCP
+  RTWAT_D           = RTWAT
+  RTBER_D           = RTBER
+  RTBERCU_D         = RTBERCU
+  RTICE_D           = RTICE
+  RTICECU_D         = RTICECU
+  RTWAT_RTICE_R_D   = RTWAT_RTICE_R
+  RTWAT_RTICECU_R_D = RTWAT_RTICECU_R
+  RKOOP1_D          = RKOOP1
+  RKOOP2_D          = RKOOP2
+END SUBROUTINE YOETHF_UPDATE_DEVICE
+
+END MODULE YOETHF_CUF
diff --git a/src/common/module/yomcst.F90 b/src/common/module/yomcst.F90
index 6de0fc5e..e922a9bd 100644
--- a/src/common/module/yomcst.F90
+++ b/src/common/module/yomcst.F90
@@ -36,7 +36,7 @@ MODULE YOMCST
 ! A1.1 Astronomical constants
 ! * RDAY         : duration of the solar day
 ! * RDAYI        : invariant time unit of 86400s
-! * RHOUR        : duration of the solar hour 
+! * RHOUR        : duration of the solar hour
 ! * REA          : astronomical unit (mean distance Earth-sun)
 ! * REPSM        : polar axis tilting angle
 ! * RSIYEA       : duration of the sideral year
@@ -159,6 +159,8 @@ MODULE YOMCST
 REAL(KIND=JPRB) :: RSNAN
 
 !$acc declare copyin(rg, rd, rcpd, retv, rlvtt, rlstt, rlmlt, rtt, rv)
+!$omp declare target(rg, rd, rcpd, retv, rlvtt, rlstt, rlmlt, rtt, rv)
+
 
 !    ------------------------------------------------------------------
 
@@ -174,6 +176,7 @@ SUBROUTINE YOMCST_LOAD_PARAMETERS()
     CALL LOAD_SCALAR('RLMLT', RLMLT)
     CALL LOAD_SCALAR('RTT', RTT)
     CALL LOAD_SCALAR('RV', RV)
+!$acc update device(rg, rd, rcpd, retv, rlvtt, rlstt, rlmlt, rtt, rv)
   END SUBROUTINE YOMCST_LOAD_PARAMETERS
 
 END MODULE YOMCST
diff --git a/src/common/module/yomcst.cuf.F90 b/src/common/module/yomcst.cuf.F90
new file mode 100644
index 00000000..3a452c10
--- /dev/null
+++ b/src/common/module/yomcst.cuf.F90
@@ -0,0 +1,42 @@
+! (C) Copyright 1988- ECMWF.
+!
+! This software is licensed under the terms of the Apache Licence Version 2.0
+! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+!
+! In applying this licence, ECMWF does not waive the privileges and immunities
+! granted to it by virtue of its status as an intergovernmental organisation
+! nor does it submit to any jurisdiction.
+
+MODULE YOMCST_CUF
+
+! Copies of the YOMCST constants declared with the CUDA Fortran CONSTANT
+! attribute. Each "_D" variable mirrors the YOMCST variable of the same name
+! without the suffix; YOMCST_UPDATE_DEVICE performs the copy.
+
+USE PARKIND1, ONLY : JPRB
+USE YOMCST,   ONLY : RG, RD, RCPD, RETV, RLVTT, RLSTT, RLMLT, RTT, RV
+
+IMPLICIT NONE
+
+REAL(KIND=JPRB), CONSTANT :: RG_D, RD_D, RV_D
+REAL(KIND=JPRB), CONSTANT :: RCPD_D, RETV_D, RTT_D
+REAL(KIND=JPRB), CONSTANT :: RLVTT_D, RLSTT_D, RLMLT_D
+
+!    ------------------------------------------------------------------
+
+CONTAINS
+
+! Host routine: assigns every "_D" constant from its YOMCST counterpart.
+ATTRIBUTES(HOST) SUBROUTINE YOMCST_UPDATE_DEVICE()
+  RG_D    = RG
+  RD_D    = RD
+  RCPD_D  = RCPD
+  RETV_D  = RETV
+  RLVTT_D = RLVTT
+  RLSTT_D = RLSTT
+  RLMLT_D = RLMLT
+  RTT_D   = RTT
+  RV_D    = RV
+END SUBROUTINE YOMCST_UPDATE_DEVICE
+
+END MODULE YOMCST_CUF
diff --git a/src/prototype1/CMakeLists.txt b/src/prototype1/CMakeLists.txt
index 1fd11c5c..678cae99 100644
--- a/src/prototype1/CMakeLists.txt
+++ b/src/prototype1/CMakeLists.txt
@@ -58,22 +58,34 @@ if( HAVE_CLOUDSC_PROTOTYPE1 )
             ${dwarf_P_cloudMicrophysics_IFSScheme_srcs}
             ${dwarf_P_cloudMicrophysics_IFSScheme_support_srcs}
         DEFINITIONS ${CLOUDSC_DEFINITIONS}
+    PUBLIC_LIBS  # optional deps; $<BOOL:...> normalises ON/TRUE/1/unset to the 0/1 a conditional genex requires
+        $<$<BOOL:${HAVE_OMP}>:OpenMP::OpenMP_Fortran>
+        $<$<BOOL:${HAVE_SERIALBOX}>:Serialbox::Serialbox_Fortran>
     )
 
-    if( TARGET Serialbox::Serialbox_Fortran )
-        target_link_libraries( dwarf-P-cloudMicrophysics-IFSScheme-libs PRIVATE Serialbox::Serialbox_Fortran )
-    endif()
-
-    if( HAVE_OMP AND TARGET OpenMP::OpenMP_Fortran )
-        target_link_libraries( dwarf-P-cloudMicrophysics-IFSScheme-libs PRIVATE OpenMP::OpenMP_Fortran )
-    endif()
-
     ecbuild_add_executable(TARGET dwarf-P-cloudMicrophysics-IFSScheme
         SOURCES cloudsc_dwarf.F90
+        LIBS dwarf-P-cloudMicrophysics-IFSScheme-libs
     )
-    target_link_libraries( dwarf-P-cloudMicrophysics-IFSScheme dwarf-P-cloudMicrophysics-IFSScheme-libs )
 
     # Create symlink for the input data
     execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
         ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/cloudsc.bin ${CMAKE_CURRENT_BINARY_DIR}/../../../cloudsc.bin )
+
+    ecbuild_add_test(
+        TARGET dwarf-P-cloudMicrophysics-IFSScheme-serial
+        COMMAND bin/dwarf-P-cloudMicrophysics-IFSScheme
+        ARGS 1 100 16
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 1
+    )
+
+    ecbuild_add_test(
+        TARGET dwarf-P-cloudMicrophysics-IFSScheme-omp
+        COMMAND bin/dwarf-P-cloudMicrophysics-IFSScheme
+        ARGS 4 100 16
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
+        OMP 4
+        CONDITION HAVE_OMP
+    )
 endif()
diff --git a/src/prototype1/cloudsc/cloudsc_driver.F90 b/src/prototype1/cloudsc/cloudsc_driver.F90
index 8dc416d6..c1777cf1 100644
--- a/src/prototype1/cloudsc/cloudsc_driver.F90
+++ b/src/prototype1/cloudsc/cloudsc_driver.F90
@@ -134,7 +134,7 @@ SUBROUTINE CLOUDSC_DRIVER
 CALL GET_ENVIRONMENT_VARIABLE('CLOUDSC_WRITE_REFERENCE', write_reference)
 
 open(iu,file='cloudsc.bin',status='old',&
-     & access='stream', form='unformatted')
+     & access='stream', form='unformatted', convert='BIG_ENDIAN')
 
 read(iu) KLON,KLEV,KFLDX
 write(0,*) 'KLON,KLEV,KFLDX,NCLV=',KLON,KLEV,KFLDX,NCLV
diff --git a/src/prototype1/support/mycpu.c b/src/prototype1/support/mycpu.c
index 1aed0e4d..8c6e8506 100644
--- a/src/prototype1/support/mycpu.c
+++ b/src/prototype1/support/mycpu.c
@@ -8,9 +8,16 @@
  * nor does it submit to any jurisdiction.
  */
 
-//#define _GNU_SOURCE
-
+#if defined(__APPLE__)
+/* Apple builds skip <sched.h>; this stub stands in for sched_getcpu() and always returns 0. */
+static int sched_getcpu(void) { return 0; }
+#else
+/* glibc declares sched_getcpu() in <sched.h> only when _GNU_SOURCE is defined before the include. */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
 #include <sched.h>
+#endif
 
 /*
  * Find the core the thread belongs to
@@ -20,7 +27,10 @@ int mycpu_ ()
 {
   /* int sched_getcpu(void); */
   int cpu;
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wimplicit-function-declaration"
   cpu = sched_getcpu();
+#pragma clang diagnostic pop
   return cpu;
 }
 int mycpu() { return mycpu_(); }