From c9b66ed15302624437f07292aa4a9d4f41ae968c Mon Sep 17 00:00:00 2001
From: vladd-bit <vlad.a.dinu@gmail.com>
Date: Wed, 28 Feb 2024 00:59:08 +0000
Subject: [PATCH 1/7] Services: Jhub updates.

---
 services/jupyter-hub/Dockerfile_singleuser       | 9 ++++++++-
 services/jupyter-hub/Dockerfile_singleuser_gpu   | 2 +-
 services/jupyter-hub/config/jupyterhub_config.py | 3 ++-
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/services/jupyter-hub/Dockerfile_singleuser b/services/jupyter-hub/Dockerfile_singleuser
index 968c60d7..69ebacdb 100644
--- a/services/jupyter-hub/Dockerfile_singleuser
+++ b/services/jupyter-hub/Dockerfile_singleuser
@@ -89,6 +89,13 @@ RUN apt-get update && apt-get upgrade -y && \
     libxcursor1 libxcomposite1 libasound2 libxi6 libxtst6 \
     r-base
 
+# python 3.11 (update + install in the same layer; -y because docker build cannot answer the apt prompt)
+RUN apt-get update && apt-get install -y python3.11-full python3.11-dev
+
+# be careful, this conflicts
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
+
+
 RUN apt-get update && apt-get upgrade -y 
 
 # Microsoft repos
@@ -132,7 +139,7 @@ RUN pip3 install --no-cache-dir --upgrade pip
 RUN pip3 install --no-cache-dir setuptools wheel virtualenv
 
 # jupyterhub stuff
-RUN pip3 install --no-cache-dir ipywidgets jupyter jupyterhub jupyterlab jupyterlab-git dask-labextension importlib_metadata
+RUN pip3 install --no-cache-dir ipywidgets jupyter jupyterhub jupyterlab jupyterlab-git importlib_metadata bitstream
 RUN pip3 install --no-cache-dir jupyterlab_widgets jupyter_contrib_core jupyter_contrib_nbextensions jupyter-server-proxy fastbook
 RUN pip3 install --no-cache-dir docker docker-compose dockerspawner jupyterhub-firstuseauthenticator jupyterhub-systemdspawner jupyterhub-jwtauthenticator jupyterhub-client jupyterhub-kerberosauthenticator 
 RUN pip3 install --no-cache-dir jupyterhub-nanowireauthenticator jupyterhub-ldapauthenticator jupyterhub-kubespawner jupyterhub-nativeauthenticator
diff --git a/services/jupyter-hub/Dockerfile_singleuser_gpu b/services/jupyter-hub/Dockerfile_singleuser_gpu
index 086cad5c..b1bf2b5e 100644
--- a/services/jupyter-hub/Dockerfile_singleuser_gpu
+++ b/services/jupyter-hub/Dockerfile_singleuser_gpu
@@ -154,7 +154,7 @@ RUN pip3 install --no-cache-dir --upgrade pip
 RUN pip3 install --no-cache-dir  setuptools wheel virtualenv
 
 # jupyterhub stuff
-RUN pip3 install --no-cache-dir ipywidgets jupyter jupyterhub jupyterlab jupyterlab-git dask-labextension importlib_metadata
+RUN pip3 install --no-cache-dir ipywidgets jupyter jupyterhub jupyterlab jupyterlab-git importlib_metadata bitstream
 RUN pip3 install --no-cache-dir jupyterlab_widgets jupyter_contrib_core jupyter_contrib_nbextensions jupyter-server-proxy fastbook
 RUN pip3 install --no-cache-dir docker docker-compose dockerspawner jupyterhub-firstuseauthenticator jupyterhub-systemdspawner jupyterhub-jwtauthenticator jupyterhub-client jupyterhub-kerberosauthenticator 
 RUN pip3 install --no-cache-dir jupyterhub-nanowireauthenticator jupyterhub-ldapauthenticator jupyterhub-kubespawner jupyterhub-nativeauthenticator
diff --git a/services/jupyter-hub/config/jupyterhub_config.py b/services/jupyter-hub/config/jupyterhub_config.py
index 69b223bc..532c913a 100644
--- a/services/jupyter-hub/config/jupyterhub_config.py
+++ b/services/jupyter-hub/config/jupyterhub_config.py
@@ -148,7 +148,8 @@ def start(self):
         if self.user.name not in whitelist:
             whitelist.add(self.user.name)
             with open(userlist_path , "a") as f:
-                f.write(self.user.name + "\n")
+                f.write("\n")
+                f.write(self.user.name)
 
         if self.user.name in list(team_map.keys()):
             for team in team_map[self.user.name]:

From 2719caf263ddb711457deb253cdbf3cbc2d8ee0f Mon Sep 17 00:00:00 2001
From: vladd-bit <vlad.a.dinu@gmail.com>
Date: Thu, 7 Mar 2024 14:11:31 +0000
Subject: [PATCH 2/7] CU:8693yammy | NiFi scripts: Added Cerner blob
 decompression util.

---
 nifi/user-scripts/utils/cerner_blob.py | 124 +++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 nifi/user-scripts/utils/cerner_blob.py

diff --git a/nifi/user-scripts/utils/cerner_blob.py b/nifi/user-scripts/utils/cerner_blob.py
new file mode 100644
index 00000000..d2586790
--- /dev/null
+++ b/nifi/user-scripts/utils/cerner_blob.py
@@ -0,0 +1,124 @@
+from typing import List
+
+
+class LzwItem:
+    def __init__(self, _prefix: int = 0, _suffix: int = 0) -> None:
+        self.prefix = _prefix
+        self.suffix = _suffix
+
+
+class DecompressLzwCernerBlob:
+    def __init__(self) -> None:
+        self.MAX_CODES: int = 8192
+        self.tmp_decompression_buffer: List[int] = [0] * self.MAX_CODES
+        self.lzw_lookup_table: List[LzwItem] = [LzwItem()] * self.MAX_CODES
+        self.tmp_buffer_index: int = 0
+        self.current_byte_buffer_index: int = 0
+
+        # codes below 256 are literal bytes, 256 resets the table, 257 marks EOF; code_count is the next table slot written
+        self.code_count: int = 257
+        self.output_stream = bytearray()
+
+    def save_to_lookup_table(self, compressed_code: int):
+        self.tmp_buffer_index = -1
+        while compressed_code >= 258:
+            self.tmp_decompression_buffer[self.tmp_buffer_index] = \
+                self.lzw_lookup_table[compressed_code].suffix
+            compressed_code = self.lzw_lookup_table[compressed_code].prefix
+
+        self.tmp_buffer_index += 1
+        self.tmp_decompression_buffer[self.tmp_buffer_index] = compressed_code
+
+        for i in reversed(list(range(self.tmp_buffer_index + 1))):
+            self.output_stream.append(self.tmp_decompression_buffer[i])
+
+    def decompress(self, input_stream=bytearray()):
+
+        byte_buffer_index: int = 0
+
+        # used for bit shifts
+        shift: int = 1
+        current_shift: int = 1
+
+        previous_code: int = 0
+        middle_code: int = 0
+        lookup_index: int = 0
+
+        skip_flag: bool = False
+
+        first_code = input_stream[byte_buffer_index]
+
+        while True:
+            if current_shift >= 9:
+
+                current_shift -= 8
+
+                if first_code != 0:
+                    byte_buffer_index += 1
+                    middle_code = input_stream[byte_buffer_index]
+
+                    first_code = (first_code << current_shift +
+                                  8) | (middle_code << current_shift)
+
+                    byte_buffer_index += 1
+                    middle_code = input_stream[byte_buffer_index]
+
+                    tmp_code = middle_code >> (8 - current_shift)
+                    lookup_index = first_code | tmp_code
+
+                    skip_flag = True
+                else:
+                    byte_buffer_index += 1
+                    first_code = input_stream[byte_buffer_index]
+                    byte_buffer_index += 1
+                    middle_code = input_stream[byte_buffer_index]
+            else:
+                byte_buffer_index += 1
+                middle_code = input_stream[byte_buffer_index]
+
+            if not skip_flag:
+                lookup_index = (first_code << current_shift) | (
+                    middle_code >> 8 - current_shift)
+
+                if lookup_index == 256:
+                    shift = 1
+                    current_shift += 1
+                    first_code = input_stream[byte_buffer_index]
+
+                    self.tmp_decompression_buffer = [0] * self.MAX_CODES
+                    self.tmp_buffer_index = 0
+
+                    self.lzw_lookup_table = [LzwItem()] * self.MAX_CODES
+                    self.code_count = 257
+                    continue
+
+                elif lookup_index == 257:  # EOF marker
+                    return self.output_stream
+
+            skip_flag = False
+
+            # skipit part
+            if previous_code == 0:
+                self.tmp_decompression_buffer[0] = lookup_index
+            if lookup_index < self.code_count:
+                self.save_to_lookup_table(lookup_index)
+                if self.code_count < self.MAX_CODES:
+                    self.lzw_lookup_table[self.code_count] = LzwItem(
+                        previous_code,
+                        self.tmp_decompression_buffer[self.tmp_buffer_index])
+                    self.code_count += 1
+            else:
+                self.lzw_lookup_table[self.code_count] = LzwItem(
+                    previous_code,
+                    self.tmp_decompression_buffer[self.tmp_buffer_index])
+                self.code_count += 1
+                self.save_to_lookup_table(lookup_index)
+            # end of skipit
+
+            first_code = (middle_code & (0xff >> current_shift))
+            current_shift += shift
+
+            if self.code_count in [511, 1023, 2047, 4095]:
+                shift += 1
+                current_shift += 1
+            previous_code = lookup_index

From 5e804d3784cb53968a025582c11f7d5b7f774666 Mon Sep 17 00:00:00 2001
From: Git bot <bot@noreply.github.com>
Date: Thu, 7 Mar 2024 14:11:54 +0000
Subject: [PATCH 3/7] Auto updated submodule references

---
 services/jupyter-hub/notebooks/working_with_cogstack | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/services/jupyter-hub/notebooks/working_with_cogstack b/services/jupyter-hub/notebooks/working_with_cogstack
index 00a11154..404a2468 160000
--- a/services/jupyter-hub/notebooks/working_with_cogstack
+++ b/services/jupyter-hub/notebooks/working_with_cogstack
@@ -1 +1 @@
-Subproject commit 00a11154ca42134948cc6558c8fc638983198797
+Subproject commit 404a24687f9a80b534ccd67fb8c0dcbc50997d49

From 703b0e928fcc98987ce929c7fbc57523ab3e7a29 Mon Sep 17 00:00:00 2001
From: vladd-bit <vlad.a.dinu@gmail.com>
Date: Thu, 7 Mar 2024 14:22:38 +0000
Subject: [PATCH 4/7] CU: 8693yammy | NiFi scripts: added parser script for
 cerner blobs.

---
 nifi/user-scripts/parse-cerner-blob.py | 27 ++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 nifi/user-scripts/parse-cerner-blob.py

diff --git a/nifi/user-scripts/parse-cerner-blob.py b/nifi/user-scripts/parse-cerner-blob.py
new file mode 100644
index 00000000..a7edceaa
--- /dev/null
+++ b/nifi/user-scripts/parse-cerner-blob.py
@@ -0,0 +1,27 @@
+import sys
+from .utils.cerner_blob import DecompressLzwCernerBlob
+
+# This needs to be investigated, records might have different charsets,
+#   currently only tested with "iso-8859-1"
+#   other frequently used encodings: "utf-16le", "utf-16be"
+# In some cases you will need to figure this out yourself, depending on
+#   the data source
+INPUT_CHARSET = "iso-8859-1"
+
+# expected (optional)
+OUTPUT_CHARSET = "windows-1252"
+
+input_cerner_blob = bytearray(sys.stdin.read(), encoding=INPUT_CHARSET)
+
+for arg in sys.argv:
+    _arg = arg.split("=", 1)
+
+    if _arg[0] == "input_charset":
+        INPUT_CHARSET = str(_arg[1]).lower()
+    elif _arg[0] == "output_charset":
+        OUTPUT_CHARSET = str(_arg[1]).lower()
+
+decompress_blob = DecompressLzwCernerBlob()
+decompress_blob.decompress(input_cerner_blob)
+
+sys.stdout.write(decompress_blob.output_stream.decode(encoding=OUTPUT_CHARSET))

From 33eff5fb04e935a1e7be789687898c416403c56a Mon Sep 17 00:00:00 2001
From: vladd-bit <vlad.a.dinu@gmail.com>
Date: Thu, 7 Mar 2024 17:59:04 +0000
Subject: [PATCH 5/7] NiFi scripts: fixed import + overflow issue.

---
 nifi/user-scripts/parse-cerner-blob.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nifi/user-scripts/parse-cerner-blob.py b/nifi/user-scripts/parse-cerner-blob.py
index a7edceaa..5e3ba37c 100644
--- a/nifi/user-scripts/parse-cerner-blob.py
+++ b/nifi/user-scripts/parse-cerner-blob.py
@@ -1,5 +1,5 @@
 import sys
-from .utils.cerner_blob import DecompressLzwCernerBlob
+from utils.cerner_blob import DecompressLzwCernerBlob
 
 # This needs to be investigated, records might have different charsets,
 #   currently only tested with "iso-8859-1"
@@ -11,7 +11,7 @@
 # expected (optional)
 OUTPUT_CHARSET = "windows-1252"
 
-input_cerner_blob = bytearray(sys.stdin.read(), encoding=INPUT_CHARSET)
+input_cerner_blob = str(sys.stdin.buffer.read(), INPUT_CHARSET).encode(INPUT_CHARSET)
 
 for arg in sys.argv:
     _arg = arg.split("=", 1)

From cbe733a7302877400bdb36c86bdfaa47b9dda65f Mon Sep 17 00:00:00 2001
From: vladd-bit <vlad.a.dinu@gmail.com>
Date: Thu, 7 Mar 2024 23:00:51 +0000
Subject: [PATCH 6/7] NiFi Scripts: fixed cerner decompression problem.

---
 nifi/user-scripts/utils/cerner_blob.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nifi/user-scripts/utils/cerner_blob.py b/nifi/user-scripts/utils/cerner_blob.py
index d2586790..04f5f366 100644
--- a/nifi/user-scripts/utils/cerner_blob.py
+++ b/nifi/user-scripts/utils/cerner_blob.py
@@ -22,6 +22,7 @@ def __init__(self) -> None:
     def save_to_lookup_table(self, compressed_code: int):
         self.tmp_buffer_index = -1
         while compressed_code >= 258:
+            self.tmp_buffer_index += 1
             self.tmp_decompression_buffer[self.tmp_buffer_index] = \
                 self.lzw_lookup_table[compressed_code].suffix
             compressed_code = self.lzw_lookup_table[compressed_code].prefix

From 12bea4451882c71c72ffd3ec9fcbaf40131a1e18 Mon Sep 17 00:00:00 2001
From: vladd-bit <vlad.a.dinu@gmail.com>
Date: Fri, 8 Mar 2024 11:16:40 +0000
Subject: [PATCH 7/7] NiFi scripts: added multiple output(s) for cerner
 decompression.

---
 nifi/user-scripts/parse-cerner-blob.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/nifi/user-scripts/parse-cerner-blob.py b/nifi/user-scripts/parse-cerner-blob.py
index 5e3ba37c..ac692aa7 100644
--- a/nifi/user-scripts/parse-cerner-blob.py
+++ b/nifi/user-scripts/parse-cerner-blob.py
@@ -11,6 +11,9 @@
 # expected (optional)
 OUTPUT_CHARSET = "windows-1252"
 
-input_cerner_blob = str(sys.stdin.buffer.read(), INPUT_CHARSET).encode(INPUT_CHARSET)
+# possible values:
+#   - binary: output binary code
+#   - string: output string after decompression
+OUTPUT_MODE = "binary"
 
 for arg in sys.argv:
@@ -20,8 +23,17 @@
         INPUT_CHARSET = str(_arg[1]).lower()
     elif _arg[0] == "output_charset":
         OUTPUT_CHARSET = str(_arg[1]).lower()
+    elif _arg[0] == "output_mode":
+        OUTPUT_MODE = str(_arg[1]).lower()
+
+# read stdin only after the arguments are parsed, so that an
+#   "input_charset" override is actually applied to the input
+input_cerner_blob = str(sys.stdin.buffer.read(), INPUT_CHARSET).encode(INPUT_CHARSET)
 
 decompress_blob = DecompressLzwCernerBlob()
 decompress_blob.decompress(input_cerner_blob)
 
-sys.stdout.write(decompress_blob.output_stream.decode(encoding=OUTPUT_CHARSET))
+if OUTPUT_MODE == "binary":
+    sys.stdout.buffer.write(bytes(decompress_blob.output_stream))
+else:
+    sys.stdout.write(decompress_blob.output_stream.decode(OUTPUT_CHARSET))