CU: 8693yammy | NiFi scripts: added parser script for cerner blobs.

CogStack · Mar 7, 2024 · 703b0e9 · 703b0e9
1 parent 2719caf
commit 703b0e9
Showing 1 changed file with 27 additions and 0 deletions.
diff --git a/nifi/user-scripts/parse-cerner-blob.py b/nifi/user-scripts/parse-cerner-blob.py
@@ -0,0 +1,27 @@
+import sys
+from .utils.cerner_blob import DecompressLzwCernerBlob
+
+# Charset used to turn the text read from stdin back into the blob bytes.
+#   Records may use different charsets; only "iso-8859-1" has been tested.
+#   Other frequently used encodings: "utf-16le", "utf-16be".
+# In some cases you will need to figure this out yourself, depending on
+#   the data source.
+INPUT_CHARSET = "iso-8859-1"
+
+# Output decoding charset; an optional "output_charset=<name>" arg overrides it.
+OUTPUT_CHARSET = "windows-1252"
+
+# Apply "name=value" overrides first: the input charset must be final
+#   before stdin is read, otherwise an "input_charset" argument is ignored.
+for arg in sys.argv[1:]:
+    key, _, value = arg.partition("=")
+    if key == "input_charset" and value:
+        INPUT_CHARSET = value.lower()
+    elif key == "output_charset" and value:
+        OUTPUT_CHARSET = value.lower()
+
+# Rebuild the raw blob bytes from the text on stdin, then decompress them.
+input_cerner_blob = bytearray(sys.stdin.read(), encoding=INPUT_CHARSET)
+decompress_blob = DecompressLzwCernerBlob()
+decompress_blob.decompress(input_cerner_blob)
+sys.stdout.write(decompress_blob.output_stream.decode(encoding=OUTPUT_CHARSET))