Merge pull request #10 from Clinical-Genomics-Lund/add-get-sample-name

Add get_sample_name to Missing
SMD-Bioinformatics-Lund · Oct 15, 2024 · f37ebed
2 parents ae4ad3d + 34ffbce
commit f37ebed
Show file tree

Hide file tree

Showing 2 changed files with 30 additions and 4 deletions.
diff --git a/jasentool/main.py b/jasentool/main.py
@@ -100,7 +100,7 @@ def missing(self, options):
             log_fpath = os.path.splitext(options.missing_log)[0] + ".log"
             empty_fpath = os.path.splitext(options.output_file)[0] + "_empty.csv"
             meta_dict = db.find(options.db_collection, {"metadata.QC": "OK"}, db.get_meta_fields())
-            analysis_dir_fnames = missing.parse_dir(options.analysis_dir)
+            analysis_dir_fnames = missing.parse_dir(options.analysis_dir, options.alter_sample_id)
             csv_dict, missing_samples_txt = missing.find_missing(meta_dict, analysis_dir_fnames, options.restore_dir)
             empty_files_dict, csv_dict = missing.remove_empty_files(csv_dict)
             utils.write_out_csv(csv_dict, options.assay, options.platform, options.output_file, options.alter_sample_id)

diff --git a/jasentool/missing.py b/jasentool/missing.py
@@ -2,6 +2,7 @@
 
 import os
 import re
+import json
 # import pymongo
 
 class Missing:
@@ -92,7 +93,7 @@ def parse_sample_sheet(sample_sheet, restore_dir, id_seqrun_dict):
                             #print(f"WARN: The following sample({sample_id}) seqrun ({seqrun}) doesn't match cgviz ({id_seqrun_dict[sample_id]})")
                             continue
                     except KeyError:
-                        #print(f"WARN: The following sample({sample_id}) isn't OK'd in cgviz") 
+                        #print(f"WARN: The following sample({sample_id}) isn't OK'd in cgviz")
                         continue
                     species = line.split(",")[-1].split("_")[2]
                     try:
@@ -234,9 +235,34 @@ def check_format(fpath):
         return fpath
 
     @staticmethod
-    def parse_dir(dir_fpath):
+    def get_sample_name(json_fpath):
+        """Reads a JSON file and retrieves the 'sample_name' from the JSON structure."""
+        try:
+            with open(json_fpath, 'r') as file:
+                result_json = json.load(file)
+
+            sample_name = result_json["sample_name"]
+            return sample_name
+        except KeyError as e:
+            print(f"KeyError: {e} {json_fpath}")
+            return None
+        except json.JSONDecodeError:
+            print(f"JSONError: {json_fpath}")
+            return None
+
+    @staticmethod
+    def parse_dir(dir_fpath, alter_sample_id):
         """Return filenames in directory"""
-        return [filename.split("_")[0] for filename in os.listdir(dir_fpath)]
+        dir_fpaths = []
+        for filename in os.listdir(dir_fpath):
+            if filename.endswith(".json"):
+                if alter_sample_id:
+                    sample_name = Missing.get_sample_name(os.path.join(dir_fpath, filename))
+                    if sample_name:
+                        dir_fpaths.append(sample_name)
+                else:
+                    dir_fpaths.append(filename.split("_")[0])
+        return dir_fpaths
 
     @staticmethod
     def filter_csv_dict(csv_dict, missing_samples):