Skip to content

Commit

Permalink
Merge pull request #10 from Clinical-Genomics-Lund/add-get-sample-name
Browse files Browse the repository at this point in the history
Add get_sample_name to Missing
  • Loading branch information
ryanjameskennedy authored Oct 15, 2024
2 parents ae4ad3d + 34ffbce commit f37ebed
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 4 deletions.
2 changes: 1 addition & 1 deletion jasentool/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def missing(self, options):
log_fpath = os.path.splitext(options.missing_log)[0] + ".log"
empty_fpath = os.path.splitext(options.output_file)[0] + "_empty.csv"
meta_dict = db.find(options.db_collection, {"metadata.QC": "OK"}, db.get_meta_fields())
analysis_dir_fnames = missing.parse_dir(options.analysis_dir)
analysis_dir_fnames = missing.parse_dir(options.analysis_dir, options.alter_sample_id)
csv_dict, missing_samples_txt = missing.find_missing(meta_dict, analysis_dir_fnames, options.restore_dir)
empty_files_dict, csv_dict = missing.remove_empty_files(csv_dict)
utils.write_out_csv(csv_dict, options.assay, options.platform, options.output_file, options.alter_sample_id)
Expand Down
32 changes: 29 additions & 3 deletions jasentool/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import os
import re
import json
# import pymongo

class Missing:
Expand Down Expand Up @@ -92,7 +93,7 @@ def parse_sample_sheet(sample_sheet, restore_dir, id_seqrun_dict):
#print(f"WARN: The following sample({sample_id}) seqrun ({seqrun}) doesn't match cgviz ({id_seqrun_dict[sample_id]})")
continue
except KeyError:
#print(f"WARN: The following sample({sample_id}) isn't OK'd in cgviz")
#print(f"WARN: The following sample({sample_id}) isn't OK'd in cgviz")
continue
species = line.split(",")[-1].split("_")[2]
try:
Expand Down Expand Up @@ -234,9 +235,34 @@ def check_format(fpath):
return fpath

@staticmethod
def parse_dir(dir_fpath):
def get_sample_name(json_fpath):
    """Read a JSON file and return the value stored under 'sample_name'.

    Args:
        json_fpath: Path to the JSON file to read.

    Returns:
        The value of the top-level "sample_name" key, or None when the key
        is absent, the top-level JSON value is not an object, or the file
        content cannot be decoded/parsed as JSON. A diagnostic line is
        printed in each of those cases.

    Raises:
        OSError: If the file cannot be opened (not handled here).
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(json_fpath, "r", encoding="utf-8") as file:
            result_json = json.load(file)
        return result_json["sample_name"]
    except KeyError as e:
        print(f"KeyError: {e} {json_fpath}")
        return None
    except TypeError as e:
        # Top-level JSON value is a list/scalar, so it cannot be keyed by name.
        print(f"TypeError: {e} {json_fpath}")
        return None
    except (json.JSONDecodeError, UnicodeDecodeError):
        print(f"JSONError: {json_fpath}")
        return None

@staticmethod
def parse_dir(dir_fpath, alter_sample_id=False):
    """Return the sample identifiers of the JSON files in a directory.

    Only directory entries whose name ends with ".json" are considered.

    Args:
        dir_fpath: Directory to list.
        alter_sample_id: When truthy, each identifier is read from the file
            via Missing.get_sample_name, and files for which that call
            returns a falsy value are left out. When falsy (the default),
            the identifier is the part of the filename before the first
            underscore.

    Returns:
        A list of sample identifiers, in os.listdir order.
    """
    sample_ids = []
    for filename in os.listdir(dir_fpath):
        if not filename.endswith(".json"):
            continue
        if alter_sample_id:
            sample_name = Missing.get_sample_name(os.path.join(dir_fpath, filename))
            # Skip files whose sample name could not be read.
            if sample_name:
                sample_ids.append(sample_name)
        else:
            sample_ids.append(filename.split("_")[0])
    return sample_ids

@staticmethod
def filter_csv_dict(csv_dict, missing_samples):
Expand Down

0 comments on commit f37ebed

Please sign in to comment.