Change id format for fix and missing

SMD-Bioinformatics-Lund · Jul 1, 2024 · e51788a
1 parent 00eee84
commit e51788a
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 3 deletions.
diff --git a/jasentool/fix.py b/jasentool/fix.py
@@ -14,7 +14,7 @@ def fix_csv(input_file, output_fpath):
         with open(input_file, 'r', encoding="utf-8") as csvfile:
             samples = pd.read_csv(csvfile)
             samples.insert(2, 'sample_name', samples['id'])
-            samples['id'] = samples['id'].str.lower() + "_" + samples['sequencing_run'].str.lower()
+            samples['id'] = samples['clarity_sample_id'].str.lower() + "_" + samples['sequencing_run'].str.lower()
             samples['assay'] = samples['species']
             for assay, df_assay in samples.groupby('assay'):
                 out_fpath = f'{os.path.splitext(output_fpath)[0]}_{assay}.csv'

diff --git a/jasentool/utils.py b/jasentool/utils.py
@@ -15,12 +15,14 @@ class Utils:
     def write_out_csv(csv_dict, assay, platform, out_fpath):
         """Write out file as csv"""
         with open(out_fpath, 'w+', encoding="utf-8") as csvfile:
-            fieldnames = ["id", "clarity_sample_id", "group", "species", "assay",
+            fieldnames = ["id", "clarity_sample_id", "sample_name", "group", "species", "assay",
                           "platform", "sequencing_run", "read1", "read2"] #header
             writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
             writer.writeheader()
             for sample in csv_dict:
-                row_dict = {"id": sample, "clarity_sample_id": csv_dict[sample][0],
+                row_dict = {"id": csv_dict[sample][0].lower() + "_" + csv_dict[sample][3].lower(),
+                            "clarity_sample_id": csv_dict[sample][0],
+                            "sample_name": sample,
                             "group": csv_dict[sample][1], "species": csv_dict[sample][2],
                             "assay": assay, "platform": platform,
                             "sequencing_run": csv_dict[sample][3],