Skip to content

Commit

Permalink
refactor genotyping scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
riasc committed Jan 16, 2024
1 parent d78a57f commit 3e29219
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 0 deletions.
28 changes: 28 additions & 0 deletions workflow/scripts/genotyping/combine_all_alleles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import os
import sys

def main():
    """Combine per-source allele tables into a single table keyed by allele.

    Command-line arguments (read from ``sys.argv``):
        1: list of input file paths passed as ONE whitespace-separated string.
           Every non-blank input line is tab-separated with the source in
           the first column and the allele in the second.
        2: path of the output file.

    The output holds one line per distinct allele, in first-seen order:
    the comma-joined sources, a tab, then the allele.

    Raises:
        IndexError: if a non-blank input line has fewer than two columns.
    """
    # split() instead of split(' '): doubled or trailing blanks in the joined
    # path list would otherwise produce empty file names and a failing open().
    infiles = sys.argv[1].split()
    outfile = sys.argv[2]

    alleles = {}
    for path in infiles:
        with open(path, 'r') as fh_in:
            # NOTE(review): the previous version carried a "skip header"
            # comment here but never skipped a line. Every line is still
            # treated as data -- confirm the inputs really have no header.
            for line in fh_in:
                line = line.rstrip()
                if not line:
                    # tolerate blank / trailing empty lines
                    continue
                cols = line.split('\t')
                source = cols[0]
                allele = cols[1]
                alleles.setdefault(allele, []).append(source)

    with open(outfile, 'w') as fh_out:
        for allele, sources in alleles.items():
            fh_out.write(f"{','.join(sources)}\t{allele}\n")


# Guarded so importing this module does not trigger argv parsing and file I/O.
if __name__ == '__main__':
    main()
32 changes: 32 additions & 0 deletions workflow/scripts/genotyping/merge_predicted_mhcI.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import os
import sys
import re
from pathlib import Path

def main():
    """Merge per-sample allele lists into one table of groups per allele.

    Command-line arguments (read from ``sys.argv``):
        1: list of input file paths passed as ONE whitespace-separated string.
           Each file stem must start with ``<group>_(RNA|DNA)_(SE|PE)``;
           ``<group>`` is recorded for every allele listed in that file.
           Each non-blank line of a file is taken as one allele.
        2: path of the output file.

    The output holds one line per allele, sorted by allele: the comma-joined
    groups (in input order, repeats kept), a tab, then the allele.

    Raises:
        ValueError: if a file stem does not match the expected naming scheme.
    """
    stem_pattern = re.compile(r'^(.+)_(RNA|DNA)_(SE|PE)')

    alleles = {}
    # split() copes with doubled or trailing blanks in the joined path list.
    for path in sys.argv[1].split():
        match = stem_pattern.search(Path(path).stem)
        if match is None:
            # Fail with a message naming the file instead of an opaque
            # AttributeError on None.group().
            raise ValueError(
                f'cannot derive group from file name {path!r}: '
                'expected <group>_(RNA|DNA)_(SE|PE)...'
            )
        group = match.group(1)

        with open(path, 'r') as fh:
            for line in fh:
                al = line.rstrip()
                if not al:
                    # blank lines would otherwise become an empty allele
                    continue
                alleles.setdefault(al, []).append(group)

    with open(sys.argv[2], 'w') as out:
        for al in sorted(alleles):
            out.write(f"{','.join(alleles[al])}\t{al}\n")


# Guarded so importing this module does not trigger argv parsing and file I/O.
if __name__ == '__main__':
    main()

0 comments on commit 3e29219

Please sign in to comment.