Skip to content

Commit

Permalink
Add multi-sample VCF support with simple build autodetect, manual select
Browse files Browse the repository at this point in the history
  • Loading branch information
alinja committed May 23, 2021
1 parent 8669436 commit cba8e33
Show file tree
Hide file tree
Showing 7 changed files with 175 additions and 37 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ CrossMap (`pip3 install CrossMap`), conversion chain file (`crossmap/GRCh38_to_N
the ISOGG spreadsheet in csv format (`'SNP Index - Human.csv'`). Outputs a `haploy_map.txt`
file which is used by haploy_find.py. See haploy.py for details.

## haploy_anno_import.py

This example script imports your own annotations, which can then be attached to the reported tree nodes. As an example, the script
imports YFull person IDs and some selected FTDNA project files. To use the FTDNA data, open the project chart tables in a browser, select a page
size large enough that everything fits on one page, and edit the corresponding lines at the end of the script. Snipsa will load any
files whose names start with `haploy_annodb`. An example file is included.

## haplomt_find.py

This small tool reads a raw SNP data file and lists MT chromosome haplogroup information. You must first initialize the mutation
Expand Down
4 changes: 2 additions & 2 deletions haplomt.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ def print_data(do_print=True):
return rep


def report(fname, n, do_uptree=True, do_extra=True, do_all=False, filt='', force=''):
def report(fname, n, do_uptree=True, do_extra=True, do_all=False, filt='', force='', vcf_sample='', force_build=0):
rep=''
snpset, meta = snpload.load(fname, ['MT'])
snpset, meta = snpload.load(fname, ['MT'], vcf_sample=vcf_sample, force_build=force_build)
if 'MT' not in snpset:
return "No MT data found\n"

Expand Down
12 changes: 10 additions & 2 deletions haplomt_find.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,15 @@
force=''
filt=''
all=False
force_build=0
vcf_sample=''

parser = argparse.ArgumentParser()
parser.add_argument('-s', '--single', help='Analyse a path for single group')
parser.add_argument('-a', '--all', action='store_true', help='Show listing of all found mutations')
parser.add_argument('-n', '--num', help='Show num best matches')
parser.add_argument('-v', '--vcf-sample', help='VCF sample select (regexp)')
parser.add_argument('-b', '--build', help='Force build36/37&38 input')
parser.add_argument('file', nargs='+')

args = parser.parse_args()
Expand All @@ -27,6 +31,10 @@
n_multi = int(args.num)
if args.all:
all=True
if args.vcf_sample:
vcf_sample = args.vcf_sample
if args.build:
force_build = int(args.build)

if len(args.file) < 1:
print(sys.argv[0]+" <filename>")
Expand All @@ -37,7 +45,7 @@
print("DB loaded!")

print("Loading chr data...")
rep = haplomt.report(args.file[0], n_single, do_all=all, filt=filt, force=force)
rep = haplomt.report(args.file[0], n_single, do_all=all, filt=filt, force=force, vcf_sample=vcf_sample, force_build=force_build)
print(rep)

else:
Expand All @@ -48,7 +56,7 @@
lookfor = args.file[0].split(',')
for fname in args.file[1:]:
#try:
snpset, meta = snpload.load(fname, ['MT'])
snpset, meta = snpload.load(fname, ['MT'], vcf_sample=vcf_sample, force_build=force_build)

if 'MT' not in snpset:
print('%s: no MT data'%fname)
Expand Down
6 changes: 3 additions & 3 deletions haploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,9 @@ def path_str(ut, n):

return rep

def report(fname, n, do_uptree=True, do_extra=True, do_all=False, filt='', force='', min_match_level=0):
def report(fname, n, do_uptree=True, do_extra=True, do_all=False, filt='', force='', min_match_level=0, vcf_sample='', force_build=0):
rep=''
snpset, meta = snpload.load(fname, ['Y'])
snpset, meta = snpload.load(fname, ['Y'], vcf_sample=vcf_sample, force_build=force_build)
if 'Y' not in snpset:
return "No Y data found\n"

Expand Down Expand Up @@ -714,7 +714,7 @@ def show_db2():
print(m)

blacklist_etc='M8990' #str etc
blacklist_yb='Z1908 Y13952 Z6171 PF1401 M11813 YSC0001289 BY2821 M11836 M3745 M11838 M11843'
blacklist_yb='M3745'
blacklist_yf='Z8834 Z7451 YP1757 YP2129 YP1822 YP1795 YP2228 YP1809 YP2229 YP1948 YP2226 YP1827 L508'
blacklist_yf+=' Y125394 Y125393 Y125392 Y125391 Y125390 Y125389 Y125396 Y125397 Y125408 [report-spacer] V1896 PAGE65.1 Y2363 PF3515 PF3512 PF3507 PF3596 Z6023 M547 A3073 Z1716 PF5827 PF1534 PF6011 PF2634 PF2635'
blacklist_rootambi='BY229589 Z2533 DFZ77 M11801 FT227770 Y3946 Y125419 FT227767 Y1578 CTS12490 FT227774 YP1740 Y125394 Y125393 Y125392 Y125391 Y125390' #TODO
Expand Down
13 changes: 11 additions & 2 deletions haploy_find.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@
min_match_level=0
min_tree_load_level=0
new_yfind=1
force_build=0
vcf_sample=''

parser = argparse.ArgumentParser()
parser.add_argument('-s', '--single', help='Analyse a path for single group')
parser.add_argument('-a', '--all', action='store_true', help='Show listing of all found mutations')
parser.add_argument('-n', '--num', help='Show num best matches')
parser.add_argument('-q', '--quick', help='Quick mode')
parser.add_argument('-v', '--vcf-sample', help='VCF sample select (regexp)')
parser.add_argument('-b', '--build', help='Force build36/37&38 input')
parser.add_argument('file', nargs='+')

args = parser.parse_args()
Expand All @@ -34,6 +38,10 @@
if args.quick:
min_match_level=int(args.quick)
min_tree_load_level=int(args.quick)
if args.vcf_sample:
vcf_sample = args.vcf_sample
if args.build:
force_build = int(args.build)

if len(args.file) < 2:

Expand All @@ -42,7 +50,7 @@
haploy.load_db2j(min_tree_load_level=min_tree_load_level)
print("DB loaded!")
haploy.load_annotations('haploy_annodb_*.txt')
rep = haploy.report(args.file[0], n_single, do_all=all, filt=filt, force=force, min_match_level=min_match_level)
rep = haploy.report(args.file[0], n_single, do_all=all, filt=filt, force=force, min_match_level=min_match_level, vcf_sample=vcf_sample, force_build=force_build)
print(rep)
else:
# keep old one available
Expand All @@ -69,7 +77,8 @@
haploy.load_db2j(min_tree_load_level=min_tree_load_level)
print("DB loaded!")
for fname in args.file[1:]:
snpset, meta = snpload.load(fname, ['Y'])
#TODO: loop over vcf samples
snpset, meta = snpload.load(fname, ['Y'], vcf_sample=vcf_sample, force_build=force_build)

if 'Y' not in snpset:
print('%s: no Y data'%fname)
Expand Down
34 changes: 28 additions & 6 deletions snipsa-gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,16 @@ def handle_findmt():
do_uptree = report_snps.get()
do_all = report_all.get()
force = pathvar.get()
vcf_sample = vcfvar.get()
buildstr = buildvar.get()
force_build=0
if buildstr == 'Build36': force_build = 36
if buildstr == 'Build37': force_build = 37
if buildstr == 'Build38': force_build = 38
fname = fnamevar.get()
print("Reporting file: "+fname)
num = int(numbox.get())
rep = haplomt.report(fname, num, do_uptree=do_uptree, do_extra=do_uptree, do_all=do_all, filt=force, force=force)
rep = haplomt.report(fname, num, do_uptree=do_uptree, do_extra=do_uptree, do_all=do_all, filt=force, force=force, vcf_sample=vcf_sample, force_build=force_build)
print("Done")
scr.config(state=tk.NORMAL)
scr.delete('1.0', tk.END)
Expand All @@ -64,10 +70,16 @@ def handle_findy():
do_uptree = report_snps.get()
do_all = report_all.get()
force = pathvar.get()
vcf_sample = vcfvar.get()
buildstr = buildvar.get()
force_build=0
if buildstr == 'Build36': force_build = 36
if buildstr == 'Build37': force_build = 37
if buildstr == 'Build38': force_build = 38
fname = fnamevar.get()
print("Reporting file: "+fname)
num = int(numbox.get())
rep = haploy.report(fname, num, do_uptree=do_uptree, do_extra=do_uptree, do_all=do_all, filt=force, force=force)
rep = haploy.report(fname, num, do_uptree=do_uptree, do_extra=do_uptree, do_all=do_all, filt=force, force=force, vcf_sample=vcf_sample, force_build=force_build)
print("Done")
scr.config(state=tk.NORMAL)
scr.delete('1.0', tk.END)
Expand All @@ -86,12 +98,25 @@ def handle_findy():
# File
fframe=tk.Frame(hdrframe)
fframe.pack(fill='both')
if bam_support:
cbutton3 = tk.Button(fframe, text="Import BAM", command=handle_bam_select)
cbutton3.pack(side=tk.LEFT)
button = tk.Button(fframe, text="Choose file", command=handle_file_select)
button.pack(side=tk.LEFT)
fnamevar = tk.StringVar()
fnamevar.set("No file selected")
fnamelabel=tk.Label(fframe, textvariable=fnamevar, anchor='w')
fnamelabel.pack(side=tk.RIGHT, fill='both', expand=True)
fnamelabel.pack(side=tk.LEFT, fill='both', expand=True)
vcflabel=tk.Label(fframe, text='VCF sample:', anchor='w')
vcflabel.pack(side=tk.LEFT, fill='both')
vcfvar = tk.StringVar()
vcfbox = tk.Entry(fframe, textvariable=vcfvar,width=10)
vcfbox.pack(side=tk.LEFT)
buildvar = tk.StringVar()
buildchoices = {'Auto', 'Build36', 'Build37', 'Build38'}
buildvar.set('Auto')
builddropdown = tk.OptionMenu(fframe, buildvar, *buildchoices)
builddropdown.pack(side=tk.LEFT)

# Settings
sframe=tk.Frame(hdrframe)
Expand Down Expand Up @@ -123,9 +148,6 @@ def handle_findy():
cbutton1.pack(side=tk.LEFT, fill='x', expand=True)
cbutton2 = tk.Button(cframe, text="Find Y", command=handle_findy)
cbutton2.pack(side=tk.LEFT, fill='x', expand=True)
if bam_support:
cbutton3 = tk.Button(cframe, text="Import BAM", command=handle_bam_select)
cbutton3.pack(side=tk.LEFT, fill='x', expand=True)

# Result area
scr=scrolledtext.ScrolledText(window, wrap=tk.WORD)
Expand Down
Loading

0 comments on commit cba8e33

Please sign in to comment.