diff --git a/README.md b/README.md
index b0e8cd5..d23b0c7 100755
--- a/README.md
+++ b/README.md
@@ -34,6 +34,13 @@ CrossMap (`pip3 install CrossMap`), conversion chain file (`crossmap/GRCh38_to_N
 the ISOGG spreadsheet in csv format (`'SNP Index - Human.csv'`). Outputs a `haploy_map.txt`
 file which is used by haploy_find.py. See haploy.py for details.
 
+## haploy_anno_import.py
+
+This example script is used to import your own annotations that can be attached to the reported tree nodes. As an example the script will 
+import YFull person IDs, and also some selected FTDNA project files. Open project chart tables in a browser, select page
+size so that everything fits in one page, and edit the corresponding lines at the end of the script. Snipsa will load any 
+files starting with haploy_annodb. An example file is included.
+
 ## haplomt_find.py
 
 This small tool reads a raw SNP data file and lists MT chromosome haplogroup information. You must first initialize the mutation
diff --git a/haplomt.py b/haplomt.py
index e546d79..a05b504 100755
--- a/haplomt.py
+++ b/haplomt.py
@@ -64,9 +64,9 @@ def print_data(do_print=True):
     return rep
 
 
-def report(fname, n, do_uptree=True, do_extra=True, do_all=False, filt='', force=''):
+def report(fname, n, do_uptree=True, do_extra=True, do_all=False, filt='', force='', vcf_sample='', force_build=0):
     rep=''
-    snpset, meta = snpload.load(fname, ['MT'])
+    snpset, meta = snpload.load(fname, ['MT'], vcf_sample=vcf_sample, force_build=force_build)
     if 'MT' not in snpset:
         return "No MT data found\n"
 
diff --git a/haplomt_find.py b/haplomt_find.py
index 7902e01..5c8bfb8 100755
--- a/haplomt_find.py
+++ b/haplomt_find.py
@@ -11,11 +11,15 @@
 force=''
 filt=''
 all=False
+force_build=0
+vcf_sample=''
 
 parser = argparse.ArgumentParser()
 parser.add_argument('-s', '--single', help='Analyse a path for single group')
 parser.add_argument('-a', '--all', action='store_true', help='Show listing of all found mutations')
 parser.add_argument('-n', '--num', help='Show num best matches')
+parser.add_argument('-v', '--vcf-sample', help='VCF sample select (regexp)')
+parser.add_argument('-b', '--build', help='Force build36/37&38 input')
 parser.add_argument('file', nargs='+')
 
 args = parser.parse_args()
@@ -27,6 +31,10 @@
     n_multi = int(args.num)
 if args.all:
     all=True
+if args.vcf_sample:
+    vcf_sample = args.vcf_sample
+if args.build:
+    force_build = int(args.build)
 
 if len(args.file) < 1:
     print(sys.argv[0]+" <filename>")
@@ -37,7 +45,7 @@
     print("DB loaded!")
     
     print("Loading chr data...")
-    rep = haplomt.report(args.file[0], n_single, do_all=all, filt=filt, force=force)
+    rep = haplomt.report(args.file[0], n_single, do_all=all, filt=filt, force=force, vcf_sample=vcf_sample, force_build=force_build)
     print(rep)
     
 else:
@@ -48,7 +56,7 @@
     lookfor = args.file[0].split(',')
     for fname in args.file[1:]:
         #try:
-        snpset, meta = snpload.load(fname, ['MT'])
+        snpset, meta = snpload.load(fname, ['MT'], vcf_sample=vcf_sample, force_build=force_build)
         
         if 'MT' not in snpset:
             print('%s: no MT data'%fname)
diff --git a/haploy.py b/haploy.py
index f928ee4..da1bac8 100755
--- a/haploy.py
+++ b/haploy.py
@@ -133,9 +133,9 @@ def path_str(ut, n):
 
     return rep
 
-def report(fname, n, do_uptree=True, do_extra=True, do_all=False, filt='', force='', min_match_level=0):
+def report(fname, n, do_uptree=True, do_extra=True, do_all=False, filt='', force='', min_match_level=0, vcf_sample='', force_build=0):
     rep=''
-    snpset, meta = snpload.load(fname, ['Y'])
+    snpset, meta = snpload.load(fname, ['Y'], vcf_sample=vcf_sample, force_build=force_build)
     if 'Y' not in snpset:
         return "No Y data found\n"
 
@@ -714,7 +714,7 @@ def show_db2():
         print(m)
 
 blacklist_etc='M8990' #str etc
-blacklist_yb='Z1908  Y13952 Z6171 PF1401 M11813 YSC0001289 BY2821 M11836 M3745 M11838 M11843'
+blacklist_yb='M3745'
 blacklist_yf='Z8834 Z7451 YP1757 YP2129 YP1822 YP1795 YP2228 YP1809 YP2229 YP1948 YP2226 YP1827 L508'
 blacklist_yf+=' Y125394 Y125393 Y125392 Y125391 Y125390 Y125389 Y125396 Y125397 Y125408 [report-spacer] V1896 PAGE65.1 Y2363 PF3515 PF3512 PF3507 PF3596 Z6023 M547 A3073 Z1716 PF5827 PF1534 PF6011 PF2634 PF2635'
 blacklist_rootambi='BY229589 Z2533 DFZ77 M11801 FT227770 Y3946 Y125419 FT227767 Y1578 CTS12490 FT227774 YP1740 Y125394 Y125393 Y125392 Y125391 Y125390' #TODO
diff --git a/haploy_find.py b/haploy_find.py
index aaf0d2f..1b3f4c3 100755
--- a/haploy_find.py
+++ b/haploy_find.py
@@ -14,12 +14,16 @@
 min_match_level=0
 min_tree_load_level=0
 new_yfind=1
+force_build=0
+vcf_sample=''
 
 parser = argparse.ArgumentParser()
 parser.add_argument('-s', '--single', help='Analyse a path for single group')
 parser.add_argument('-a', '--all', action='store_true', help='Show listing of all found mutations')
 parser.add_argument('-n', '--num', help='Show num best matches')
 parser.add_argument('-q', '--quick', help='Quick mode')
+parser.add_argument('-v', '--vcf-sample', help='VCF sample select (regexp)')
+parser.add_argument('-b', '--build', help='Force build36/37&38 input')
 parser.add_argument('file', nargs='+')
 
 args = parser.parse_args()
@@ -34,6 +38,10 @@
 if args.quick:
     min_match_level=int(args.quick)
     min_tree_load_level=int(args.quick)
+if args.vcf_sample:
+    vcf_sample = args.vcf_sample
+if args.build:
+    force_build = int(args.build)
 
 if len(args.file) < 2:
     
@@ -42,7 +50,7 @@
         haploy.load_db2j(min_tree_load_level=min_tree_load_level)
         print("DB loaded!")
         haploy.load_annotations('haploy_annodb_*.txt')
-        rep = haploy.report(args.file[0], n_single, do_all=all, filt=filt, force=force, min_match_level=min_match_level)
+        rep = haploy.report(args.file[0], n_single, do_all=all, filt=filt, force=force, min_match_level=min_match_level, vcf_sample=vcf_sample, force_build=force_build)
         print(rep)
     else:
         # keep old one available
@@ -69,7 +77,8 @@
         haploy.load_db2j(min_tree_load_level=min_tree_load_level)
         print("DB loaded!")
         for fname in args.file[1:]:
-            snpset, meta = snpload.load(fname, ['Y'])
+            #TODO: loop over vcf samples
+            snpset, meta = snpload.load(fname, ['Y'], vcf_sample=vcf_sample, force_build=force_build)
 
             if 'Y' not in snpset:
                 print('%s: no Y data'%fname)
diff --git a/snipsa-gui.py b/snipsa-gui.py
index 1a948cf..f3f193f 100755
--- a/snipsa-gui.py
+++ b/snipsa-gui.py
@@ -44,10 +44,16 @@ def handle_findmt():
     do_uptree = report_snps.get()
     do_all = report_all.get()
     force = pathvar.get()
+    vcf_sample = vcfvar.get()
+    buildstr = buildvar.get()
+    force_build=0
+    if  buildstr == 'Build36': force_build = 36
+    if  buildstr == 'Build37': force_build = 37
+    if  buildstr == 'Build38': force_build = 38
     fname = fnamevar.get()
     print("Reporting file: "+fname)
     num = int(numbox.get())
-    rep = haplomt.report(fname, num, do_uptree=do_uptree, do_extra=do_uptree, do_all=do_all, filt=force, force=force)
+    rep = haplomt.report(fname, num, do_uptree=do_uptree, do_extra=do_uptree, do_all=do_all, filt=force, force=force, vcf_sample=vcf_sample, force_build=force_build)
     print("Done")
     scr.config(state=tk.NORMAL)
     scr.delete('1.0', tk.END)
@@ -64,10 +70,16 @@ def handle_findy():
     do_uptree = report_snps.get()
     do_all = report_all.get()
     force = pathvar.get()
+    vcf_sample = vcfvar.get()
+    buildstr = buildvar.get()
+    force_build=0
+    if  buildstr == 'Build36': force_build = 36
+    if  buildstr == 'Build37': force_build = 37
+    if  buildstr == 'Build38': force_build = 38
     fname = fnamevar.get()
     print("Reporting file: "+fname)
     num = int(numbox.get())
-    rep = haploy.report(fname, num, do_uptree=do_uptree, do_extra=do_uptree, do_all=do_all, filt=force, force=force)
+    rep = haploy.report(fname, num, do_uptree=do_uptree, do_extra=do_uptree, do_all=do_all, filt=force, force=force, vcf_sample=vcf_sample, force_build=force_build)
     print("Done")
     scr.config(state=tk.NORMAL)
     scr.delete('1.0', tk.END)
@@ -86,12 +98,25 @@ def handle_findy():
 # File
 fframe=tk.Frame(hdrframe)
 fframe.pack(fill='both')
+if bam_support:
+    cbutton3 = tk.Button(fframe, text="Import BAM", command=handle_bam_select)
+    cbutton3.pack(side=tk.LEFT)
 button = tk.Button(fframe, text="Choose file", command=handle_file_select)
 button.pack(side=tk.LEFT)
 fnamevar = tk.StringVar()
 fnamevar.set("No file selected")
 fnamelabel=tk.Label(fframe, textvariable=fnamevar,  anchor='w')
-fnamelabel.pack(side=tk.RIGHT, fill='both', expand=True)
+fnamelabel.pack(side=tk.LEFT, fill='both', expand=True)
+vcflabel=tk.Label(fframe, text='VCF sample:', anchor='w')
+vcflabel.pack(side=tk.LEFT, fill='both')
+vcfvar = tk.StringVar()
+vcfbox = tk.Entry(fframe, textvariable=vcfvar,width=10)
+vcfbox.pack(side=tk.LEFT)
+buildvar = tk.StringVar()
+buildchoices = {'Auto', 'Build36', 'Build37', 'Build38'}
+buildvar.set('Auto')
+builddropdown = tk.OptionMenu(fframe, buildvar, *buildchoices)
+builddropdown.pack(side=tk.LEFT)
 
 # Settings
 sframe=tk.Frame(hdrframe)
@@ -123,9 +148,6 @@ def handle_findy():
 cbutton1.pack(side=tk.LEFT, fill='x', expand=True)
 cbutton2 = tk.Button(cframe, text="Find Y", command=handle_findy)
 cbutton2.pack(side=tk.LEFT, fill='x', expand=True)
-if bam_support:
-    cbutton3 = tk.Button(cframe, text="Import BAM", command=handle_bam_select)
-    cbutton3.pack(side=tk.LEFT, fill='x', expand=True)
 
 # Result area
 scr=scrolledtext.ScrolledText(window, wrap=tk.WORD)  
diff --git a/snpload.py b/snpload.py
index d6bd470..8e0be03 100755
--- a/snpload.py
+++ b/snpload.py
@@ -6,19 +6,28 @@
 
 #fname: filename
 #crs: chromosomes to read
-def load(fname, crs=[]):
+def load(fname, crs=[], vcf_sample='', force_build=0):
     snpset = {}
     meta = {}
+    vcf_idx = 0
     try:
         tmpfile = preprocess_file(fname)
         (build, fmt) = detect_file_format(tmpfile)
+        if fmt == 'vcf':
+            idxs = get_vcf_sample_idx(tmpfile, vcf_sample)
+            #print(idxs)
+            if len(idxs) > 0:
+                vcf_idx = idxs[0]
+            print('VCF sample idx:', vcf_idx)
     except:
         print("FORMAT AUTODETECT FAILED!!!!!")
         meta['build'] = 0
         meta['format'] = ''
         meta['total'] = 0
         return (snpset, meta)
-    
+
+    if force_build:
+        build = force_build
     meta['build'] = build
     meta['format'] = fmt
     meta['total'] = 0
@@ -31,6 +40,8 @@ def load(fname, crs=[]):
         importer = import_line_ftdna
     elif fmt == 'myheritage':
         importer = import_line_ftdna #same
+    elif fmt == 'vcf':
+        importer = import_line_vcf
     else:
         print("Undetected format: build%d, %s"%(build, fmt))
         meta['total'] = 0
@@ -41,8 +52,8 @@ def load(fname, crs=[]):
         with open(tmpfile) as f:
             for line in f:
                 try:
-                    (snp, pos, cr, gen) = importer(line, build)
-                except:
+                    (snp, pos, cr, gen) = importer(line, build, vcf_idx)
+                except RuntimeError:
                     continue
                 
                 if gen[0] == "-":
@@ -79,14 +90,14 @@ def load(fname, crs=[]):
     meta['total'] = n_total
     return (snpset, meta)
 
-def import_line_23andme(line, build):
+def import_line_23andme(line, build, idx):
     if len(line) < 7:
-        raise Exception()
+        raise RuntimeError()
     if line.startswith('#'):
-        raise Exception()
+        raise RuntimeError()
     sline = line.split()
     if len(sline) < 4:
-        raise Exception()
+        raise RuntimeError()
     cr = sline[1];
     pos = sline[2];
     gen = sline[3];
@@ -104,16 +115,16 @@ def import_line_23andme(line, build):
         
     return (snp, pos, cr, gen)
 
-def import_line_ancestry(line, build):
+def import_line_ancestry(line, build, idx):
     if 'allele1' in line and 'allele2' in line:
-        raise Exception()
+        raise RuntimeError()
     if len(line) < 7:
-        raise Exception()
+        raise RuntimeError()
     if line.startswith('#'):
-        raise Exception()
+        raise RuntimeError()
     sline = line.split()
     if len(sline) < 4:
-        raise Exception()
+        raise RuntimeError()
     cr = sline[1];
     if cr == '23':
         cr = 'X'
@@ -140,16 +151,16 @@ def import_line_ancestry(line, build):
         
     return (snp, pos, cr, gen)
 
-def import_line_ftdna(line, build):
+def import_line_ftdna(line, build, idx):
     if len(line) < 7:
-        raise Exception()
+        raise RuntimeError()
     if line.startswith('#'):
-        raise Exception()
+        raise RuntimeError()
     if line.startswith('RSID'):
-        raise Exception()
+        raise RuntimeError()
     sline = line.split(',')
     if len(sline) < 4:
-        raise Exception()
+        raise RuntimeError()
     cr = sline[1].strip('"');
     pos = sline[2].strip('"');
     gen = sline[3].strip().strip('"');
@@ -167,6 +178,82 @@ def import_line_ftdna(line, build):
         
     return (snp, pos, cr, gen)
 
+def import_line_vcf(line, build, idx):
+    if len(line) < 7:
+        raise RuntimeError()
+    if line.startswith('#'):
+        raise RuntimeError()
+    sline = line.split()
+    if len(sline) < 4:
+        raise RuntimeError()
+    cr = sline[0];
+    if cr == 'chrY':
+        cr = 'Y'
+    elif cr == 'chrMT':
+        cr = 'MT'
+    elif cr == 'M':
+        cr = 'MT'
+    elif cr == 'chrM':
+        cr = 'MT'
+    else:
+        cr.replace('chr','')
+
+    pos = sline[1]
+    ref = sline[3]
+    alt = sline[4]
+    alls = [ref]
+    alls+=alt.split(',')
+
+    #skip indel
+    if len(ref) > 1 or len(alt) > 1:
+        raise RuntimeError()
+
+    #GT index from format string
+    form = sline[8]
+    gti=0
+    for i,gtstr in enumerate(form.split(':')):
+        if gtstr == 'GT':
+            gti=i
+
+    s = sline[9+idx].split(':')
+
+    #print(pos, alls, gti, s)
+    if s[gti][0] == '.':
+        raise RuntimeError()
+    g = alls[int(s[gti][0])]
+
+    #print(pos, alls, g)
+    gen = g+g;
+
+    snp = {'id': sline[2],
+        'cr': cr,
+        'gen': gen }
+
+    if build == 36:
+        snp['b36'] = pos
+    elif build == 37:
+        snp['b37'] = pos
+    elif build == 38:
+        snp['b38'] = pos
+
+    return (snp, pos, cr, gen)
+
+def get_vcf_sample_idx(fname, sname):
+    lc=0
+    idxs=[]
+    with open(fname) as f:
+        for line in f:
+            #print(line)
+            if line.startswith('#CHROM'):
+                for i, sam in enumerate(line.split('\t')[9:]):
+                    if re.search(sname, sam):
+                        idxs.append(i)
+                break
+            lc+=1
+            if lc > 10000:
+                break
+    return idxs
+
 def detect_file_format(fname):
     lc=0
     build=0
@@ -174,14 +261,14 @@ def detect_file_format(fname):
     with open(fname) as f:
         for line in f:
             lc += 1
-            if lc > 3000:
+            if lc > 4000:
                 break
             if fmt != '' and build != 0:
                 break
             if line.startswith('#'):
-                if 'build ' in line:
+                if fmt != 'vcf' and 'build ' in line:
                     build = int(re.findall(r'build \d+', line)[0].split()[1])
-                if 'Build: ' in line:
+                if fmt != 'vcf' and 'Build: ' in line:
                     build = int(re.findall(r'Build: \d+', line)[0].split()[1])
                 if "23andMe" in line:
                     fmt = '23andme'
@@ -191,7 +278,12 @@ def detect_file_format(fname):
                     fmt = 'myheritage'
                 if "##fileformat=VCF" in line:
                     fmt = 'vcf'
-                    return (build, fmt)
+                if "##reference" in line:
+                    #hacky autodetect may work for many files
+                    if '37' in line:
+                        build = 37
+                    if '38' in line:
+                        build = 38
                 continue
             if lc == 1 and line.startswith('RSID'):
                      fmt = 'ftdna'