Skip to content

Commit

Permalink
0.3.0 (#16)
Browse files Browse the repository at this point in the history
* 🚸 Make enhancements (#15)

* 🔖 0.3.0

* Update README.md

* ⬆️ Require datar 0.5.6
  • Loading branch information
pwwang authored Feb 3, 2022
1 parent 4156813 commit 4b6c3e2
Show file tree
Hide file tree
Showing 16 changed files with 83 additions and 35 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,11 @@ vcfstats --vcf examples/sample.vcf \
```
![Depths between sample 1 and 2](examples/depths-between-sample-1-and-2.scatter.png)


See more examples:

https://github.com/pwwang/vcfstats/issues/15#issuecomment-1029367903

[1]: https://img.shields.io/pypi/v/vcfstats?style=flat-square
[2]: https://pypi.org/project/vcfstats/
[3]: https://img.shields.io/github/v/tag/pwwang/vcfstats?style=flat-square
Expand Down
4 changes: 4 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 0.3.0

- Introduce enhancements (pwwang/vcfstats#15)

# 0.2.0
- Use `plotnine` instead of `ggplot2` so no `R` is needed
- Add `figfmt` to generate different format of figures other than `png`
Expand Down
Binary file added examples/info-dp-chroms.boxplot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/info-dp.histogram.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
23 changes: 23 additions & 0 deletions examples/mymacros.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from vcfstats.macros import continuous


@continuous
def INFO_DP(variant):
"""DP from INFO"""
return variant.INFO["DP"]


@continuous
def MISSINGs(variant):
"""DP from INFO"""
# convert boolean array to int
# gts012 = True
return (variant.gt_types == 3) + 0


@continuous
def N_MISSING(variant):
"""DP from INFO"""
# convert boolean array to int
# gts012 = True
return variant.num_unknown
Binary file added examples/n-hets-sample1.col.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/n-missings.col.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/per-locus-missings.boxplot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/per-locus-missings.histogram.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 5 additions & 5 deletions examples/sample.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -47,19 +47,19 @@
1 10177 . A C 37 MinMQ DP=495;VDB=0.0168;AF1=0.1596;AC1=1;DP4=167,82,52,21;MQ=13;FQ=37;PV4=0.57,1,1,1;AN=8;AC=1 GT:PL:DP:SP:GQ 0/0:0,0,15:101:2:5 0/0:0,36,89:51:5:40 0/0:0,79,103:85:1:83 0/1:41,0,31:85:16:36
1 10250 . A C 61 MinMQ DP=271;VDB=0.0265;AF1=0.125;AC1=1;DP4=87,78,18,9;MQ=17;FQ=61;PV4=0.21,1,1,0.1;AN=8;AC=1 GT:DP:SP:GQ 0/0:60:0:99 0/0:32:5:53 0/0:50:2:83 0/1:50:3:62
1 10257 . A C 31.9 MinMQ DP=400;VDB=0.0245;AF1=0.2404;AC1=2;DP4=93,100,26,10;MQ=16;FQ=31.9;PV4=0.01,1,1,0.013;AN=8;AC=2 GT:PL:DP:SP:GQ 0/0:0,93,197:65:3:95 0/1:13,0,92:41:9:11 0/0:0,91,128:59:0:93 0/1:27,0,70:64:14:25
1 10329 . ACCCC ACCC 26.4 MinMQ INDEL;DP=315;VDB=0.0160;AF1=0.2047;AC1=2;DP4=2,42,9,16;MQ=17;FQ=29.3;PV4=0.0011,1,0.061,1;AN=8;AC=1 GT:PL:DP:SP:GQ 0/0:0,4,68:22:10:7 0/0:2,0,16:7:0:3 0/0:0,15,61:18:0:17 0/1:46,0,34:22:7:40
1 10352 . TACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC 253 MinMQ INDEL;DP=413;VDB=0.0226;AF1=0.8598;AC1=7;DP4=7,17,13,44;MQ=15;FQ=4.35;PV4=0.58,1,1,0.0055;AN=8;AC=7 GT:PL:DP:SP:GQ 1/1:67,6,0:18:2:11 1/1:14,7,0:12:0:12 1/1:111,22,0:23:0:26 0/1:83,0,22:28:2:18
1 10329 . ACCCC ACCC 26.4 MinMQ INDEL;DP=315;VDB=0.0160;AF1=0.2047;AC1=2;DP4=2,42,9,16;MQ=17;FQ=29.3;PV4=0.0011,1,0.061,1;AN=8;AC=1 GT:PL:DP:SP:GQ ./.:0,4,68:22:10:7 ./.:2,0,16:7:0:3 0/0:0,15,61:18:0:17 0/1:46,0,34:22:7:40
1 10352 . TACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC 253 MinMQ INDEL;DP=413;VDB=0.0226;AF1=0.8598;AC1=7;DP4=7,17,13,44;MQ=15;FQ=4.35;PV4=0.58,1,1,0.0055;AN=8;AC=7 GT:PL:DP:SP:GQ ./.:67,6,0:18:2:11 1/1:14,7,0:12:0:12 1/1:111,22,0:23:0:26 0/1:83,0,22:28:2:18
1 10492 . C T 999 PASS DP=213;VDB=0.0102;AF1=0.375;AC1=3;DP4=84,74,34,19;MQ=32;FQ=999;PV4=0.2,0.11,0.057,0.13;AN=8;AC=3 GT:PL:DP:SP:GQ 0/1:85,0,255:57:3:86 0/0:0,123,255:41:0:99 0/1:255,0,255:47:0:99 0/1:114,0,255:66:4:99
1 10583 . G A,C 20 PASS DP=134;VDB=0.0071;AF1=0.1242;AC1=1;DP4=78,41,6,6;MQ=32;FQ=20;PV4=0.35,0.29,0.052,1;AN=8;AC=1 GT:PL:DP:SP:GQ 0/1:26,0,227:40:8:21 2/2:0,35,255:21:0:40 0/0:0,108,255:36:0:99 0/0:0,21,255:34:0:26
1 10797 . CAGA CAGAGA 90.4 MinMQ INDEL;DP=37;VDB=0.0243;AF1=0.2819;AC1=2;DP4=7,3,0,6;MQ=29;FQ=93.3;PV4=0.011,1,1,1;AN=8;AC=2 GT:PL:DP:SP:GQ 0/0:0,9,104:3:0:10 0/0:0,6,39:2:0:8 0/1:59,0,36:5:0:43 0/1:56,0,32:6:4:39
1 10797 . CAGA CAGAGA 90.4 MinMQ INDEL;DP=37;VDB=0.0243;AF1=0.2819;AC1=2;DP4=7,3,0,6;MQ=29;FQ=93.3;PV4=0.011,1,1,1;AN=8;AC=2 GT:PL:DP:SP:GQ ./.:0,9,104:3:0:10 ./.:0,6,39:2:0:8 0/1:59,0,36:5:0:43 0/1:56,0,32:6:4:39
1 10821 . T A 49.7 PASS DP=9;VDB=0.0091;AF1=1;AC1=5;DP4=1,3,0,4;MQ=12;FQ=7.75;PV4=1,1,1,1;AN=8;AC=6 GT:PL:DP:SP:GQ 1/1:42,9,0:33:0:20 0/1:0,3,4:1:0:2 1/1:12,1,0:2:0:4 0/1:0,6,8:2:0:2
2 14907 . A G 999 MinMQ DP=461;VDB=0.0384;AF1=0.5;G3=8.874e-45,1,8.011e-40;HWE=0.0185;AC1=4;DP4=101,122,129,102;MQ=25;FQ=999;PV4=0.031,0.011,1,1;AN=8;AC=4 GT:PL:DP:SP:GQ 0/1:225,0,255:133:0:99 0/1:213,0,225:91:20:99 0/1:255,0,188:104:0:99 0/1:255,0,208:126:4:99
2 14930 . A G 999 MinMQ DP=502;VDB=0.0393;AF1=0.5;G3=1.282e-48,1,7.866e-46;HWE=0.0185;AC1=4;DP4=117,121,135,111;MQ=28;FQ=999;PV4=0.24,0.02,0.42,1;AN=8;AC=4 GT:PL:DP:SP:GQ 0/1:255,0,255:150:0:99 0/1:232,0,255:84:9:99 0/1:255,0,250:114:0:99 0/1:255,0,218:136:4:99
2 15118 . A G 196 MinMQ DP=408;VDB=0.0389;AF1=0.4995;G3=4.894e-09,1,2.035e-09;HWE=0.0193;AC1=4;DP4=79,107,98,101;MQ=13;FQ=196;PV4=0.19,1,0.16,1;AN=8;AC=4 GT:PL:DP:SP:GQ 0/1:42,0,97:110:1:45 0/1:14,0,34:82:7:17 0/1:54,0,93:90:2:57 0/1:92,0,15:103:1:18
2 15211 . T G 999 MinMQ DP=381;VDB=0.0374;AF1=0.6993;AC1=6;DP4=52,44,122,137;MQ=17;FQ=156;PV4=0.28,0.31,1,1;AN=8;AC=6 GT:PL:DP:SP:GQ 0/1:146,0,101:114:5:99 1/1:77,1,0:67:2:4 0/1:121,0,61:78:7:60 1/1:192,89,0:96:11:90
2 15274 . A T 999 MinMQ DP=229;VDB=0.0313;AF1=1;AC1=8;DP4=0,0,99,120;MQ=11;FQ=-92.5;AN=8;AC=8 GT:PL:DP:SP:GQ 1/1:83,114,0:54:0:99 1/1:82,108,0:48:0:99 1/1:84,105,0:47:0:99 1/1:112,175,0:70:0:99
2 15820 . G T 90.6 MinMQ DP=149;VDB=0.0252;AF1=0.374;AC1=3;DP4=24,68,15,40;MQ=17;FQ=90.6;PV4=1,1,2e-07,1;AN=8;AC=3 GT:PL:DP:SP:GQ 0/1:40,0,124:48:1:41 0/0:0,27,153:33:2:26 1/1:65,25,0:15:3:20 0/0:0,65,195:51:5:64
2 15903 . GCC GCCC 158 MinMQ INDEL;DP=14;VDB=0.0182;AF1=1;AC1=8;DP4=0,0,0,7;MQ=29;FQ=-18.2;AN=8;AC=8 GT:PL:DP:SP:GQ 1/1:29,3,0:1:0:13 1/1:44,6,0:2:0:16 1/1:29,3,0:1:0:13 1/1:72,9,0:3:0:19
2 15903 . GCC GCCC 158 MinMQ INDEL;DP=14;VDB=0.0182;AF1=1;AC1=8;DP4=0,0,0,7;MQ=29;FQ=-18.2;AN=8;AC=8 GT:PL:DP:SP:GQ ./.:29,3,0:1:0:13 1/1:44,6,0:2:0:16 1/1:29,3,0:1:0:13 1/1:72,9,0:3:0:19
2 16103 . T G 24.3 PASS DP=110;VDB=0.0122;AF1=0.2119;AC1=2;DP4=49,26,23,2;MQ=31;FQ=24.3;PV4=0.01,1,6.9e-13,0.33;AN=8;AC=1 GT:PL:DP:SP:GQ 0/0:0,2,228:39:11:5 0/0:0,7,189:15:6:10 0/0:0,0,218:25:0:4 0/1:26,0,192:21:5:24
2 16378 . T C 999 MinMQ DP=587;VDB=0.0267;AF1=0.5;G3=9.954e-26,1,3.125e-18;HWE=0.0185;AC1=4;DP4=128,75,245,120;MQ=18;FQ=999;PV4=0.36,0.23,0.024,1;AN=8;AC=4 GT:PL:DP:SP:GQ 0/1:127,0,150:194:9:99 0/1:118,0,139:108:4:99 0/1:156,0,80:130:3:83 0/1:166,0,148:136:1:99
2 16495 . G C 97.7 MinMQ DP=644;VDB=0.0239;AF1=0.2493;AC1=2;DP4=226,252,67,87;MQ=19;FQ=97.7;PV4=0.46,0.14,1,1;AN=8;AC=2 GT:PL:DP:SP:GQ 0/0:0,41,126:190:5:43 0/0:0,17,168:115:1:19 0/1:19,0,132:166:1:17 0/1:87,0,176:161:4:85
Expand All @@ -70,7 +70,7 @@
3 28558 . C T 164 MinMQ DP=142;VDB=0.0026;AF1=0.4529;G3=1.465e-06,1,4.392e-30;HWE=0.0307;AC1=4;DP4=38,62,18,20;MQ=17;FQ=164;PV4=0.34,1,1,1;AN=8;AC=4 GT:PL:DP:SP:GQ 0/1:0,0,104:44:9:4 0/1:27,0,28:32:3:27 0/1:77,0,113:35:5:79 0/1:64,0,31:27:0:35
3 28563 . A G 999 MinMQ DP=124;VDB=0.0072;AF1=1;AC1=8;DP4=22,31,27,39;MQ=18;FQ=-3.67;PV4=1,1,1,1;AN=8;AC=8 GT:PL:DP:SP:GQ 1/1:191,6,0:41:1:14 1/1:90,2,0:24:0:11 1/1:213,20,0:31:4:28 1/1:104,0,1:23:0:8
3 28590 . TT TTGGT 116 MinMQ INDEL;DP=112;VDB=0.0233;AF1=0.3933;AC1=3;DP4=5,46,10,16;MQ=19;FQ=54.6;PV4=0.005,1,1,0.00097;AN=8;AC=3 GT:PL:DP:SP:GQ 0/1:80,0,2:23:10:8 0/1:9,0,9:15:15:9 0/1:51,0,26:21:2:31 0/0:0,17,39:18:5:16
3 30867 . CCTCTCTCTCTCTCTCTCTCTCTCTC CCTCTCTCTCTCTCTCTCTCTC 999 PASS INDEL;DP=229;VDB=0.0320;AF1=0.5;G3=4.953e-17,1,5e-52;HWE=0.0185;AC1=4;DP4=56,66,27,32;MQ=37;FQ=999;PV4=1,1,1,1;AN=8;AC=4 GT:PL:DP:SP:GQ 0/1:211,0,255:47:1:99 0/1:74,0,255:20:2:77 0/1:255,0,255:70:3:99 0/1:176,0,255:44:0:99
3 30867 . CCTCTCTCTCTCTCTCTCTCTCTCTC CCTCTCTCTCTCTCTCTCTCTC 999 PASS INDEL;DP=229;VDB=0.0320;AF1=0.5;G3=4.953e-17,1,5e-52;HWE=0.0185;AC1=4;DP4=56,66,27,32;MQ=37;FQ=999;PV4=1,1,1,1;AN=8;AC=4 GT:PL:DP:SP:GQ ./.:211,0,255:47:1:99 0/1:74,0,255:20:2:77 0/1:255,0,255:70:3:99 0/1:176,0,255:44:0:99
3 30923 . G T 999 PASS DP=107;VDB=0.0022;AF1=1;AC1=8;DP4=0,0,47,50;MQ=37;FQ=-36;AN=8;AC=8 GT:PL:DP:SP:GQ 1/1:255,75,0:25:0:99 1/1:221,30,0:10:0:72 1/1:255,117,0:39:0:99 1/1:255,69,0:23:0:99
3 40639 . CTTTTTTTTTTTTTTTTTTT CTTTTTTTTTTTTTTTT 118 PASS INDEL;DP=72;VDB=0.0379;AF1=1;AC1=8;DP4=0,0,14,2;MQ=33;FQ=-18.8;AN=8;AC=8 GT:PL:DP:SP:GQ 1/1:77,30,0:11:0:40 1/1:16,3,0:1:0:14 1/1:16,3,0:1:0:14 1/1:25,6,0:3:0:16
4 46633 . T A 46.4 MinMQ DP=169;VDB=0.0275;AF1=0.1322;AC1=1;DP4=67,81,9,9;MQ=15;FQ=46.4;PV4=0.8,0.5,1,1;AN=8;AC=1 GT:PL:DP:SP:GQ 0/1:52,0,60:47:0:47 0/0:0,7,71:30:0:12 0/0:0,114,146:38:0:99 0/0:0,154,179:51:0:99
Expand Down
Binary file added examples/titv-numbers.col.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api"

[tool.poetry]
name = "vcfstats"
version = "0.2.0"
version = "0.3.0"
description = "Powerful statistics for VCF files"
authors = [ "pwwang <[email protected]>",]
license = "MIT"
Expand All @@ -23,7 +23,7 @@ lark-parser = "^0.11"
plotnine = "^0.8.0"
plotnine-prism = "^0.0"
python-slugify = "^5.0.2"
datar = "^0.5.2"
datar = "^0.5.6"
py = "^1.10.0"

[tool.poetry.dev-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion vcfstats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

from . import macros

__version__ = "0.2.0"
__version__ = "0.3.0"
38 changes: 23 additions & 15 deletions vcfstats/formula.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def run(self, variant, passed):
if passed and variant.FILTER:
return False
value = self.term["func"](variant)

if value is False or value is None:
return False
# numpy.array
Expand Down Expand Up @@ -209,41 +210,48 @@ def run(self, variant, passed):
"""Run each variant"""
if self.filter and self.filter.run(variant, passed) is False:
return

if not self.group:
raise RuntimeError(
"No group specified, don't know how to aggregate."
)

group = self.group.run(variant, passed)
if group is False:
return
if len(group) > 1:

value = self.term.run(variant, passed)
if value is False:
return

if len(group) > 1 and len(value) != len(group):
raise ValueError(
"Cannot aggregate on more than one group, "
+ "make sure you specified sample for sample data."
)
group = group[0]
# group = group[0]

xgroup = False
xgroup = None
if self.xgroup:
xgroup = self.xgroup.run(variant, passed)
if xgroup is False:
return
if len(xgroup) > 1:
if len(xgroup) > 1 and len(value) != len(xgroup):
raise ValueError(
"Cannot aggregate on more than one level of xgroup."
)
xgroup = xgroup[0]

value = self.term.run(variant, passed)

if value is False:
return
if xgroup:
self.cache.setdefault(xgroup, {}).setdefault(group, []).extend(
value
)
# xgroup = xgroup[0]

if xgroup is not None:
for xgrup, grup, val in zip(xgroup, group, value):
self.cache.setdefault(
xgrup, {}
).setdefault(
grup, []
).append(val)
else:
self.cache.setdefault(group, []).extend(value)
for grup, val in zip(group, value):
self.cache.setdefault(grup, []).append(val)

def dump(self):
"""Dump and calculate the aggregations"""
Expand Down
26 changes: 14 additions & 12 deletions vcfstats/instance.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Handling one plot/instance"""
from os import path
from types import ModuleType

import pandas
import plotnine as p9
Expand Down Expand Up @@ -28,6 +29,7 @@
attr: getattr(p9p, attr)
for attr in dir(p9p)
if not attr.startswith("_")
and not isinstance(getattr(p9p, attr), ModuleType)
},
}

Expand Down Expand Up @@ -235,20 +237,12 @@ def plot(self):
if df.shape[1] > 2:
plt = plt + p9.geom_bar(
p9.aes(x=df.columns[2], y=col0.name, fill=df.columns[2]),
stat="identity"
# aes_for_geom_fill,
# x=df.Group,
# y=col0,
# label=paste0(round_(100 * col0 / sum_(col0), 1), "%"),
# show_legend=False,
# position=p9.position_adjust_text(),
stat="identity",
)
else:
col0 = factor(col0, levels=rev(unique(as_character(col0))))
fills = rev(levels(col0))
sums = map(lambda x: sum(col0 == x), fills)
print(col0)
print(fills)
plt = (
p9.ggplot(df, p9.aes(x=df.columns[1]))
+ p9.geom_bar(p9.aes(fill=df.columns[0]))
Expand Down Expand Up @@ -292,6 +286,7 @@ def plot(self):

def save_plot(self, plt, theme_elems):
has_theme = False
theme_frags = []
for i, gg in enumerate(self.ggs.split(";")):
gg = gg.strip()
if not gg:
Expand All @@ -303,14 +298,21 @@ def save_plot(self, plt, theme_elems):
exec(ggcode, GGS_ENV)
except Exception as exc:
raise ValueError(f"Invalid ggs expression: {gg}") from exc

ggexpr = GGS_ENV.pop("__gg__")
plt = plt + ggexpr
if gg.startswith("theme_"):
has_theme = True
# avoid user theme fragments being overriden by theme_prism
if gg.startswith("theme("):
theme_frags.append(ggexpr)
else:
plt = plt + ggexpr
if gg.startswith("theme_"):
has_theme = True

if not has_theme:
plt = plt + p9p.theme_prism(base_size=12, base_family="monospace")
plt = plt + theme_elems
for theme_frag in theme_frags:
plt = plt + theme_frag

devpars = (
Diot(height=1000, width=1000, res=100, format=self.figfmt)
Expand Down
6 changes: 6 additions & 0 deletions vcfstats/macros.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ def SUBST(variant):
return "{}>{}".format(variant.REF, ",".join(variant.ALT))


@categorical
def SAMPLES(variant):
"""Get the sample indices"""
return list(range(len(variant.genotypes)))


@continuous
def NALT(variant):
"""Number of alternative alleles"""
Expand Down

0 comments on commit 4b6c3e2

Please sign in to comment.