Skip to content

Commit

Permalink
Merge pull request #18 from AlgoLab/use_snpEff
Browse files Browse the repository at this point in the history
Add SnpEff to MALVIRUS pipeline
  • Loading branch information
yp authored Sep 15, 2020
2 parents 625a7c5 + 2271e64 commit 6fac35f
Show file tree
Hide file tree
Showing 31 changed files with 1,054 additions and 696 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ MALVIRUS is a fast and accurate tool for genotyping haploid individuals that doe
It is tailored to work with virological data (including but not limited to SARS-CoV-2) and can genotype an individual directly from sequencing data in minutes.

MALVIRUS is divided into two logically distinct steps: the creation of a variant catalog from a set of assemblies and the genotype calling.
The first step is based on mafft [[1]](#mafft7) and snp-sites [[2]](#snp-sites), whereas the second step is based on KMC [[3]](#kmc) and MALVA [[4]](#malva).
The first step is based on mafft [[1]](#mafft7) and snp-sites [[2]](#snp-sites), whereas the second step is based on KMC [[3]](#kmc), MALVA [[4]](#malva), and SnpEff [[5]](#snpeff).

The variant catalog can be built once and reused for genotyping multiple individuals.

Expand All @@ -30,3 +30,5 @@ bioRxiv 2020.05.05.076992; doi: [10.1101/2020.05.05.076992](https://doi.org/10.1
<a id="kmc">[3]</a> Kokot, Marek, Maciej Dlugosz, and Sebastian Deorowicz. 2017. “KMC 3: counting and manipulating k-mer statistics.” Bioinformatics 33 (17): 2759–61. doi:[10.1093/bioinformatics/btx304](https://doi.org/10.1093/bioinformatics/btx304).

<a id="malva">[4]</a> Denti, Luca, Marco Previtali, Giulia Bernardini, Alexander Schönhuth, and Paola Bonizzoni. 2019. “MALVA: Genotyping by Mapping-Free Allele Detection of Known Variants.” iScience 18: 20–27. doi:[10.1016/j.isci.2019.07.011](https://doi.org/10.1016/j.isci.2019.07.011).

<a id="snpeff">[5]</a> Cingolani, Pablo, _et al._ 2012. “A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.” Fly 6 (2): 80–92. doi:[10.4161/fly.19695](https://doi.org/10.4161/fly.19695).
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ dependencies:
- gffutils=0.10.1
- snp-sites=2.5.1
- malva=1.3.1
- snpeff=4.5covid19
107 changes: 81 additions & 26 deletions flask/app/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from app import app
from werkzeug.utils import secure_filename

from os.path import join as pjoin
from pathlib import Path
from os import getcwd
import os
Expand All @@ -21,12 +20,32 @@
def mkdirp(path):
    """Create directory ``path`` together with any missing parent directories.

    Behaves like ``mkdir -p``: an already existing directory is not an error.
    """
    target = Path(path)
    target.mkdir(parents=True, exist_ok=True)

def pjoin(basepath, *paths):
    """Join ``paths`` onto ``basepath``, rejecting results that escape it.

    Drop-in replacement for ``os.path.join`` that guards against path
    traversal (``..`` components or absolute components in ``paths``).

    Args:
        basepath: Directory the resulting path must stay inside of.
        *paths: Path components appended to ``basepath``.

    Returns:
        The joined path, exactly as ``os.path.join`` produces it.

    Raises:
        Exception: If the joined path resolves outside ``basepath``.
    """
    path = os.path.join(basepath, *paths)
    base = os.path.abspath(basepath)
    try:
        # Compare whole path components. A plain string-prefix test would
        # wrongly accept siblings such as '/srv/jobs_evil' for '/srv/jobs'.
        inside = os.path.commonpath([base, os.path.abspath(path)]) == base
    except ValueError:
        # commonpath raises when the paths cannot be compared (e.g. they
        # live on different drives on Windows): treat as outside.
        inside = False
    if not inside:
        raise Exception('Trying to access a non safe-path.')
    return path

@app.route('/<path:route>')
def not_found(route):
    """Catch-all handler: answer any otherwise unmatched path with a JSON 404."""
    payload = jsonify(message='Route not found')
    response = make_response(payload, 404)
    # abort() raises, so the ``return`` only mirrors the original control flow.
    return abort(response)


def base_get_refs():
    """Load the catalog of built-in references from ``<JOB_DIR>/refs/refs.json``.

    Returns:
        The parsed JSON content of the file, or ``None`` when the file is
        missing, unreadable, or does not contain valid JSON.
    """
    refs_path = pjoin(app.config['JOB_DIR'], 'refs', 'refs.json')
    try:
        with open(refs_path, 'r') as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError: file absent or unreadable. ValueError: malformed JSON
        # (json.JSONDecodeError) or undecodable bytes. Anything else is a
        # programming error and should surface instead of being hidden.
        return None

@app.route('/ref', methods=['GET'])
def get_refs():
    """Return the built-in reference catalog as JSON (an empty list if unavailable)."""
    refs = base_get_refs()
    return jsonify([] if refs is None else refs)

@app.route('/vcf', methods=['GET'])
def get_vcf_list():
vcfs = list()
Expand Down Expand Up @@ -121,6 +140,9 @@ def rm_vcf(vcf_id):

return jsonify(info)

def first_true(iterable, default=None, pred=None):
    """Return the first item of ``iterable`` accepted by ``pred``.

    When ``pred`` is ``None`` the item's own truthiness is used. If no item
    qualifies, ``default`` is returned.
    """
    accept = bool if pred is None else pred
    for item in iterable:
        if accept(item):
            return item
    return default


@app.route('/vcf', methods=['POST'])
def post_vcf():
Expand All @@ -137,16 +159,25 @@ def post_vcf():

if 'file' not in request.files:
abort(make_response(jsonify(message="Missing file"), 400))
if 'reference' not in request.files:

custom_ref = ('refid' not in request.form or request.form['refid'] == '__custom__')
if custom_ref and 'reference' not in request.files:
abort(make_response(jsonify(message="Missing file"), 400))

rfile = request.files['file']
reffile = request.files['reference']

if rfile.filename == '':
abort(make_response(jsonify(message="Missing filename"), 400))
if reffile.filename == '':
abort(make_response(jsonify(message="Missing filename"), 400))

if custom_ref:
reffile = request.files['reference']
if reffile.filename == '':
abort(make_response(jsonify(message="Missing filename"), 400))
else:
refs = base_get_refs()
refid = request.form['refid']
ref = first_true(refs, None, lambda x: x['id'] == refid)
if ref is None:
abort(make_response(jsonify(message="Unknown ref"), 400))

uuid = datetime.datetime.now().strftime('%Y%m%d-%H%M%S_') + str(uuid4())

Expand All @@ -173,24 +204,37 @@ def post_vcf():
os.remove(dfile)
dfile = nfile

# Download reference
refpath = pjoin(workdir, secure_filename(reffile.filename))
reffile.save(refpath)
if refpath.endswith('.gz'):
nfile = refpath.replace('.gz', '')
with gzip.open(refpath, 'rb') as f_in:
with open(nfile, 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
os.remove(refpath)
refpath = nfile

# Download GTF (optional)
if 'gtf' not in request.files:
gtfpath = "NULL"
if custom_ref:
# Download reference
refpath = pjoin(workdir, secure_filename(reffile.filename))
reffile.save(refpath)
if refpath.endswith('.gz'):
nfile = refpath.replace('.gz', '')
with gzip.open(refpath, 'rb') as f_in:
with open(nfile, 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
os.remove(refpath)
refpath = nfile

# Download GTF (optional)
if 'gtf' not in request.files:
gtfpath = "NULL"
else:
gtffile = request.files['gtf']
gtfpath = pjoin(workdir, secure_filename(gtffile.filename))
gtffile.save(gtfpath)
else:
gtffile = request.files['gtf']
gtfpath = pjoin(workdir, secure_filename(gtffile.filename))
gtffile.save(gtfpath)
# Copy reference
sourcepath = pjoin(app.config['JOB_DIR'], 'refs', ref['reference']['file'])
refpath = pjoin(workdir, secure_filename(ref['reference']['file']))
shutil.copy2(sourcepath, refpath)

# Copy GTF
sourcepath = pjoin(app.config['JOB_DIR'], 'refs', ref['annotation']['file'])
gtfpath = pjoin(workdir, secure_filename(ref['annotation']['file']))
shutil.copy2(sourcepath, gtfpath)



info = {
"filename": dfile,
Expand All @@ -203,6 +247,8 @@ def post_vcf():
}
if filetype == 'fasta':
info['params'] = {"cores": cores}
if not custom_ref:
info['internal_ref'] = ref

with open(pjoin(workdir, 'info.json'), 'w+') as f:
json.dump(info, f)
Expand Down Expand Up @@ -385,7 +431,7 @@ def post_malva():
with open(pjoin(app.config['JOB_DIR'], 'vcf', vcf, 'status.json'), 'r') as f:
status = json.load(f)
with open(pjoin(app.config['JOB_DIR'], 'vcf', vcf, 'info.json'), 'r') as f:
info = json.load(f)
binfo = json.load(f)

if status['status'] in ['Uploaded', 'Precomputed']:
vcfpath = status['output']['vcf']
Expand All @@ -396,8 +442,8 @@ def post_malva():
vcf,
'vcf', 'run.cleaned.vcf'
)
reference = info['reference']
gtf = info['gtf']
reference = binfo['reference']
gtf = binfo['gtf']

uuid = datetime.datetime.now().strftime('%Y%m%d-%H%M%S_') + str(uuid4())
workdir = pjoin(
Expand Down Expand Up @@ -445,6 +491,11 @@ def post_malva():
},
"submission_time": int(round(time()))
}

has_internal_ref = 'internal_ref' in binfo
if has_internal_ref:
info['internal_ref'] = binfo['internal_ref']

with open(pjoin(workdir, 'info.json'), 'w+') as f:
json.dump(info, f)

Expand All @@ -463,6 +514,10 @@ def post_malva():
f'gtf: {gtf}\n' +
f'cores: {cores}\n'
)
if has_internal_ref and ('snpEff' in info['internal_ref']) and ('id' in info['internal_ref']['snpEff']):
conf.write(
f"refname: {info['internal_ref']['snpEff']['id']}\n"
)

status = {
"status": "Pending",
Expand Down
6 changes: 4 additions & 2 deletions frontend/craco.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ module.exports = {
plugin: CracoLessPlugin,
options: {
lessLoaderOptions: {
modifyVars: theme,
javascriptEnabled: true,
lessOptions: {
modifyVars: theme,
javascriptEnabled: true,
},
},
},
},
Expand Down
11 changes: 5 additions & 6 deletions frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,18 @@
"dependencies": {
"@2fd/ant-design-icons": "2.1.0",
"@ant-design/icons": "4.2.2",
"antd": "4.5.3",
"antd": "4.6.4",
"core-js": "^3.5.0",
"dayjs": "1.8.33",
"dayjs": "1.8.35",
"history": "5.0.0",
"markdown-to-jsx": "6.11.4",
"rc-resize-observer": "0.2.3",
"react": "^16.13.1",
"react-app-polyfill": "^1.0.6",
"react-dom": "^16.13.1",
"react-refetch": "^3.0.1",
"react-router": "6.0.0-alpha.3",
"react-router-dom": "6.0.0-alpha.3",
"react-scripts": "3.4.1",
"react-scripts": "3.4.3",
"react-window": "^1.8.5",
"xlsx": "^0.15.6"
},
Expand All @@ -42,10 +41,10 @@
"@craco/craco": "5.6.4",
"antd-dayjs-webpack-plugin": "1.0.1",
"babel-plugin-import": "1.13.0",
"craco-less": "1.16.0",
"craco-less": "1.17.0",
"eslint": "^6.1.0",
"eslint-config-prettier": "6.11.0",
"eslint-plugin-prettier": "3.1.4",
"prettier": "2.0.5"
"prettier": "2.1.1"
}
}
18 changes: 18 additions & 0 deletions frontend/src/ajax/refs.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { api } from 'app-config';

import { connect } from './utils';

// Request descriptor for the reference-list endpoint (`api.ref`).
// NOTE(review): `force` / `refreshing` look like react-refetch request
// options; confirm against the `connect` wrapper in ./utils.
const refs = { url: api.ref, force: true, refreshing: true };

// Maps the wrapped component's props to its requests: `refs` fetches the
// list, `reloadRefs` is a function prop that issues the same request again.
function mapPropsToRequests() {
  const reloadRefs = () => ({ refs: refs });
  return { refs: refs, reloadRefs: reloadRefs };
}

const ajaxRefs = connect(mapPropsToRequests);

export default ajaxRefs;
1 change: 1 addition & 0 deletions frontend/src/app-config.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export const api = {
return `${wsBaseHref}/malva${isNotNull(idm)}`;
},
update: `${wsBaseHref}/update`,
ref: `${wsBaseHref}/ref`,
};

export const basepath = process.env.PUBLIC_URL
Expand Down
34 changes: 30 additions & 4 deletions frontend/src/pages/CallReport/CallReport.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,31 @@ import { Error, Loading } from 'components';

import GenotypeTable from './GenotypeTable';
import DownloadAsXlsx from './DownloadAsXlsx';
import { ANN_FIELDS } from './utils';

// Per-key parsers for INFO values that need more than a plain string.
const customTranslation = {
  // ANN: comma-separated effects, each a '|'-separated list of values.
  // Every effect becomes an object keyed by the matching ANN_FIELDS name
  // (or by position when no name exists), plus a `key` holding its index.
  ANN: function (effects) {
    const rawEffects = effects.split(',');
    return rawEffects.map((effect, idx) => {
      const entries = effect
        .split('|')
        .map((v, i) => [ANN_FIELDS[i] || i, v]);
      return { key: idx, ...Object.fromEntries(entries) };
    });
  },
};

/**
 * Parse a VCF INFO string ("K1=V1;K2=V2;FLAG") into an object.
 *
 * Values of keys that have an entry in `customTranslation` are passed through
 * that parser; all other values are kept as strings. Fields without '='
 * (flags) are stored with an `undefined` value.
 *
 * @param {string} [info] raw INFO column; falsy input yields `{}`.
 * @returns {Object} mapping from INFO key to (possibly translated) value.
 */
function info2dict(info) {
  if (!info) return {};
  return Object.fromEntries(
    info.split(';').map((field) => {
      // Split on the FIRST '=' only. `field.split('=', 2)` would drop
      // everything after a second '=' because the JS limit truncates.
      const eq = field.indexOf('=');
      if (eq === -1) return [field, undefined];
      const key = field.slice(0, eq);
      const value = field.slice(eq + 1);
      // Own-property check so keys such as "toString" do not pick up
      // functions inherited from Object.prototype.
      const translate = Object.prototype.hasOwnProperty.call(
        customTranslation,
        key
      )
        ? customTranslation[key]
        : undefined;
      return [key, translate ? translate(value) : value];
    })
  );
}

function vcf2data(vcf) {
const [pheader, ...data] = vcf
Expand All @@ -26,10 +51,11 @@ function vcf2data(vcf) {
variant.DONOR && variant.DONOR.indexOf(':') !== -1
? variant.DONOR.split(':').map((x) => +x)
: undefined,
_gene:
variant.INFO && variant.INFO.startsWith('GENE=')
? variant.INFO.slice(5)
: undefined,
_info: info2dict(variant.INFO),
...variant,
}))
.map((variant) => ({
_gene: variant._info && variant._info.GENE,
...variant,
}));
}
Expand Down
53 changes: 53 additions & 0 deletions frontend/src/pages/CallReport/EffectsText.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import React, { useCallback } from 'react';
import { Button, Modal, Table } from 'antd';

import { ANN_FIELDS } from './utils';

// `scroll` prop passed to the antd Table in EffectTable.
const tableScroll = { x: '100%' };

// One table column per ANN field; the field name is both title and data key.
const columns = ANN_FIELDS.map((field) => ({ title: field, dataIndex: field }));

function EffectTable({ effects }) {
return (
<Table
dataSource={effects}
rowKey="key"
columns={columns}
pagination={false}
size="small"
scroll={tableScroll}
bordered
/>
);
}

// Inline style applied to the link Button rendered by EffectsText.
const buttonStyle = { display: 'inline', height: 'unset', padding: 'unset' };

function EffectsText({ effects }) {
const onClick = useCallback(
() =>
Modal.info({
title: 'Effects predicted by SnpEff',
content: <EffectTable effects={effects} />,
width: '80%',
icon: false,
maskClosable: true,
}),
[effects]
);
if (!effects) return 'None';
const effectText = [
...new Set(
effects
.filter((effect) => effect['Annotation Impact'] !== 'MODIFIER')
.map((effect) => effect['Annotation'])
),
].join(', ');
return (
<Button onClick={onClick} type="link" style={buttonStyle}>
{effectText}
</Button>
);
}

export default EffectsText;
Loading

0 comments on commit 6fac35f

Please sign in to comment.