-
Notifications
You must be signed in to change notification settings - Fork 1
/
Makefile.models
62 lines (49 loc) · 2.49 KB
/
Makefile.models
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# -*- makefile-gmake -*-
# Common substitution models
presets: $(addprefix src/,$(addsuffix .h,jc jcrna lg dayhoff jones wag ECMrest ECMunrest))
# model/lg.json is the matrix from this paper, estimated from Pfam using XRATE:
# An improved general amino acid replacement matrix.
# Le SQ, Gascuel O.
# Mol Biol Evol. 2008 Jul;25(7):1307-20. doi: 10.1093/molbev/msn067
# https://www.ncbi.nlm.nih.gov/pubmed/18367465
# model/dayhoff.json:
# Dayhoff MO, Schwartz RM, Orcutt BC. 1978.
# A model of evolutionary change in proteins.
# In: Atlas of Protein Sequence and Structure. M.O. Dayhoff, ed., pp. 345–352.
# model/jones.json:
# Jones D.T., Taylor W.R. and Thornton J.M. 1992.
# The rapid generation of mutation data matrices from protein sequences.
# CABIOS 8:275-282
# model/wag.json:
# Whelan, S. and N. Goldman. 2001.
# A general empirical model of protein evolution derived from multiple protein families
# using a maximum likelihood approach.
# Molecular Biology and Evolution 18, 691-699.
# lg.dat downloaded from this URL on 10/22/2016:
# http://www.atgc-montpellier.fr/download/datasets/models/lg_LG.PAML.txt
# dayhoff.dat, jones.dat and wag.dat were extracted from this CPAN archive on 10/22/2016:
# http://cpansearch.perl.org/src/HVALVERDE/Mecom-1.13/paml4.7/dat/
# Several files in this archive are credited to Ziheng Yang, 1998.
model/%.json: model/amino/%.dat
node/paml2json.js $< >$@
# model/ECMrest.json and model/ECMunrest.json are the empirical codon models from this paper:
# An empirical codon model for protein sequence evolution.
# Kosiol C, Holmes I, Goldman N.
# Mol Biol Evol. 2007 Jul;24(7):1464-79.
# https://www.ncbi.nlm.nih.gov/pubmed/17400572
# ECMrest is restricted (no multi-nucleotide mutations), ECMunrest is unrestricted.
# The tokenization scheme is as defined in perl/tokenize.pl and also in src/ctok.cpp
CODTOKENS := $(shell perl/tokenize.pl -alphabet)
model/ECM%.json: model/codon/ECM%.dat
node/paml2json.js -a '$(CODTOKENS)' $< >$@
src/%.cpp src/%.h: model/%.json
perl/model2cpp.pl $*
# Conversion of old-format xrate models to Historian JSON models
model/%.json: model/hsm/%.hsm
perl/xrate2json.pl $< >$@
# The default amino acid model is Le & Gascuel (2008)
# The default codon model is the unrestricted model from Kosiol, Goldman & Holmes (2007)
# Example of adding discretized-gamma rate classes to a model
# (can now do this via -gamma and -shape parameters from within historian)
model/prot1-4.json: model/prot1.json
perl/gamma-sites.pl $< -bins 4 -pretty -verbose >$@