% paper.bib -- BibTeX bibliography database.
% Dutheil et al. (2009), Genetics 183(1): coalescent hidden Markov model
% approach to ancestral population genomics.
% The title is single-braced so the bibliography style controls casing; only
% the proper noun "Markov" is protected.
@article{Dutheil2009,
  author   = {Dutheil, Julien Y. and Ganapathy, Ganesh and Hobolth, Asger and Mailund, Thomas and Uyenoyama, Marcy K. and Schierup, Mikkel H.},
  title    = {Ancestral population genomics: The coalescent hidden {Markov} model approach},
  journal  = {Genetics},
  volume   = {183},
  number   = {1},
  pages    = {259--274},
  year     = {2009},
  doi      = {10.1534/genetics.109.103010},
  issn     = {0016-6731},
  abstract = {With incomplete lineage sorting (ILS), the genealogy of closely related species differs along their genomes. The amount of ILS depends on population parameters such as the ancestral effective population sizes and the recombination rate, but also on the number of generations between speciation events. We use a hidden Markov model parameterized according to coalescent theory to infer the genealogy along a four-species genome alignment of closely related species and estimate population parameters. We analyze a basic, panmictic demographic model and study its properties using an extensive set of coalescent simulations. We assess the effect of the model assumptions and demonstrate that the Markov property provides a good approximation to the ancestral recombination graph. Using a too restricted set of possible genealogies, necessary to reduce the computational load, can bias parameter estimates. We propose a simple correction for this bias and suggest directions for future extensions of the model. We show that the patterns of ILS along a sequence alignment can be recovered efficiently together with the ancestral recombination rate. Finally, we introduce an extension of the basic model that allows for mutation rate heterogeneity and reanalyze human-chimpanzee-gorilla-orangutan alignments, using the new models. We expect that this framework will prove useful for population genomics and provide exciting insights into genome evolution.},
  file     = {:Users/tkchafin/Documents/Papers/Dutheil et al.{\_}2009{\_}Ancestral population genomics The coalescent hidden Markov model approach.pdf:pdf}
}
% Kukekova et al. (2018), Nature Ecology & Evolution 2: red fox genome assembly.
% "number" holds the issue number; the publication month goes in "month" as
% the unquoted sep macro so styles can localise or abbreviate it.
% NOTE(review): issue 9 and pages 1479--1491 were supplied from the published
% record, not from the original entry -- confirm against the DOI metadata.
@article{Kukekova2018,
  author    = {Kukekova, Anna V and Johnson, Jennifer L and Xiang, Xueyan and Feng, Shaohong and Liu, Shiping and Rando, Halie M and Kharlamova, Anastasiya V and Herbeck, Yury and Serdyukova, Natalya A and Xiong, Zijun and Beklemischeva, Violetta and Koepfli, Klaus-Peter and Gulevich, Rimma G and Vladimirova, Anastasiya V and Hekman, Jessica P and Perelman, Polina L and Graphodatsky, Aleksander S},
  title     = {Red fox genome assembly identifies genomic regions associated with tame and aggressive behaviours},
  journal   = {Nature Ecology {\&} Evolution},
  volume    = {2},
  number    = {9},
  pages     = {1479--1491},
  month     = sep,
  year      = {2018},
  publisher = {Springer US},
  doi       = {10.1038/s41559-018-0611-6},
  url       = {https://doi.org/10.1038/s41559-018-0611-6},
  issn      = {2397-334X},
  file      = {:Users/tkchafin/Documents/Papers/Kukekova et al.{\_}2018{\_}Red fox genome assembly identifies genomic regions associated with tame and aggressive behaviours.pdf:pdf}
}
% Hudson and Kaplan (1985), Genetics 111(1): statistical properties of the
% number of recombination events in a sample of DNA sequences.
% The acronym "DNA" is brace-protected; the rest of the title is left to the style.
% NOTE(review): the DOI and the full given names were supplied from the
% published record, not from the original entry -- confirm before relying on them.
@article{Hudson1985,
  author   = {Hudson, Richard R. and Kaplan, Norman L.},
  title    = {Statistical properties of the number of recombination events in the history of a sample of {DNA} sequences},
  journal  = {Genetics},
  volume   = {111},
  number   = {1},
  pages    = {147--164},
  year     = {1985},
  doi      = {10.1093/genetics/111.1.147},
  issn     = {0016-6731},
  abstract = {Some statistical properties of samples of DNA sequences are studied under an infinite-site neutral model with recombination. The two quantities of interest are R, the number of recombination events in the history of a sample of sequences, and RM, the number of recombination events that can be parsimoniously inferred from a sample of sequences. Formulas are derived for the mean and variance of R. In contrast to R, RM can be determined from the sample. Since no formulas are known for the mean and variance of RM, they are estimated with Monte Carlo simulations. It is found that RM is often much less than R, therefore, the number of recombination events may be greatly under-estimated in a parsimonious reconstruction of the history of a sample. The statistic RM can be used to estimate the product of the recombination rate and the population size or, if the recombination rate is known, to estimate the population size. To illustrate this, DNA sequences from the Adh region of Drosophila melanogaster are used to estimate the effective population size of this species.},
  file     = {:Users/tkchafin/Documents/Papers/Hudson, Kaplan{\_}1985{\_}Statistical properties of the number of recombination events in the history of a sample of DNA sequences.pdf:pdf}
}
% Liu et al. (2013), Human Genomics 7(1): review of software and methods for
% estimating genetic ancestry in human populations.
% The title is single-braced so the bibliography style controls casing.
@article{Liu2013,
  author   = {Liu, Yushi and Nyunoya, Toru and Leng, Shuguang and Belinsky, Steven A. and Tesfaigzi, Yohannes and Bruse, Shannon},
  title    = {Softwares and methods for estimating genetic ancestry in human populations},
  journal  = {Human Genomics},
  volume   = {7},
  number   = {1},
  pages    = {1--7},
  year     = {2013},
  doi      = {10.1186/1479-7364-7-1},
  issn     = {1479-7364},
  keywords = {Ancestry,Genetic,Polymorphism,Structure},
  abstract = {The estimation of genetic ancestry in human populations has important applications in medical genetic studies. Genetic ancestry is used to control for population stratification in genetic association studies, and is used to understand the genetic basis for ethnic differences in disease susceptibility. In this review, we present an overview of genetic ancestry estimation in human disease studies, followed by a review of popular softwares and methods used for this estimation.},
  file     = {:Users/tkchafin/Documents/Papers/Liu et al.{\_}2013{\_}Softwares and methods for estimating genetic ancestry in human populations.pdf:pdf}
}
% vonHoldt et al. (2016), Science Advances 2(7): whole-genome analysis of
% North American wolf admixture.
% The first author's surname is braced to keep its lowercase "von" spelling
% intact; the citation key is unchanged so existing citations still resolve.
% "pages" carries the single article identifier (the journal has no page range).
@article{VonHoldt2016,
  author   = {{vonHoldt}, B. M. and Cahill, J. A. and Fan, Z. and Gronau, I. and Robinson, J. and Pollinger, J. P. and Shapiro, B. and Wall, J. and Wayne, R. K.},
  title    = {Whole-genome sequence analysis shows that two endemic species of {North American} wolf are admixtures of the coyote and gray wolf},
  journal  = {Science Advances},
  volume   = {2},
  number   = {7},
  pages    = {e1501714},
  year     = {2016},
  doi      = {10.1126/sciadv.1501714},
  url      = {http://advances.sciencemag.org/cgi/doi/10.1126/sciadv.1501714},
  issn     = {2375-2548},
  abstract = {Protection of populations comprising admixed genomes is a challenge under the Endangered Species Act (ESA), which is regarded as the most powerful species protection legislation ever passed in the United States but lacks specific provisions for hybrids. The eastern wolf is a newly recognized wolf-like species that is highly admixed and inhabits the Great Lakes and eastern United States, a region previously thought to be included in the geographic range of only the gray wolf. The U.S. Fish and Wildlife Service has argued that the presence of the eastern wolf, rather than the gray wolf, in this area is grounds for removing ESA protection (delisting) from the gray wolf across its geographic range. In contrast, the red wolf from the southeastern United States was one of the first species protected under the ESA and was protected despite admixture with coyotes. We use whole-genome sequence data to demonstrate a lack of unique ancestry in eastern and red wolves that would not be expected if they represented long divergent North American lineages. These results suggest that arguments for delisting the gray wolf are not valid. Our findings demonstrate how a strict designation of a species under the ESA that does not consider admixture can threaten the protection of endangered entities. We argue for a more balanced approach that focuses on the ecological context of admixture and allows for evolutionary processes to potentially restore historical patterns of genetic variation.},
  file     = {:Users/tkchafin/Documents/Papers/vonHoldt et al.{\_}2016{\_}Whole-genome sequence analysis shows that two endemic species of North American wolf are admixtures of the coyote a.pdf:pdf}
}
% Wang et al. (2010): conference paper on genome-wide compatible SNP intervals.
% Typed as inproceedings with the proceedings title in "booktitle", since the
% venue is a conference proceedings rather than a journal.
% The compound surname "Pardo-Manuel de Villena" is braced so BibTeX does not
% split it into a von part and a last name.
@inproceedings{Wang2010,
  author    = {Wang, Jeremy and Moore, Kyle J. and Zhang, Qi and {Pardo-Manuel de Villena}, Fernando and Wang, Wei and McMillan, Leonard},
  title     = {Genome-wide compatible {SNP} intervals and their properties},
  booktitle = {Proceedings of the First {ACM} International Conference on Bioinformatics and Computational Biology},
  pages     = {43--52},
  year      = {2010},
  doi       = {10.1145/1854776.1854788},
  isbn      = {9781450304382},
  abstract  = {Intraspecific genomes can be subdivided into blocks with limited diversity. Understanding the distribution and structure of these blocks will help to unravel many biological problems including the identification of genes associated with complex diseases, finding the ancestral origins of a given population, and localizing regions of historical recombination, gene conversion, and homoplasy. We present methods for partitioning a genome into blocks for which there are no apparent recombinations, thus providing parsimonious sets of compatible genome intervals based on the four-gamete test. Our contribution is a thorough analysis of the problem of dividing a genome into compatible intervals, in terms of its computational complexity, and by providing an achievable lower-bound on the minimal number of intervals required to cover an entire data set. In general, such minimal interval partitions are not unique. However, we identify properties that are common to every possible solution. We also define the notion of an interval set that achieves the interval lower-bound, yet maximizes interval overlap. We demonstrate algorithms for partitioning both haplotype data from inbred mice as well as outbred heterozygous genotype data using extensions of the standard four-gamete test. These methods allow our algorithms to be applied to a wide range of genomic data sets. Copyright {\copyright} 2010 ACM.},
  file      = {:Users/tkchafin/Documents/Papers/Wang et al.{\_}2010{\_}Genome-wide compatible SNP intervals and their properties.pdf:pdf}
}
% Danecek et al. (2011), Bioinformatics 27(15): the variant call format (VCF)
% and the VCFtools software suite.
% "VCFtools" is brace-protected so sentence-casing styles keep its capitals.
@article{Danecek2011,
  author   = {Danecek, Petr and Auton, Adam and Abecasis, Gon{\c{c}}alo and Albers, Cornelis A. and Banks, Eric and DePristo, Mark A. and Handsaker, Robert E. and Lunter, Gerton and Marth, Gabor T. and Sherry, Stephen T. and McVean, Gilean and Durbin, Richard},
  title    = {The variant call format and {VCFtools}},
  journal  = {Bioinformatics},
  volume   = {27},
  number   = {15},
  pages    = {2156--2158},
  year     = {2011},
  doi      = {10.1093/bioinformatics/btr330},
  issn     = {1367-4803},
  abstract = {SUMMARY: The variant call format (VCF) is a generic format for storing DNA polymorphism data such as SNPs, insertions, deletions and structural variants, together with rich annotations. VCF is usually stored in a compressed manner and can be indexed for fast data retrieval of variants from a range of positions on the reference genome. The format was developed for the 1000 Genomes Project, and has also been adopted by other projects such as UK10K, dbSNP and the NHLBI Exome Project. VCFtools is a software suite that implements various utilities for processing VCF files, including validation, merging, comparing and also provides a general Perl API. AVAILABILITY: http://vcftools.sourceforge.net},
  file     = {:Users/tkchafin/Documents/Papers/Danecek et al.{\_}2011{\_}The variant call format and VCFtools.pdf:pdf}
}
% Springer and Gatesy (2018), Genes 9(3), article 123: delimiting coalescence
% genes (C-Genes) in phylogenomic data sets.
% The DOI suffix genes9030123 encodes volume 9, issue 03, article 0123, so
% "number" is the issue (3) and "pages" carries the article number (123).
@article{Springer2018,
  author   = {Springer, Mark S and Gatesy, John},
  title    = {Delimiting Coalescence Genes ({C-Genes}) in Phylogenomic Data Sets},
  journal  = {Genes},
  volume   = {9},
  number   = {3},
  pages    = {123},
  year     = {2018},
  doi      = {10.3390/genes9030123},
  issn     = {2073-4425},
  keywords = {coalescence genes,phylogenomics,protein-coding sequences,recombination breakpoints},
  file     = {:Users/tkchafin/Documents/Papers/Springer, Gatesy{\_}2018{\_}Delimiting Coalescence Genes (C-Genes) in Phylogenomic Data Sets.pdf:pdf}
}