diff --git a/.Rbuildignore b/.Rbuildignore index d5b84a3f..50c727ea 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,3 +5,4 @@ ^cran-comments\.md$ +^CRAN-RELEASE$ diff --git a/CHANGELOG b/CHANGELOG index 9b859a2a..1f1db3f2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,8 @@ +Version - 3.3.0 - 06-03-19 + + Version - 3.2.8 - 06-02-19 (Not Released to CRAN) -- Fixed bug communicated by user Armin Eisler in October 2018 in cal3TimePaleoPhy where if `dateTreatment = 'minMax'` was used, the output trees would have tips at the same absolute time, as the generating time-tree used for cal3 inference was generated using the input fixed first and last dates as if they were FAD and LAD, with the upper age constraint treated as fixed LAD and time of observation for each tip. This has the effect of making 'minMax' treatment with cal3 nearly identical to using 'firstLast'. This is now fixed by creating a new new basic-time-scaled tree using the randomly generated set of point occurrence times within each iteration under 'minMax', such that the resulting tip dates are always different. +- Fixed bug communicated by user Armin Elsler in October 2018 in cal3TimePaleoPhy where if `dateTreatment = 'minMax'` was used, the output trees would have tips at the same absolute time, as the generating time-tree used for cal3 inference was generated using the input fixed first and last dates as if they were FAD and LAD, with the upper age constraint treated as fixed LAD and time of observation for each tip. This has the effect of making 'minMax' treatment with cal3 nearly identical to using 'firstLast'. This is now fixed by creating a new new basic-time-scaled tree using the randomly generated set of point occurrence times within each iteration under 'minMax', such that the resulting tip dates are always different. - Also fixed tests for variance in cal3 tree calculations that were not properly sorting time data before comparing it against the time-scaled tree. Version - 3.2.4 - 05-30-19 (Not Released to CRAN) @@ -13,7 +16,7 @@ Version - 3.2.0 - 04-14-19 (Not Released to CRAN) -Numerous fixes to how making taxon trees are created. Version - 3.1.9 - 02-15-19 (Not Released to CRAN) --Updates to timePaleoPhy and cal3 documentation for dateTreatment argument, thanks to Armin Eisler for pointing out some inaccuracies in the text. +-Updates to timePaleoPhy and cal3 documentation for dateTreatment argument, thanks to Armin Elsler for pointing out some inaccuracies in the text. -Updated some (but not all) functions related to use of PBDB data to use the proper 1.2 API, and made sure usage was properly accounted for. -Added functions for getting taxonomic data for clades, or for a specific list of taxa from the PBDB, which previously had existed in proto form in examples. -Added a function for plotting a tree with phylopics from the PBDB - this function currently does not allow for a lot of flexibility and is a work in progress. diff --git a/CRAN-RELEASE b/CRAN-RELEASE new file mode 100644 index 00000000..5ec79fd6 --- /dev/null +++ b/CRAN-RELEASE @@ -0,0 +1,2 @@ +This package was submitted to CRAN on 2019-06-04. +Once it is accepted, delete this file and tag the release (commit f6ddc4a1ba). diff --git a/DESCRIPTION b/DESCRIPTION index ebf7a5c5..5764adfa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: paleotree Type: Package -Version: 3.2.8 +Version: 3.3.0 Title: Paleontological and Phylogenetic Analyses of Evolution -Date: 2019-05-30 +Date: 2019-06-04 Author: David W. Bapst, Peter J. Wagner Depends: R (>= 3.0.0), diff --git a/R/binTimeData.R b/R/binTimeData.R index 05cf88ee..03c27c9b 100644 --- a/R/binTimeData.R +++ b/R/binTimeData.R @@ -33,26 +33,27 @@ #' intervals giving in the output increases with time, as these are numbered #' relative to each other, from first to last. #' -#' As of version 1.7 of paleotree, taxa which are extant as indicated in timeData as being -#' in a time interval bounded (0,0), unless time-bins are preset using -#' argument int.times (prior to version 1.5 they were erroneously listed as +#' As of version 1.7 of paleotree, taxa which are +#' extant as indicated in timeData as being +#' in a time interval bounded \code{(0, 0)}, unless time-bins are preset using +#' argument \code{int.times} (prior to version 1.5 they were erroneously listed as #' NA). #' #' @param timeData Two-column matrix of simulated first and last occurrences in -#' absolute continuous time +#' absolute continuous time. -#' @param int.length Time interval length, default is 1 time unit +#' @param int.length Time interval length, default is 1 time-unit. #' @param start Starting time for calculating the intervals. #' @param int.times A two column matrix with the start and end times of the #' intervals to be used. -#' @return A list containing: \item{int.times}{A 2 column matrix with the start +#' @return A list containing: \item{\code{int.times}}{A 2-column matrix with the start #' and end times of the intervals used; time decreases relative to the -#' present.} \item{taxon.times}{A 2 column matrix with the first and last -#' occurrences of taxa in the intervals listed in int.times, with numbers -#' referring to the row of int.times.} +#' present.} \item{\code{taxon.times}}{A 2-column matrix with the first and last +#' occurrences of taxa in the intervals listed in \code{int.times}, with numbers +#' referring to the row of \code{int.times}.} #' @seealso \code{\link{simFossilRecord}}, \code{\link{sampleRanges}}, #' \code{\link{taxicDivCont}} @@ -63,10 +64,11 @@ #' #' #Simulate some fossil ranges with simFossilRecord #' set.seed(444) -#' record <- simFossilRecord(p = 0.1, q = 0.1, nruns = 1, -#' nTotalTaxa = c(30,40), nExtant = 0) +#' record <- simFossilRecord( +#' p = 0.1, q = 0.1, nruns = 1, +#' nTotalTaxa = c(30,40), nExtant = 0) #' taxa <- fossilRecord2fossilTaxa(record) -#' #simulate a fossil record with imperfect sampling with sampleRanges() +#' #simulate a fossil record with imperfect sampling via sampleRanges #' rangesCont <- sampleRanges(taxa,r = 0.5) #' #Now let's use binTimeData() to bin in intervals of 1 time unit #' rangesDisc <- binTimeData(rangesCont,int.length = 1) @@ -74,27 +76,48 @@ #' equalDiscInt <- taxicDivDisc(rangesDisc) #' #' #example with pre-set intervals input (including overlapping) -#' presetIntervals <- cbind(c(1000,990,970,940),c(980,970,950,930)) -#' rangesDisc1 <- binTimeData(rangesCont,int.times = presetIntervals) +#' presetIntervals <- cbind( +#' c(1000, 990, 970, 940), +#' c(980, 970, 950, 930) +#' ) +#' rangesDisc1 <- binTimeData(rangesCont, +#' int.times = presetIntervals) +#' +#' # plot the diversity curve with these uneven bins #' taxicDivDisc(rangesDisc1) -#' #now let's plot diversity with (different) equal length intervals used above -#' taxicDivDisc(rangesDisc1,int.times = equalDiscInt[,1:2]) +#' +#' # now let's plot the diversity from these unequal-length bins +#' # with the original equal length intervals from above +#' taxicDivDisc(rangesDisc1, int.times = equalDiscInt[,1:2]) +#' #' +#' #################################### #' #example with extant taxa #' set.seed(444) -#' record <- simFossilRecord(p = 0.1, q = 0.1, nruns = 1, -#' nTotalTaxa = c(30,40)) +#' record <- simFossilRecord( +#' p = 0.1, q = 0.1, nruns = 1, +#' nTotalTaxa = c(30,40)) #' taxa <- fossilRecord2fossilTaxa(record) -#' #simulate a fossil record with imperfect sampling with sampleRanges() -#' rangesCont <- sampleRanges(taxa,r = 0.5,,modern.samp.prob = 1) -#' #Now let's use binTimeData() to bin in intervals of 1 time unit -#' rangesDisc <- binTimeData(rangesCont,int.length = 1) +#' #simulate a fossil record with imperfect sampling via sampleRanges +#' rangesCont <- sampleRanges( +#' taxa, r = 0.5, +#' modern.samp.prob = 1) +#' #Now let's use binTimeDat to bin into intervals of 1 time-unit +#' rangesDisc <- binTimeData(rangesCont, +#' int.length = 1) #' #plot with taxicDivDisc() #' taxicDivDisc(rangesDisc) #' -#' #example with pre-set intervals input (including overlapping) -#' presetIntervals <- cbind(c(40,30,20,10),c(30,20,10,0)) -#' rangesDisc1 <- binTimeData(rangesCont,int.times = presetIntervals) +#' +#' ################################################# +#' #example with pre-set intervals input +#' # (including overlapping) +#' presetIntervals <- cbind( +#' c(40, 30, 20, 10), +#' c(30, 20, 10, 0) +#' ) +#' rangesDisc1 <- binTimeData(rangesCont, +#' int.times = presetIntervals) #' taxicDivDisc(rangesDisc1) #' #' @export binTimeData @@ -108,14 +131,27 @@ binTimeData <- function(timeData,int.length = 1,start = NA,int.times = NULL){ #the last bin is cut off at zero (present day) #x <- c(0,runif(99));timeData <- cbind(x+rexp(100),x);int.length = 1;start = NA;int.times = NULL timeData <- timeData[!is.na(timeData[,1]),] - if(any(is.na(timeData))){stop("Weird NAs in Data?")} - if(any(timeData[,1]= x & y[2]= x & y[2]= x & y[2]= x & y[2]y & x[2]y & x[2]y & int.times[i,2]y & int.times[i,2]1){stop("Splitter returning more than one value?!")} splitter <- c(int.times[i,1],splitter,int.times[i,2]) - int.times <- rbind(int.times,cbind(splitter[1:(length(splitter)-1)],splitter[2:length(splitter)])) + int.times <- rbind( + int.times, + cbind( + splitter[1:(length(splitter)-1)], + splitter[2:length(splitter)] + ) + ) } int.times <- int.times[-which(mustSplit),] int.times <- int.times[order(-int.times[,1]),] @@ -219,15 +291,33 @@ multiDiv <- function(data,int.length = 1,plot = TRUE,split.int = TRUE,drop.ZLB = div <- matrix(,nrow(int.times),1) for(i in 1:length(data)){ if((dclass1[i] == 1)){ - divs1 <- taxicDivCont(timeData = data[[i]],int.times = int.times,plot = FALSE,drop.cryptic = drop.cryptic)[,3] + divs1 <- taxicDivCont( + timeData = data[[i]], + int.times = int.times, + plot = FALSE, + drop.cryptic = drop.cryptic + ) + divs1 <- divs1[,3] div <- cbind(div,divs1) } if((dclass1[i] == 2)){ - divs2 <- taxicDivDisc(timeList = data[[i]],int.times = int.times,split.int = FALSE,plot = FALSE)[,3] + divs2 <- taxicDivDisc( + timeList = data[[i]], + int.times = int.times, + split.int = FALSE, + plot = FALSE + ) + divs2 <- divs2[,3] div <- cbind(div,divs2) } if((dclass1[i] == 3)){ - divs3 <- phyloDiv(tree = data[[i]],int.times = int.times,plot = FALSE,drop.ZLB = drop.ZLB)[,3] + divs3 <- phyloDiv( + tree = data[[i]], + int.times = int.times, + plot = FALSE, + drop.ZLB = drop.ZLB + ) + divs3 <- divs3[,3] div <- cbind(div,divs3) } } @@ -235,16 +325,35 @@ multiDiv <- function(data,int.length = 1,plot = TRUE,split.int = TRUE,drop.ZLB = colnames(div) <- paste("dataset",1:ncol(div),sep = "") #get median curve median <- apply(div,1,median) - q1 <- apply(div,1,quantile,probs = 0.025) #the low quantile - q2 <- apply(div,1,quantile,probs = 0.975) #the high quantile - median.curve <- cbind(median = median,low.95.quantile = q1,high.95.quantile = q2) - res <- list(int.times = int.times,div.counts = div,median.curve = median.curve) + #the low quantile + q1 <- apply(div,1,quantile,probs = 0.025) + #the high quantile + q2 <- apply(div,1,quantile,probs = 0.975) + median.curve <- cbind( + median = median, + low.95.quantile = q1, + high.95.quantile = q2 + ) + # + res <- list( + int.times = int.times, + div.counts = div, + median.curve = median.curve + ) + # if(plot){ plotMultiDiv( - res,plotLogRich = plotLogRich,timelims = timelims,yAxisLims = yAxisLims, - plotMultCurves = plotMultCurves,multRainbow = multRainbow, - divPalette = divPalette,divLineType = divLineType,main = main + res, + plotLogRich = plotLogRich, + timelims = timelims, + yAxisLims = yAxisLims, + plotMultCurves = plotMultCurves, + multRainbow = multRainbow, + divPalette = divPalette, + divLineType = divLineType, + main = main )} + # return(invisible(res)) } @@ -253,8 +362,11 @@ multiDiv <- function(data,int.length = 1,plot = TRUE,split.int = TRUE,drop.ZLB = plotMultiDiv <- function(results,plotLogRich = FALSE,timelims = NULL, yAxisLims = NULL,plotMultCurves = FALSE, multRainbow = TRUE,divPalette = NULL,divLineType = 1,main = NULL){ - if(!is.null(timelims)){if(length(timelims) != 2){ - stop("if given, timelims should be a vector of length 2")}} + if(!is.null(timelims)){ + if(length(timelims) != 2){ + stop("if given, timelims should be a vector of length 2") + } + } if(!is.null(yAxisLims)){ if(length(yAxisLims) != 2){ stop("if given, yAxisLims should be a vector of length 2") @@ -283,9 +395,14 @@ plotMultiDiv <- function(results,plotLogRich = FALSE,timelims = NULL, } plot(times1[divs1[,1]>0],divs1[divs1[,1]>0,1],type = "n", ylim = y_lim,log = "y", - xlim = if(is.null(timelims)){c(max(times1),max(0,min(times1)))}else{timelims}, + xlim = if(is.null(timelims)){ + c(max(times1),max(0,min(times1))) + }else{ + timelims + }, xaxs = if(is.null(timelims)){"r"}else{"i"}, - xlab = "Time (Before Present)",ylab = "Log Lineage/Taxic Richness", + xlab = "Time (Before Present)", + ylab = "Log Lineage/Taxic Richness", main = ) }else{ if(is.null(yAxisLims)){ @@ -295,20 +412,33 @@ plotMultiDiv <- function(results,plotLogRich = FALSE,timelims = NULL, } plot(times1,divs1[,1],type = "n", ylim = y_lim, - xlim = if(is.null(timelims)){c(max(times1),max(0,min(times1)))}else{timelims}, + xlim = if(is.null(timelims)){ + c(max(times1),max(0,min(times1))) + }else{ + timelims + }, xaxs = if(is.null(timelims)){"r"}else{"i"}, - xlab = "Time (Before Present)",ylab = "Lineage/Taxic Richness", + xlab = "Time (Before Present)", + ylab = "Lineage/Taxic Richness", main = main) } if(is.null(divPalette)){ - if(multRainbow){divPalette <- sample(rainbow(ncol(divs1))) - }else{divPalette <- rep(1,ncol(divs1))} + if(multRainbow){ + divPalette <- sample(rainbow(ncol(divs1))) + }else{ + divPalette <- rep(1,ncol(divs1)) + } } if(length(divLineType) != ncol(divs1)){ divLineType <- rep(divLineType,ncol(divs1)) } for(i in 1:ncol(divs1)){ #plot each line - lines(times1,divs1[,i],lwd = 3,col = divPalette[i],lty = divLineType[i]) + lines(times1, + divs1[,i], + lwd = 3, + col = divPalette[i], + lty = divLineType[i] + ) } }else{ if(is.null(main)){ @@ -321,15 +451,20 @@ plotMultiDiv <- function(results,plotLogRich = FALSE,timelims = NULL, mdiv1[mdiv1[,2]<1,2] <- 1 mdiv1[mdiv1[,3]<1,3] <- 1 if(is.null(yAxisLims)){ - y_lim <- c(min(mdiv1[mdiv1 >= 1]),max(mdiv1[mdiv1 >= 1])) + y_lim <- c(min(mdiv1[mdiv1 >= 1]), max(mdiv1[mdiv1 >= 1])) }else{ y_lim <- yAxisLims } plot(times1[mdiv1[,3]>0],mdiv1[mdiv1[,3]>0,3],type = "n", ylim = y_lim,log = "y", - xlim = if(is.null(timelims)){c(max(times1),max(0,min(times1)))}else{timelims}, + xlim = if(is.null(timelims)){ + c(max(times1),max(0,min(times1))) + }else{ + timelims + }, xaxs = if(is.null(timelims)){"r"}else{"i"}, - xlab = "Time (Before Present)",ylab = "Log Lineage/Taxic Richness", + xlab = "Time (Before Present)", + ylab = "Log Lineage/Taxic Richness", main = main) }else{ if(is.null(yAxisLims)){ @@ -339,12 +474,24 @@ plotMultiDiv <- function(results,plotLogRich = FALSE,timelims = NULL, } plot(times1,mdiv1[,3],type = "n", ylim = y_lim, - xlim = if(is.null(timelims)){c(max(times1),max(0,min(times1)))}else{timelims}, + xlim = if(is.null(timelims)){ + c(max(times1),max(0,min(times1))) + }else{ + timelims + }, xaxs = if(is.null(timelims)){"r"}else{"i"}, - xlab = "Time (Before Present)",ylab = "Lineage/Taxic Richness", + xlab = "Time (Before Present)", + ylab = "Lineage/Taxic Richness", main = main) } - polygon(c(times1,rev(times1)),c(mdiv1[,3],rev(mdiv1[,2])),col = "gray",border = NA) + polygon( + c(times1,rev(times1)), + c(mdiv1[,3],rev(mdiv1[,2])), + col = "gray", + border = NA + ) + # lines(times1,mdiv1[,1],lwd = 3) + # } } \ No newline at end of file diff --git a/R/occData2timeList.R b/R/occData2timeList.R index 3d85497e..8747523d 100644 --- a/R/occData2timeList.R +++ b/R/occData2timeList.R @@ -67,8 +67,10 @@ #' "zoneOverlap". Please see details below. #' @return -#' Returns a standard timeList data object, as used by many other paleotree functions, like -#' \code{\link{bin_timePaleoPhy}}, \code{\link{bin_cal3TimePaleoPhy}} and \code{\link{taxicDivDisc}} +#' Returns a standard timeList data object, as used by +#' many other paleotree functions, like +#' \code{\link{bin_timePaleoPhy}}, \code{\link{bin_cal3TimePaleoPhy}} +#' and \code{\link{taxicDivDisc}} #' @seealso #' Occurrence data as commonly used with \code{paleotree} functions can @@ -83,13 +85,20 @@ #' @examples #' data(graptPBDB) #' -#' graptOccSpecies <- taxonSortPBDBocc(graptOccPBDB,rank = "species",onlyFormal = FALSE) +#' graptOccSpecies <- taxonSortPBDBocc( +#' data = graptOccPBDB, +#' rank = "species", +#' onlyFormal = FALSE) #' graptTimeSpecies <- occData2timeList(occList = graptOccSpecies) #' #' head(graptTimeSpecies[[1]]) #' head(graptTimeSpecies[[2]]) #' -#' graptOccGenus <- taxonSortPBDBocc(graptOccPBDB,rank = "genus",onlyFormal = FALSE) +#' graptOccGenus <- taxonSortPBDBocc( +#' data = graptOccPBDB, +#' rank = "genus", +#' onlyFormal = FALSE +#' ) #' graptTimeGenus <- occData2timeList(occList = graptOccGenus) #' #' layout(1:2) @@ -98,47 +107,53 @@ #' #' # the default interval calculation is "dateRange" #' # let's compare to the other option, "occRange" -#' # for species +#' # but now for graptolite *species* #' -#' graptOccRange <- occData2timeList(occList = graptOccSpecies, intervalType = "occRange") +#' graptOccRange <- occData2timeList( +#' occList = graptOccSpecies, +#' intervalType = "occRange" +#' ) #' #' #we would expect no change in the diversity curve -#' #because there are only changes in th -#' #earliest bound for the FAD -#' #latest bound for the LAD +#' #because there are only changes in th +#' #earliest bound for the FAD +#' #latest bound for the LAD #' #so if we are depicting ranges within maximal bounds -#' #dateRanges has no effect +#' #dateRanges has no effect #' layout(1:2) #' taxicDivDisc(graptTimeSpecies) #' taxicDivDisc(graptOccRange) -#' #yep, identical +#' #yep, identical! #' #' #so how much uncertainty was gained by using dateRange? #' -#' # write a simple function for getting uncertainty in first and last -#' # appearance dates from a timeList object +#' # write a function for getting uncertainty in first and last +#' # appearance dates from a timeList object #' sumAgeUncert <- function(timeList){ -#' fourDate <- timeList2fourDate(timeList) -#' perOcc <- (fourDate[,1]-fourDate[,2])+(fourDate[,3]-fourDate[,4]) -#' sum(perOcc) -#' } +#' fourDate <- timeList2fourDate(timeList) +#' perOcc <- (fourDate[,1] - fourDate[,2]) + +#' (fourDate[,3] - fourDate[,4]) +#' sum(perOcc) +#' } #' #' #total amount of uncertainty in occRange dataset #' sumAgeUncert(graptOccRange) #' #total amount of uncertainty in dateRange dataset #' sumAgeUncert(graptTimeSpecies) #' #the difference -#' sumAgeUncert(graptOccRange)-sumAgeUncert(graptTimeSpecies) +#' sumAgeUncert(graptOccRange) - sumAgeUncert(graptTimeSpecies) #' #as a proportion -#' 1-(sumAgeUncert(graptTimeSpecies)/sumAgeUncert(graptOccRange)) +#' 1 - (sumAgeUncert(graptTimeSpecies) / sumAgeUncert(graptOccRange)) #' #' #a different way of doing it -#' dateChange <- timeList2fourDate(graptTimeSpecies)-timeList2fourDate(graptOccRange) -#' apply(dateChange,2,sum) +#' dateChange <- timeList2fourDate(graptTimeSpecies) - +#' timeList2fourDate(graptOccRange) +#' apply(dateChange, 2, sum) #' #total amount of uncertainty removed by dateRange algorithm #' sum(abs(dateChange)) #' #' layout(1) +#' #' @name occData2timeList #' @rdname occData2timeList diff --git a/R/parentChild2TaxonTree.R b/R/parentChild2TaxonTree.R index c8a84f7f..d7da3f61 100644 --- a/R/parentChild2TaxonTree.R +++ b/R/parentChild2TaxonTree.R @@ -23,17 +23,22 @@ #' will add a tip to every internal node with the parent-taxon name encapsulated in #' parentheses. -# @param reorderTree A logical indicating whether a step of \code{reorder.phylo()} will be applied, -# if \code{cleanTree = TRUE}; has no effect if \code{cleanTree = FALSE}. -# Reordering may cause more problems than it is worth in older versions of \code{ape}. +# @param reorderTree A logical indicating whether a +# step of \code{reorder.phylo()} will be applied, +# if \code{cleanTree = TRUE}; has no effect +# if \code{cleanTree = FALSE}. +# Reordering may cause more problems than it is +# worth in older versions of \code{ape}. #' @inheritParams makePBDBtaxonTree #' @return -#' A phylogeny of class 'phylo', with tip taxa as controlled by argument \code{tipSet}. +#' A phylogeny of class 'phylo', with tip taxa as +#' controlled by argument \code{tipSet}. #' The output tree is returned with no edge lengths. #' -#' The names of higher taxa than the tips should be appended as the element $node.label for the internal nodes. +#' The names of higher taxa than the tips should be appended +#' as the element $node.label for the internal nodes. #' @seealso \code{\link{makePBDBtaxonTree}}, \code{\link{taxonTable2taxonTree}} @@ -42,81 +47,108 @@ #' @examples #' #' #let's create a small, really cheesy example -#' pokexample <- rbind(cbind("Squirtadae",c("Squirtle","Blastoise","Wartortle")), -#' c("Shelloidea","Lapras"),c("Shelloidea","Squirtadae"), -#' c("Pokezooa","Shelloidea"),c("Pokezooa","Parasect"), -#' c("Rodentapokemorpha","Linoone"),c("Rodentapokemorpha","Sandshrew"), -#' c("Rodentapokemorpha","Pikachu"),c("Hirsutamona","Ursaring"), -#' c("Hirsutamona","Rodentapokemorpha"),c("Pokezooa","Hirsutamona")) +#' pokexample <- rbind( +#' cbind("Squirtadae", c("Squirtle","Blastoise","Wartortle")), +#' c("Shelloidea","Lapras"), c("Shelloidea","Squirtadae"), +#' c("Pokezooa","Shelloidea"), c("Pokezooa","Parasect"), +#' c("Rodentapokemorpha","Linoone"), c("Rodentapokemorpha","Sandshrew"), +#' c("Rodentapokemorpha","Pikachu"), c("Hirsutamona","Ursaring"), +#' c("Hirsutamona","Rodentapokemorpha"), c("Pokezooa","Hirsutamona") +#' ) #' #' #Default: tipSet = 'nonParents' -#' pokeTree <- parentChild2taxonTree(pokexample, tipSet = "nonParents") -#' plot(pokeTree);nodelabels(pokeTree$node.label) +#' pokeTree <- parentChild2taxonTree( +#' parentChild = pokexample, +#' tipSet = "nonParents") +#' plot(pokeTree) +#' nodelabels(pokeTree$node.label) #' #' #Get ALL taxa as tips with tipSet = 'all' -#' pokeTree <- parentChild2taxonTree(pokexample, tipSet = "all") -#' plot(pokeTree);nodelabels(pokeTree$node.label) +#' pokeTree <- parentChild2taxonTree( +#' parentChild = pokexample, +#' tipSet = "all") +#' plot(pokeTree) +#' nodelabels(pokeTree$node.label) #' #' #' \dontrun{ #' -#' # let's try a dataset where not all the taxon relationships lead to a common root +#' # let's try a dataset where not all the +#' # taxon relationships lead to a common root #' -#' pokexample_bad <- rbind(cbind("Squirtadae",c("Squirtle","Blastoise","Wartortle")), -#' c("Shelloidea","Lapras"),c("Shelloidea","Squirtadae"), -#' c("Pokezooa","Shelloidea"),c("Pokezooa","Parasect"), -#' c("Rodentapokemorpha","Linoone"),c("Rodentapokemorpha","Sandshrew"), -#' c("Rodentapokemorpha","Pikachu"),c("Hirsutamona","Ursaring"), -#' c("Hirsutamona","Rodentapokemorpha"),c("Pokezooa","Hirsutamona"), -#' c("Umbrarcheota","Gengar")) +#' pokexample_bad <- rbind( +#' cbind("Squirtadae", c("Squirtle","Blastoise","Wartortle")), +#' c("Shelloidea","Lapras"), c("Shelloidea","Squirtadae"), +#' c("Pokezooa","Shelloidea"), c("Pokezooa","Parasect"), +#' c("Rodentapokemorpha","Linoone"), c("Rodentapokemorpha","Sandshrew"), +#' c("Rodentapokemorpha","Pikachu"), c("Hirsutamona","Ursaring"), +#' c("Hirsutamona","Rodentapokemorpha"), c("Pokezooa","Hirsutamona"), +#' c("Umbrarcheota","Gengar") +#' ) #' -#' #this should return an error, as Gengar doesn't share common root -#' pokeTree <- parentChild2taxonTree(pokexample_bad) +#' # this should return an error +#' # as Gengar doesn't share common root +#' pokeTree <- parentChild2taxonTree(parentChild = pokexample_bad) #' #' #' # another example, where a taxon is listed as both parent and child -#' pokexample_bad2 <- rbind(cbind("Squirtadae",c("Squirtle","Blastoise","Wartortle")), -#' c("Shelloidea",c("Lapras","Squirtadae","Shelloidea")), -#' c("Pokezooa","Shelloidea"),c("Pokezooa","Parasect"), -#' c("Rodentapokemorpha","Linoone"),c("Rodentapokemorpha","Sandshrew"), -#' c("Rodentapokemorpha","Pikachu"),c("Hirsutamona","Ursaring"), -#' c("Hirsutamona","Rodentapokemorpha"),c("Pokezooa","Hirsutamona"), -#' c("Umbrarcheota","Gengar")) +#' pokexample_bad2 <- rbind( +#' cbind("Squirtadae", c("Squirtle","Blastoise","Wartortle")), +#' c("Shelloidea", c("Lapras","Squirtadae","Shelloidea")), +#' c("Pokezooa","Shelloidea"), c("Pokezooa","Parasect"), +#' c("Rodentapokemorpha","Linoone"), c("Rodentapokemorpha","Sandshrew"), +#' c("Rodentapokemorpha","Pikachu"), c("Hirsutamona","Ursaring"), +#' c("Hirsutamona","Rodentapokemorpha"), c("Pokezooa","Hirsutamona"), +#' c("Umbrarcheota","Gengar") +#' ) #' #' #this should return an error, as Shelloidea is its own parent -#' pokeTree <- parentChild2taxonTree(pokexample_bad2) +#' pokeTree <- parentChild2taxonTree(parentChild = pokexample_bad2) #' #' } #' #' #' -#' # note that we should even be able to do this with ancestor-descendent pairs from -#' # simulated datasets from simFossilRecord, like so: +#' # note that we should even be able to do this +#' # with ancestor-descendent pairs from +#' # simulated datasets from simFossilRecord, like so: #' set.seed(444) -#' record <- simFossilRecord(p = 0.1, q = 0.1, nruns = 1, -#' nTotalTaxa = c(30,40), nExtant = 0) +#' record <- simFossilRecord( +#' p = 0.1, q = 0.1, nruns = 1, +#' nTotalTaxa = c(30, 40), +#' nExtant = 0 +#' ) #' taxa <- fossilRecord2fossilTaxa(record) -#' # need to reorder the columns so parents (ancestors) first, then children -#' parentChild2taxonTree(taxa[,2:1]) -#' # now note that it issues a warning that the input wasn't type character -#' # and it will be coerced to be such +#' # need to reorder the columns so parents +#' # (ancestors) first, then children +#' parentChild2taxonTree(parentChild = taxa[,2:1]) +#' # now note that it issues a warning that +#' # the input wasn't type character +#' # and it will be coerced to be such #' #' @name parentChild2taxonTree #' @rdname parentChild2taxonTree #' @export -parentChild2taxonTree <- function(parentChild, tipSet = "nonParents", cleanTree = TRUE){ - #,reorderTree = TRUE - # - +parentChild2taxonTree <- function( + parentChild, + tipSet = "nonParents", + cleanTree = TRUE + #,reorderTree = TRUE + # + ){ + ################################### #takes a two column matrix of character class taxon names #each row is a relationship: parent, then child #CHECKS - if(length(tipSet) != 1 | !is.character(tipSet)){stop("tipSet must be a single character element")} + if(length(tipSet) != 1 | !is.character(tipSet)){ + stop("tipSet must be a single character element") + } if(!is.character(parentChild)){ message("parentChild isn't of class character, attempting to coerce") - parentChild <- apply(parentChild,2,as.character)} + parentChild <- apply(parentChild,2,as.character) + } if(length(dim(parentChild)) != 2){ stop("parentChild must be a matrix of class character with two columns and multiple rows") }else{ @@ -125,24 +157,30 @@ parentChild2taxonTree <- function(parentChild, tipSet = "nonParents", cleanTree } } # - if(!testParentChild(parentChild = parentChild)){stop("parentChild relationships are inconsistent")} + if(!testParentChild(parentChild = parentChild)){ + stop("parentChild relationships are inconsistent") + } # #remove singular root edges #trace tips to ultimate ancestor (should be same for all, as this has already been checked) continue <- TRUE while(continue){ unqIDs <- unique(c(parentChild[,1],parentChild[,2])) - ultimateAnc <- sapply(unqIDs,getUltimateAnc,parentChild = parentChild) + ultimateAnc <- sapply(unqIDs, + getUltimateAnc, parentChild = parentChild) if(length(unique(ultimateAnc)) == 1){ ultAnc1 <- ultimateAnc[1] }else{ - stop("parentChild constructed improperly")} + stop("parentChild constructed improperly") + } descEdge <- which(parentChild[,1] == ultAnc1) if(length(descEdge) == 1){ message(paste("Removing singular node leading to root:",ultAnc1)) #remove from parentChild parentChild <- parentChild[-descEdge,,drop = FALSE] - if(nrow(parentChild)<1){stop("No branching nodes found?!")} + if(nrow(parentChild)<1){ + stop("No branching nodes found?!") + } }else{ continue = FALSE } @@ -154,28 +192,37 @@ parentChild2taxonTree <- function(parentChild, tipSet = "nonParents", cleanTree !any(sapply(parentChild[,2],identical,unname(x))))) #check that there isn't more than one root if(length(whichRoot)>1){ - stop(paste("Not all taxable are traceable to a single common root \n", + stop(paste( + "Not all taxable are traceable to a single common root \n", length(whichRoot),"possible roots found:", - paste0(nodeNames[whichRoot],collapse = ", ")))} + paste0(nodeNames[whichRoot],collapse = ", ") + )) + } #now resort nodeNames nodeNames <- c(nodeNames[whichRoot],nodeNames[-whichRoot]) if(tipSet != "nonParents"){ if(tipSet == "all"){ - parentChild <- rbind(parentChild,cbind(nodeNames,paste0("(",nodeNames,")"))) + parentChild <- rbind(parentChild, + cbind(nodeNames,paste0("(",nodeNames,")")) + ) }else{ - stop("tipSet must be one of either 'nonParents' or 'all'")} + stop("tipSet must be one of either 'nonParents' or 'all'" + ) } + } #identify tip taxa, this will be all taxa who are not-parents - notParents <- sapply(parentChild[,2],function(x) - !any(sapply(parentChild[,1],identical,unname(x)))) - tipNames <- parentChild[notParents,2] + notParents <- sapply(parentChild[ , 2], function(x) + !any(sapply(parentChild[,1], identical,unname(x)))) + tipNames <- parentChild[notParents, 2] #now convert parentChild matrix to edge matrix - edgeMat <- matrix(,nrow(parentChild),ncol(parentChild)) + edgeMat <- matrix(,nrow(parentChild), ncol(parentChild)) taxonNames <- c(tipNames,nodeNames) #test that none have been lost - nUniquePC <- length(unique(c(parentChild[,1],parentChild[,2]))) + nUniquePC <- length(unique(c(parentChild[,1], parentChild[,2]))) if(length(taxonNames) != nUniquePC){ - stop("Number of tip and node names doesn't sum to total number of unique names in parentChild") + stop( + "Number of tip and node names doesn't sum to total number of unique names in parentChild" + ) } #convert internal nodes to Ntip+nodeNames ID edgeMat[,1] <- sapply(parentChild[,1],function(x) @@ -189,14 +236,22 @@ parentChild2taxonTree <- function(parentChild, tipSet = "nonParents", cleanTree stop("created edge relationships are inconsistent") } #make the tree - tree <- list(edge = edge,tip.label = tipNames,edge.length = NULL, #edge.length = rep(1,nrow(edge)) - Nnode = length(nodeNames),node.label = nodeNames) + tree <- list( + edge = edge, + tip.label = tipNames, + edge.length = NULL, + #edge.length = rep(1,nrow(edge)), + Nnode = length(nodeNames), + node.label = nodeNames + ) class(tree) <- "phylo" if(cleanTree){ #make it a good tree #reordering seems to cause errors?? 06-11-15 tree <- cleanNewPhylo(tree) } - if(Ntip(tree) != length(tipNames)){stop("Taxa number changed while cleaning tree")} + if(Ntip(tree) != length(tipNames)){ + stop("Taxa number changed while cleaning tree") + } #plot(tree);nodelabels(tree$node.label) return(tree) } @@ -207,7 +262,8 @@ getUltimateAnc <- function(taxa,parentChild){ count <- count+1 taxa <- parentChild[match(taxa,parentChild[,2]),1] if(length(taxa)>1){ - stop("Some parents are listed as children twice in parentChild")} + stop("Some parents are listed as children twice in parentChild") + } if(count>(length(parentChild)*2)){ stop("Breaking while() loop: cannot find ultimate ancestor") } @@ -217,7 +273,8 @@ getUltimateAnc <- function(taxa,parentChild){ charEdge2numeric <- function(parentChild){ if(!is.character(parentChild)){ - stop("parentChild has to be character for charEdge2numeric")} + stop("parentChild has to be character for charEdge2numeric") + } #get unique IDs unqIDs <- c(NA,sort(unique(c(parentChild[,1],parentChild[,2])))) parentChild2 <- matrix(,nrow(parentChild),2) @@ -235,9 +292,11 @@ charEdge2numeric <- function(parentChild){ testParentChild <- function(parentChild){ #check that its a matrix with two columns if(!is.matrix(parentChild)){ - stop("edge/parentChild matrix must be of type matrix")} + stop("edge/parentChild matrix must be of type matrix") + } if(ncol(parentChild) != 2){ - stop("edge/parentChild matrix must have two columns")} + stop("edge/parentChild matrix must have two columns") + } #convert to numeric if(!is.numeric(parentChild)){ if(is.character(parentChild)){ @@ -253,8 +312,11 @@ testParentChild <- function(parentChild){ #test that all but one node has an ancestor parentMatch <- match(unique(parentChild[,1]),parentChild[,2]) if(sum(is.na(parentMatch))>1){ - stop(paste("More than one apparent root; \n", - "more than one parent without their own parent listed"))} + stop(paste( + "More than one apparent root; \n", + "more than one parent without their own parent listed" + )) + } # #check that any ancestor is listed as its own descendant parentANDchild <- apply(parentChild,1,function(x){ @@ -269,7 +331,8 @@ testParentChild <- function(parentChild){ unqIDs <- unique(c(parentChild[,1],parentChild[,2])) ultimateAnc <- sapply(unqIDs,getUltimateAnc,parentChild = parentChild) if(length(unique(ultimateAnc)) != 1){ - stop("IDs trace back to more than one unique common ancestor")} + stop("IDs trace back to more than one unique common ancestor") + } # #test for nodes listed as descendant twice if(any(table(parentChild[,2])>1)){ @@ -280,7 +343,10 @@ testParentChild <- function(parentChild){ #test that all but one node has an ancestor parentMatch <- match(unique(parentChild[,1]),parentChild[,2]) if(sum(is.na(parentMatch))>1){ - stop(paste("More than one apparent root; \n", - "more than ancestor without an ancestor listed"))} + stop(paste( + "More than one apparent root; \n", + "more than ancestor without an ancestor listed" + )) + } return(TRUE) } diff --git a/R/pqr2Ps.R b/R/pqr2Ps.R index ba834873..b247a3c9 100644 --- a/R/pqr2Ps.R +++ b/R/pqr2Ps.R @@ -37,6 +37,10 @@ #' in the code anyway, here's the unpublished equation for the exact solution: #' #' \eqn{Ps = 1-(((p+q+r)-(sqrt(((p+q+r)^2)-(4*p*q))))/(2*p))} +#' +#' The above exact solution was independent derived and published by Dider et al. (2017). +#' Also see Wagner (2019) for additional discussion of this value and its importance for +#' understanding the timing of branching events in an imperfect fossil record. #' @inheritParams SamplingConv @@ -53,7 +57,7 @@ #' \code{\link{SamplingConv}} #' @author -#' This function is entirely the product of a joint effort between the package author +#' This function is entirely the product of a joint unpublished effort between the package author #' (David W. Bapst), Emily A. King and Matthew W. Pennell. In particular, Emily King #' solved a nasty bit of calculus to get the inexact solution and later re-derived #' the function with a quadratic methodology to get the exact solution. Some elements @@ -62,19 +66,40 @@ #' numbers. #' @references -#' Bapst, D. W., E. A. King and M. W. Pennell. In prep. Probability models +#' Bapst, D. W., E. A. King and M. W. Pennell. Unpublished. Probability models #' for branch lengths of paleontological phylogenies. #' #' Bapst, D. W. 2013. A stochastic rate-calibrated method for time-scaling #' phylogenies of fossil taxa. \emph{Methods in Ecology and Evolution}. #' 4(8):724-733. #' +#' Didier, G., M. Fau, and M. Laurin. 2017. Likelihood of Tree Topologies +#' with Fossils and Diversification Rate Estimation. \emph{Systematic Biology} +#' 66(6):964-987. +#' +#' Wagner, P. J. 2019. On the probabilities of branch durations and +#' stratigraphic gaps in phylogenies of fossil taxa when rates of +#' diversification and sampling vary over time. \emph{Paleobiology} 45(1):30-55. +#' + #' @examples #' #with exact solution -#' pqr2Ps(p = 0.1,q = 0.1,r = 0.1,useExact = TRUE) +#' pqr2Ps( +#' p = 0.1, +#' q = 0.1, +#' r = 0.1, +#' useExact = TRUE +#' ) +#' #' #with inexact solution -#' pqr2Ps(p = 0.1,q = 0.1,r = 0.1,useExact = TRUE) +#' pqr2Ps( +#' p = 0.1, +#' q = 0.1, +#' r = 0.1, +#' useExact = TRUE +#' ) +#' #' @export pqr2Ps <- function(p,q,r,useExact = TRUE){ diff --git a/R/simFossilRecord.R b/R/simFossilRecord.R index 00695dbd..4745eccc 100644 --- a/R/simFossilRecord.R +++ b/R/simFossilRecord.R @@ -130,67 +130,103 @@ #' \code{totalTime}, and any extant taxa will stop at some time greater than zero. #' -#' @param p,q,r,anag.rate These parameters control the instantaneous ('per-capita') rates of branching, extinction, -#' sampling and anagenesis, respectively. These can be given as a number equal to or greater than zero, or as a -#' character string which will be interpreted as an algebraic equation. These equations can make use of three -#' quantities which will/may change throughout the simulation: the standing richness is \code{N}, the -#' current time passed since the start of the simulation is \code{T}, the present duration of a given still-living +#' @param p,q,r,anag.rate These parameters control the instantaneous +#' ('per-capita') rates of branching, extinction, +#' sampling and anagenesis, respectively. These can be given as a number +#' equal to or greater than zero, or as a +#' character string which will be interpreted as an algebraic equation. +#' These equations can make use of three +#' quantities which will/may change throughout the simulation: the +#' standing richness is \code{N}, the current time passed since the start +#' of the simulation is \code{T}, the present duration of a given still-living #' lineage since its origination time is code{D}, and the current branching rate is \code{P} #' (corresponding to the argument name \code{p}). #' Note that \code{P} cannot be used in equations for the branching rate itself; it is for making other rates #' relative to the branching rate. -#' By default, the rates \code{r} and \code{anag.rate} are set to zero, so that the default simulator is a birth-death -#' simulator. -#' Rates set to \code{ = Inf} are treated as if 0. When a rate is set to 0, this event type will not occur in the simulation. -#' Setting certain processes to zero, like sampling, may increase simulation efficiency, if the goal is a birth-death or +#' +#' By default, the rates \code{r} and \code{anag.rate} are set to zero, +#' so that the default simulator is a birth-death simulator. +#' Rates set to \code{ = Inf} are treated as if 0. When a rate is set +#' to 0, this event type will not occur in the simulation. +#' Setting certain processes to zero, like sampling, may increase +#' simulation efficiency, if the goal is a birth-death or #' pure-birth model. -#' See documentation for argument \code{negRatesAsZero} about the treatment of rates that decrease below zero. +#' See documentation for argument \code{negRatesAsZero} about the +#' treatment of rates that decrease below zero. #' Notation of branching, extinction and sampling rates as \code{p, q, r} -#' follows what is typical for the paleobiology literature (e.g. Foote, 1997), not the Greek letters \code{lambda, mu, phi} -#' found more typically in the biological literature (e.g. Stadler, 2009; Heath et al., 2014; Gavryushkina et al., 2014). +#' follows what is typical for the paleobiology literature +#' (e.g. Foote, 1997), not the Greek letters \code{lambda, mu, phi} +#' found more typically in the biological literature (e.g. Stadler, 2009; +#' Heath et al., 2014; Gavryushkina et al., 2014). -#' @param totalTime,nTotalTaxa,nExtant,nSamp These arguments represent stopping and -#' acceptance conditions for simulation runs. They are respectively \code{totalTime}, the -#' total length of the simulation in time-units, \code{nTotalTaxa}, the total number of taxa -#' over the past evolutionary history of the clade, \code{nExtant}, the total number of extant taxa at -#' the end of the simulation and \code{nSamp} the total number of sampled taxa (not counting extant -#' taxa sampled at the modern day). These are used to determine when to end simulation runs, and whether to accept -#' or reject them as output. They can be input as a vector of two numbers, representing minimum -#' and maximum values of a range for accepted simulation runs (i.e. the simulation length can be between 0 and -#' 1000 time-steps, by default), or as a single number, representing a point condition (i.e. if -#' \code{nSamp = 100} then the only simulation runs with exactly 100 taxa sampled will be output). -#' Note that it is easy to set combinations of parameters and run conditions that are impossible -#' to produce satisfactory input under, in which case \code{simFossilRecord} would run in a nonstop loop. -#' How cryptic taxa are counted for the sake of these conditions is controlled by argument \code{count.cryptic}. +#' @param totalTime,nTotalTaxa,nExtant,nSamp These arguments represent +#' stopping and acceptance conditions for simulation runs. They are +#' respectively \code{totalTime}, the total length of the simulation +#' in time-units, \code{nTotalTaxa}, the total number of taxa over the +#' past evolutionary history of the clade, \code{nExtant}, the total +#' number of extant taxa at the end of the simulation and \code{nSamp} +#' the total number of sampled taxa (not counting extant taxa sampled +#' at the modern day). These are used to determine when to end simulation +#' runs, and whether to accept or reject them as output. They can be input +#' as a vector of two numbers, representing minimum and maximum values of +#' a range for accepted simulation runs (i.e. the simulation length can be +#' between 0 and 1000 time-steps, by default), or as a single number, +#' representing a point condition (i.e. if \code{nSamp = 100} then the only +#' simulation runs with exactly 100 taxa sampled will be output). Note that it +#' is easy to set combinations of parameters and run conditions that are +#' impossible to produce satisfactory input under, in which case +#' \code{simFossilRecord} would run in a nonstop loop. +#' How cryptic taxa are counted for the sake of these +#' conditions is controlled by argument \code{count.cryptic}. -#' @param negRatesAsZero A logical. Should rates calculated as a negative number cause the simulation to fail -#' with an error message (\code{ = FALSE}) or should these be treated as zero (\code{" = TRUE"}, the default). This -#' is equivalent to saying that the \code{rate.as.used = } \code{max(0, rate.as.given)}. +#' @param negRatesAsZero A logical. Should rates calculated as a +#' negative number cause the simulation to fail +#' with an error message (\code{ = FALSE}) or should these be +#' treated as zero (\code{" = TRUE"}, the default). This +#' is equivalent to saying that +#' the \code{rate.as.used = } \code{max(0, rate.as.given)}. -#' @param prop.cryptic,prop.bifurc These parameters control (respectively) the proportion of branching events that have -#' morphological differentiation, versus those that are cryptic (\code{prop.cryptic}) and the proportion of morphological -#' branching events that are bifurcating, as opposed to budding. Both of these proportions must be a number between 0 and 1. -#' By default, both are set to zero, meaning all branching events are events of budding cladogenesis. See description of +#' @param prop.cryptic,prop.bifurc These parameters control +#' (respectively) the proportion of branching events that have +#' morphological differentiation, versus those that are cryptic +#' (\code{prop.cryptic}) and the proportion of morphological +#' branching events that are bifurcating, as opposed to budding. +#' Both of these proportions must be a number between 0 and 1. +#' By default, both are set to zero, meaning all branching events +#' are events of budding cladogenesis. See description of #' the available models of morphological differentiation in the \emph{Description} section. -#' @param tolerance A small number which defines a tiny interval for the sake of placing run-sampling dates before events and +#' @param tolerance A small number which defines a tiny interval for +#' the sake of placing run-sampling dates before events and #' for use in determining whether a taxon is extant in simFossilRecordMethods. -#' @param maxStepTime When rates are time-dependent (i.e. when parameters 'D' or 'T' are used in equations input for one of -#' the four rate arguments), then protocol used by \code{simFossilRecord} of drawing waiting times to the next event could -#' produce a serious mismatch of resulting process to the defined model, because the possibility of new events is only -#' considered at the end of these waiting times. Instead, any time a waiting time greater than \code{maxStepTime} is -#' selected, then instead \emph{no} event occurs and a time-step equal to \code{maxStepTime} occurs instead, thus effectively -#' discretizing the progression of time in the simulations run by \code{simFossilRecord}. Decreasing this value will increase -#' accuracy (as the time-scale is effectively more discretized) but increase computation time, as the computer will need -#' to stop and check rates to see if an event happened more often. Users should toggle this value relative to the time-dependent -#' rate equations they input, relative to the rate of change in rates expected in time-dependent rates. +#' @param maxStepTime When rates are time-dependent (i.e. when +#' parameters 'D' or 'T' are used in equations input for one of +#' the four rate arguments), then protocol used by +#' \code{simFossilRecord} of drawing waiting times to the next event could +#' produce a serious mismatch of resulting process to the defined model, +#' because the possibility of new events is only +#' considered at the end of these waiting times. Instead, any +#' time a waiting time greater than \code{maxStepTime} is +#' selected, then instead \emph{no} event occurs and a +#' time-step equal to \code{maxStepTime} occurs instead, thus effectively +#' discretizing the progression of time in the simulations +#' run by \code{simFossilRecord}. Decreasing this value will increase +#' accuracy (as the time-scale is effectively more discretized) +#' but increase computation time, as the computer will need +#' to stop and check rates to see if an event happened more often. +#' Users should toggle this value relative to the time-dependent +#' rate equations they input, relative to the rate of change in +#' rates expected in time-dependent rates. -#' @param nruns Number of simulation datasets to accept, save and output. If \code{nruns = 1}, output will be a single -#' object of class \code{fossilRecordSimulation}, and if \code{nruns} is greater than 1, a list will be output composed of +#' @param nruns Number of simulation datasets to accept, save and output. +#' If \code{nruns = 1}, output will be a single +#' object of class \code{fossilRecordSimulation}, and +#' if \code{nruns} is greater than 1, a list will be output composed of #' \code{nruns} objects of class \code{fossilRecordSimulation}. -#' @param startTaxa Number of initial taxa to begin a simulation with. All will have the simulation start date +#' @param startTaxa Number of initial taxa to begin a simulation with. +#' All will have the simulation start date #' listed as their time of origination. #' @param sortNames If TRUE, output taxonomic lists are sorted by the taxon @@ -261,9 +297,11 @@ #' means a point in time which is 50 time-units before the present-day, if the #' present-day is zero (the default, but see argument \code{shiftRoot4TimeSlice}). #' -#' Each individual element of a \code{fossilRecordSimulation} list object is named, generally of -#' the form "t1" and "t2", where the number is the \code{taxon.id}. Cryptic taxa are instead -#' named in the form of "t1.2" and "t5.3", where the first number is the taxon which they are a +#' Each individual element of a \code{fossilRecordSimulation} list object +#' is named, generally of the form "t1" and "t2", +#' where the number is the \code{taxon.id}. +#' Cryptic taxa are instead named in the form of "t1.2" and "t5.3", +#' where the first number is the taxon which they are a #' cryptic descendant of (\code{looks.like}) and the second number, after the period, is #' the order of appearance of lineage units in that cryptic complex. For example, for #' "t5.3", the first number is the \code{taxon.id} and the second number communicates @@ -334,69 +372,129 @@ #' #' set.seed(2) #' -#' # quick birth-death-sampling run with 1 run, 50 taxa +#' # quick birth-death-sampling run +#' # with 1 run, 50 taxa #' -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 1, -#' nTotalTaxa = 50, plot = TRUE) +#' record <- simFossilRecord( +#' p = 0.1, +#' q = 0.1, +#' r = 0.1, +#' nruns = 1, +#' nTotalTaxa = 50, +#' plot = TRUE +#' ) #' #' \donttest{ #' # examining multiple runs of simulations #' -#' #example of repeated pure birth simulations over 50 time-units -#' records <- simFossilRecord(p = 0.1, q = 0, nruns = 10, -#' totalTime = 50, plot = TRUE) -#' #plot multiple diversity curves on a log scale -#' records <- lapply(records,fossilRecord2fossilTaxa) -#' multiDiv(records,plotMultCurves = TRUE,plotLogRich = TRUE) -#' #histogram of total number of taxa -#' hist(sapply(records,nrow)) -#' -#' #example of repeated birth-death-sampling simulations over 50 time-units -#' records <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 10, -#' totalTime = 50, plot = TRUE) -#' records <- lapply(records,fossilRecord2fossilTaxa) -#' multiDiv(records,plotMultCurves = TRUE) -#' -#' #like above, but conditioned instead on having 10 extant taxa -#' # between 1 and 100 time-units +#' # example of repeated pure birth simulations over 50 time-units +#' records <- simFossilRecord( +#' p = 0.1, +#' q = 0, +#' nruns = 10, +#' totalTime = 50, +#' plot = TRUE +#' ) +#' +#' # plot multiple diversity curves on a log scale +#' records <- lapply(records, +#' fossilRecord2fossilTaxa) +#' multiDiv(records, +#' plotMultCurves = TRUE, +#' plotLogRich = TRUE +#' ) +#' +#' # histogram of total number of taxa +#' hist(sapply(records, nrow)) +#' +#' +#' ############################################## +#' # example of repeated birth-death-sampling +#' # simulations over 50 time-units +#' records <- simFossilRecord( +#' p = 0.1, q = 0.1, r = 0.1, +#' nruns = 10, +#' totalTime = 50, +#' plot = TRUE) +#' +#' records <- lapply(records, +#' fossilRecord2fossilTaxa) +#' +#' multiDiv(records, +#' plotMultCurves = TRUE) +#' +#' # like above... +#' # but conditioned instead on having 10 extant taxa +#' # between 1 and 100 time-units +#' #' set.seed(4) -#' records <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 10, -#' totalTime = c(1,300), nExtant = 10, plot = TRUE) -#' records <- lapply(records,fossilRecord2fossilTaxa) -#' multiDiv(records,plotMultCurves = TRUE) +#' +#' records <- simFossilRecord( +#' p = 0.1, +#' q = 0.1, +#' r = 0.1, +#' nruns = 10, +#' totalTime = c(1,300), +#' nExtant = 10, +#' plot = TRUE +#' ) +#' +#' records <- lapply(records, +#' fossilRecord2fossilTaxa) +#' +#' multiDiv(records, +#' plotMultCurves = TRUE +#' ) #' #' ################################################ #' #' # How probable were the runs I accepted? -#' # The effect of conditions +#' # The effect of conditions #' #' set.seed(1) #' #' # Let's look at an example of a birth-death process -#' # with high extinction relative to branching +#' # with high extinction relative to branching #' # use default run conditions (barely any conditioning) #' # use print.runs to look at acceptance probability -#' records <- simFossilRecord(p = 0.1, q = 0.8, nruns = 10, -#' print.runs = TRUE, plot = TRUE) +#' +#' records <- simFossilRecord( +#' p = 0.1, +#' q = 0.8, +#' nruns = 10, +#' print.runs = TRUE, +#' plot = TRUE +#' ) +#' #' # 10 runs accepted from a total of 10 ! #' #' # now let's give much more stringent run conditions -#' # require 3 extant taxa at minimum, 5 taxa total minimum -#' records <- simFossilRecord(p = 0.1, q = 0.8, nruns = 10, -#' nExtant = c(3,100), nTotalTaxa = c(5,100), -#' print.runs = TRUE, plot = TRUE) +#' # require 3 extant taxa at minimum, 5 taxa total minimum +#' +#' records <- simFossilRecord( +#' p = 0.1, +#' q = 0.8, +#' nruns = 10, +#' nExtant = c(3,100), +#' nTotalTaxa = c(5,100), +#' print.runs = TRUE, +#' plot = TRUE +#' ) +#' #' # thousands of simulations to just obtail 10 accepable runs! -#' # most ended in extinction before minimums were hit +#' # most ended in extinction before minimums were hit #' #' # beware analysis of simulated where acceptance conditions -#' # are too stringent: your data will be a 'special case' -#' # of the simulation parameters +#' # are too stringent: your data will be a 'special case' +#' # of the simulation parameters #' # it will also take you a long time to generate reasonable -#' # numbers of replicates for whatever analysis you are doing +#' # numbers of replicates for whatever analysis you are doing #' #' # TLDR: You should look at print.runs = TRUE #' #' ###################### +#' ############################## +#' ###################################### #' #' # Using the rate equation-input for complex diversification models #' @@ -405,75 +503,97 @@ #' #' # first, let's write the rate equation #' # We'll use the diversity dependent rate equation model -#' # from Ettienne et al. 2012 as an example here -#' # Under this equation, p = q at carrying capacity K +#' # from Ettienne et al. 2012 as an example here +#' # Under this equation, p = q at carrying capacity K #' # Many others are possible! #' # Note that we don't need to use max(0,rate) as negative rates -#' # are converted to zero by default, as controlled by -#' # the argument negRatesAsZero +#' # are converted to zero by default, as controlled by +#' # the argument negRatesAsZero #' #' # From Ettiene et al. -#' # lambda = lambda0 - (lambda0 - mu)*(n/K) +#' # lambda = lambda0 - (lambda0 - mu)*(n/K) #' # lambda and mu are branching rate and extinction rate -#' # lambda and mu == p and q in paleotree (i.e. Foote convention) +#' # lambda and mu == p and q in paleotree (i.e. Foote convention) #' # lambda0 is the branching rate at richness = 0 #' # K is the carrying capacity #' # n is the richness #' #' # 'N' is the algebra symbol for standing taxonomic richness -#' # for simFossilRecord's simulation capabilities +#' # for simFossilRecord's simulation capabilities #' # also branching rate cannot reference extinction rate #' # we'll have to set lambda0, mu and K in the rate equation directly #' -#' lambda0 <- 0.3 # branching rate at 0 richness in Ltu -#' K <- 40 # carrying capacity -#' mu <- 0.1 # extinction rate will 0.1 Ltu ( = 1/3 of lambda0) +#' lambda0 <- 0.3 # branching rate at 0 richness in Ltu +#' K <- 40 # carrying capacity +#' mu <- 0.1 # extinction rate will 0.1 Ltu ( = 1/3 of lambda0 ) #' #' # technically, mu here represents the lambda at richness = K -#' # i.e. lambdaK +#' # i.e. lambdaK #' # Ettienne et al. are just implicitly saying that the carrying capacity -#' # is the richness at which lambda == mu +#' # is the richness at which lambda == mu #' #' # construct the equation programmatically using paste0 -#' branchingRateEq <- paste0(lambda0,"-(",lambda0,"-",mu,")*(N/",K,")") +#' branchingRateEq <- paste0(lambda0, "-(", lambda0, "-", mu, ")*(N/", K, ")") #' # and take a look at it... #' branchingRateEq -#' # its a thing of beauty, folks +#' # its a thing of beauty, folks! #' #' # now let's try it -#' records <- simFossilRecord(p = branchingRateEq, q = mu, nruns = 3, -#' totalTime = 100, plot = TRUE, print.runs = TRUE) -#' records <- lapply(records,fossilRecord2fossilTaxa) -#' multiDiv(records,plotMultCurves = TRUE) +#' records <- simFossilRecord( +#' p = branchingRateEq, +#' q = mu, +#' nruns = 3, +#' totalTime = 100, +#' plot = TRUE, +#' print.runs = TRUE +#' ) +#' +#' records <- lapply(records, +#' fossilRecord2fossilTaxa) +#' +#' multiDiv(records, +#' plotMultCurves = TRUE) +#' #' # those are some happy little diversity plateaus! #' #' #' # now let's do diversity-dependent extinction #' #' # let's slightly modify the model from Ettiene et al. -#' # mu = mu0 + (mu0 - muK)*(n/K) +#' # mu = mu0 + (mu0 - muK)*(n/K) #' -#' mu0 <- 0.001 # mu at n = 0 -#' muK <- 0.1 # mu at n = K (should be equal to lambda at K) -#' K <- 40 -#' lambda <- muK # equal to muK +#' mu0 <- 0.001 # mu at n = 0 +#' muK <- 0.1 # mu at n = K (should be equal to lambda at K) +#' K <- 40 # carrying capacity (like above) +#' lambda <- muK # equal to muK #' #' # construct the equation programmatically using paste0 -#' extRateEq <- paste0(mu0,"-(",mu0,"-",muK,")*(N/",K,")") +#' extRateEq <- paste0(mu0, "-(", mu0, "-", muK, ")*(N/" ,K, ")") #' extRateEq #' #' # now let's try it -#' records <- simFossilRecord(p = lambda, q = extRateEq, nruns = 3, -#' totalTime = 100, plot = TRUE, print.runs = TRUE) -#' records <- lapply(records,fossilRecord2fossilTaxa) -#' multiDiv(records,plotMultCurves = TRUE) +#' records <- simFossilRecord( +#' p = lambda, +#' q = extRateEq, +#' nruns = 3, +#' totalTime = 100, +#' plot = TRUE, +#' print.runs = TRUE) +#' +#' records <- lapply(records, +#' fossilRecord2fossilTaxa) +#' +#' multiDiv(records, +#' plotMultCurves = TRUE) #' #' # these plateaus looks a little more spiky -#' #( maybe there is more turnover at K? ) +#' #( maybe there is more turnover at K? ) #' # also, it took a longer for the rapid rise to occur #' +#' ####################################################### +#' ############################### #' # Now let's try an example with time-dependent origination -#' # and extinction constrained to equal origination +#' # and extinction constrained to equal origination #' #' # Note! Use of time-dependent parameters "D" and "T" may #' # result in slower than normal simulation run times @@ -481,31 +601,53 @@ #' # info for argument maxTimeStep above #' #' # First, let's define a time-dependent rate equation -#' # "T" is the symbol for time passed +#' # "T" is the symbol for time passed #' timeEquation <- "0.4-(0.007*T)" #' #' #in this equation, 0.4 is the rate at time = 0 -#' # and it will decrease by 0.007 with every time-unit -#' # at time = 50, the final rate will be 0.05 +#' # and it will decrease by 0.007 with every time-unit +#' # at time = 50, the final rate will be 0.05 #' # We can easily make it so extinction is always equal to branching rate #' # "P" is the algebraic equivalent for "branching rate" in simFossilRecord #' #' # now let's try it -#' records <- simFossilRecord(p = timeEquation, q = "P", nruns = 3, -#' totalTime = 50, plot = TRUE, print.runs = TRUE) -#' records <- lapply(records,fossilRecord2fossilTaxa) -#' multiDiv(records,plotMultCurves = TRUE) +#' records <- simFossilRecord( +#' p = timeEquation, +#' q = "P", +#' nruns = 3, +#' totalTime = 50, +#' plot = TRUE, +#' print.runs = TRUE +#' ) +#' +#' records <- lapply(records, +#' fossilRecord2fossilTaxa) +#' +#' multiDiv(records, +#' plotMultCurves = TRUE) +#' #' # high variability that seems to then smooth out as turnover decreases #' #' # And duration what about duration-dependent processes? -#' # let's do a duration-dep extinction equation: +#' # let's do a duration-dep extinction equation: #' durDepExt <- "0.01+(0.01*D)" #' #' # okay, let's take it for a spin -#' records <- simFossilRecord(p = 0.1, q = durDepExt, nruns = 3, -#' totalTime = 50, plot = TRUE, print.runs = TRUE) -#' records <- lapply(records,fossilRecord2fossilTaxa) -#' multiDiv(records,plotMultCurves = TRUE) +#' records <- simFossilRecord( +#' p = 0.1, +#' q = durDepExt, +#' nruns = 3, +#' totalTime = 50, +#' plot = TRUE, +#' print.runs = TRUE +#' ) +#' +#' records <- lapply(records, +#' fossilRecord2fossilTaxa) +#' +#' multiDiv(records, +#' plotMultCurves = TRUE) +#' #' # creates runs full of short lived taxa #' #' # Some more stuff to do with rate formulae! @@ -522,28 +664,30 @@ #' nTotalTaxa = 50, plot = TRUE) #' #' # Setting up specific time-variable rates can be laborious though -#' # e.g. one rate during this 10 unit interval, -#' # another during this interval, etc +#' # e.g. one rate during this 10 unit interval, +#' # another during this interval, etc #' # The problem is setting this up within a fixed function #' +#' +#' ############################################################# #' # Worked Example #' # What if we want to draw a new rate from a -#' # lognormal distribution every 10 time units? +#' # lognormal distribution every 10 time units? #' #' # Need to randomly draw these rates *before* running simFossilTaxa #' # This means also that we will need to individually do each simFossilTaxa run -#' # since the rates are drawn outside of simFossilTaxa +#' # since the rates are drawn outside of simFossilTaxa #' #' # Get some reasonable log normal rates: #' rates <- 0.1+rlnorm(100,meanlog = 1,sdlog = 1)/100 #' #' # Now paste it into a formulae that describes a function that -#' # will change the rate output every 10 time units +#' # will change the rate output every 10 time units #' rateEquation <- paste0("c(",paste0(rates,collapse = ","),")[1+(T%/%10)]") #' #' # and let's run it #' record <- simFossilRecord(p = rateEquation, q = 0.1, r = 0.1, nruns = 1, -#' totalTime = c(30,40), plot = TRUE) +#' totalTime = c(30,40), plot = TRUE) #' #' #' ########################################################## @@ -552,78 +696,120 @@ #' # Some examples of varying the 'speciation modes' in simFossilRecord #' #' # The default is pure budding cladogenesis -#' # anag.rate = prop.bifurc = prop.cryptic = 0 +#' # anag.rate = prop.bifurc = prop.cryptic = 0 #' # let's just set those for the moment anyway #' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, -#' anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0, -#' nruns = 1, nTotalTaxa = c(20,30) ,nExtant = 0, plot = TRUE) +#' anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0, +#' nruns = 1, nTotalTaxa = c(20,30) ,nExtant = 0, plot = TRUE) #' #' #convert and plot phylogeny -#' # note this will not reflect the 'budding' pattern -#' # branching events will just appear like bifurcation -#' # its a typical convention for phylogeny plotting +#' # note this will not reflect the 'budding' pattern +#' # branching events will just appear like bifurcation +#' # its a typical convention for phylogeny plotting #' converted <- fossilRecord2fossilTaxa(record) #' tree <- taxa2phylo(converted,plot = TRUE) #' #' #now, an example of pure bifurcation #' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, -#' anag.rate = 0, prop.bifurc = 1, prop.cryptic = 0, -#' nruns = 1, nTotalTaxa = c(20,30) ,nExtant = 0) +#' anag.rate = 0, prop.bifurc = 1, prop.cryptic = 0, +#' nruns = 1, nTotalTaxa = c(20,30) ,nExtant = 0) #' tree <- taxa2phylo(fossilRecord2fossilTaxa(record),plot = TRUE) #' #' # all the short branches are due to ancestors that terminate -#' # via pseudoextinction at bifurcation events +#' # via pseudoextinction at bifurcation events #' #' # an example with anagenesis = branching -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, -#' anag.rate = 0.1, prop.bifurc = 0, prop.cryptic = 0, -#' nruns = 1, nTotalTaxa = c(20,30) ,nExtant = 0) -#' tree <- taxa2phylo(fossilRecord2fossilTaxa(record),plot = TRUE) +#' record <- simFossilRecord( +#' p = 0.1, q = 0.1, r = 0.1, +#' anag.rate = 0.1, +#' prop.bifurc = 0, +#' prop.cryptic = 0, +#' nruns = 1, +#' nTotalTaxa = c(20,30), +#' nExtant = 0 +#' ) +#' tree <- taxa2phylo(fossilRecord2fossilTaxa(record), +#' plot = TRUE) #' # lots of pseudoextinction #' #' # an example with anagenesis, pure bifurcation -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, -#' anag.rate = 0.1, prop.bifurc = 1, prop.cryptic = 0, -#' nruns = 1, nTotalTaxa = c(20,30) ,nExtant = 0) -#' tree <- taxa2phylo(fossilRecord2fossilTaxa(record),plot = TRUE) +#' record <- simFossilRecord( +#' p = 0.1, q = 0.1, r = 0.1, +#' anag.rate = 0.1, +#' prop.bifurc = 1, +#' prop.cryptic = 0, +#' nruns = 1, +#' nTotalTaxa = c(20,30) , +#' nExtant = 0 +#' ) +#' tree <- taxa2phylo(fossilRecord2fossilTaxa(record), +#' plot = TRUE) #' # lots and lots of pseudoextinction #' #' # an example with half cryptic speciation -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, -#' anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0.5, -#' nruns = 1, nTotalTaxa = c(20,30), nExtant = 0) -#' tree <- taxa2phylo(fossilRecord2fossilTaxa(record),plot = TRUE) +#' record <- simFossilRecord( +#' p = 0.1, q = 0.1, r = 0.1, +#' anag.rate = 0, +#' prop.bifurc = 0, +#' prop.cryptic = 0.5, +#' nruns = 1, +#' nTotalTaxa = c(20,30), +#' nExtant = 0 +#' ) +#' tree <- taxa2phylo(fossilRecord2fossilTaxa(record), +#' plot = TRUE) #' #' # notice that the tree has many more than the maximum of 30 tips: -#' # that's because the cryptic taxa are not counted as -#' # separate taxa by default, as controlled by count.cryptic +#' # that's because the cryptic taxa are not counted as +#' # separate taxa by default, as controlled by count.cryptic #' #' # an example with anagenesis, bifurcation, cryptic speciation -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, -#' anag.rate = 0.1, prop.bifurc = 0.5, prop.cryptic = 0.5, -#' nruns = 1, nTotalTaxa = c(20,30), nExtant = 0) -#' tree <- taxa2phylo(fossilRecord2fossilTaxa(record),plot = TRUE) +#' record <- simFossilRecord( +#' p = 0.1, q = 0.1, r = 0.1, +#' anag.rate = 0.1, +#' prop.bifurc = 0.5, +#' prop.cryptic = 0.5, +#' nruns = 1, +#' nTotalTaxa = c(20,30), +#' nExtant = 0 +#' ) +#' tree <- taxa2phylo(fossilRecord2fossilTaxa(record), +#' plot = TRUE) #' # note in this case, 50% of branching is cryptic -#' # 25% is bifurcation, 25% is budding +#' # 25% is bifurcation, 25% is budding #' #' # an example with anagenesis, pure cryptic speciation -#' # morphotaxon identity will thus be entirely indep of branching! -#' # I wonder if this is what is really going on, sometimes... -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, -#' anag.rate = 0.1, prop.bifurc = 0, prop.cryptic = 1, -#' nruns = 1, nTotalTaxa = c(20,30), nExtant = 0) -#' tree <- taxa2phylo(fossilRecord2fossilTaxa(record),plot = TRUE) +#' # morphotaxon identity will thus be entirely indep of branching! +#' # I wonder if this is what is really going on, sometimes... +#' record <- simFossilRecord( +#' p = 0.1, q = 0.1, r = 0.1, +#' anag.rate = 0.1, +#' prop.bifurc = 0, +#' prop.cryptic = 1, +#' nruns = 1, +#' nTotalTaxa = c(20,30), +#' nExtant = 0 +#' ) +#' tree <- taxa2phylo(fossilRecord2fossilTaxa(record), +#' plot = TRUE) #' #' # merging cryptic taxa when all speciation is cryptic #' set.seed(1) -#' record <- simFossilRecord(p = 0.1, -#' q = 0.1, r = 0.1, -#' prop.crypt = 1, -#' totalTime = 50, plot = TRUE) +#' record <- simFossilRecord( +#' p = 0.1, +#' q = 0.1, +#' r = 0.1, +#' prop.crypt = 1, +#' totalTime = 50, +#' plot = TRUE +#' ) +#' #' # there looks like there is only a single taxon, but... -#' length(record) #actual number of cryptic lineages +#' length(record) +#' #the above is the *actual* number of cryptic lineages #' -#' ############# +#' ###################################### +#' ############################### #' #' # playing with count.cryptic with simulations of pure cryptic speciation #' @@ -631,100 +817,198 @@ #' #or total taxa including cryptic taxa with count.cryptic = FALSE #' #' # an example with pure cryptic speciation with count.cryptic = TRUE -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, -#' anag.rate = 0, prop.bifurc = 0, prop.cryptic = 1, -#' nruns = 1, totalTime = 50, nTotalTaxa = c(10,100), count.cryptic = TRUE) +#' record <- simFossilRecord( +#' p = 0.1, q = 0.1, r = 0.1, +#' anag.rate = 0, +#' prop.bifurc = 0, +#' prop.cryptic = 1, +#' nruns = 1, +#' totalTime = 50, +#' nTotalTaxa = c(10,100), +#' count.cryptic = TRUE +#' ) #' tree <- taxa2phylo(fossilRecord2fossilTaxa(record)) -#' plot(tree);axisPhylo() -#' # notice how the tip labels indicate all are the same morphotaxon #' -#' # we'll replace the # of taxa constraints with a time constraint -#' # or else the count.cryptic = FALSE simulation will never end! +#' # plot the tree +#' plot(tree) +#' axisPhylo() +#' # notice how the tip labels indicate all are the same morphotaxon? #' +#' ################# #' # an example with pure cryptic speciation with count.cryptic = FALSE -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, -#' anag.rate = 0, prop.bifurc = 0, prop.cryptic = 1, -#' nruns = 1, totalTime = 50, count.cryptic = FALSE) +#' # Need to be careful with this! +#' # We'll have to replace the # of taxa constraints with a time constraint +#' # or else the count.cryptic = FALSE simulation will never end! +#' +#' record <- simFossilRecord( +#' p = 0.1, q = 0.1, r = 0.1, +#' anag.rate = 0, +#' prop.bifurc = 0, +#' prop.cryptic = 1, +#' nruns = 1, +#' totalTime = 50, +#' count.cryptic = FALSE +#' ) #' tree <- taxa2phylo(fossilRecord2fossilTaxa(record)) -#' plot(tree);axisPhylo() #' +#' # plot it +#' plot(tree) +#' axisPhylo() +#' +#' ########################################### #' #let's look at numbers of taxa returned when varying count.cryptic -#' # with prop.cryptic = 0.5 +#' # with prop.cryptic = 0.5 #' #' #simple simulation going for 50 total taxa #' #' #first, count.cryptic = FALSE (default) -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, -#' anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0.5, -#' nruns = 1, nTotalTaxa = 50, count.cryptic = FALSE) +#' record <- simFossilRecord( +#' p = 0.1, +#' q = 0.1, +#' r = 0.1, +#' anag.rate = 0, +#' prop.bifurc = 0, +#' prop.cryptic = 0.5, +#' nruns = 1, +#' nTotalTaxa = 50, +#' count.cryptic = FALSE +#' ) #' taxa <- fossilRecord2fossilTaxa(record) -#' nrow(taxa) #number of lineages (inc. cryptic) -#' length(unique(taxa[,6])) #number of morph-distinguishable taxa #' -#' # and count.cryptic = TRUE -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, -#' anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0.5, -#' nruns = 1, nTotalTaxa = 50, count.cryptic = TRUE) +#' #### Count the taxa/lineages ! +#' # number of lineages (inc. cryptic) +#' nrow(taxa) +#' # number of morph-distinguishable taxa +#' length(unique(taxa[,6])) +#' +#' ################### +#' # Now let' try with count.cryptic = TRUE +#' record <- simFossilRecord( +#' p = 0.1, +#' q = 0.1, +#' r = 0.1, +#' anag.rate = 0, +#' prop.bifurc = 0, +#' prop.cryptic = 0.5, +#' nruns = 1, +#' nTotalTaxa = 50, +#' count.cryptic = TRUE +#' ) #' taxa <- fossilRecord2fossilTaxa(record) -#' nrow(taxa) #number of lineages (inc. cryptic) -#' length(unique(taxa[,6])) #number of morph-distinguishable taxa +#' +#' ### Count the taxa/lineages ! +#' # number of lineages (inc. cryptic) +#' nrow(taxa) +#' # number of morph-distinguishable taxa +#' length(unique(taxa[,6])) #' #' # okay... -#' # now let's try with 50 extant taxa #' -#' #first, count.cryptic = FALSE (default) -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, -#' anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0.5, -#' nruns = 1, nExtant = 10, totalTime = c(1,100), count.cryptic = FALSE) +#' ######################## +#' ####################### +#' # now let's try cryptic speciation *with* 50 extant taxa! +#' +#' # first, count.cryptic = FALSE (default) +#' record <- simFossilRecord( +#' p = 0.1, +#' q = 0.1, +#' r = 0.1, +#' anag.rate = 0, +#' prop.bifurc = 0, +#' prop.cryptic = 0.5, +#' nruns = 1, +#' nExtant = 10, +#' totalTime = c(1,100), +#' count.cryptic = FALSE +#' ) #' taxa <- fossilRecord2fossilTaxa(record) -#' sum(taxa[,5]) #number of still-living lineages (inc. cryptic) -#' length(unique(taxa[taxa[,5] == 1,6])) #number of still-living morph-dist. taxa #' +#' ### Count the taxa/lineages ! +#' # number of still-living lineages (inc. cryptic) +#' sum(taxa[,5]) +#' # number of still-living morph-dist. taxa +#' length(unique(taxa[taxa[,5] == 1,6])) +#' +#' ############## #' # and count.cryptic = TRUE -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, -#' anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0.5, -#' nruns = 1, nExtant = 10, totalTime = c(1,100), count.cryptic = TRUE) +#' record <- simFossilRecord( +#' p = 0.1, +#' q = 0.1, +#' r = 0.1, +#' anag.rate = 0, +#' prop.bifurc = 0, +#' prop.cryptic = 0.5, +#' nruns = 1, +#' nExtant = 10, +#' totalTime = c(1,100), +#' count.cryptic = TRUE +#' ) #' taxa <- fossilRecord2fossilTaxa(record) -#' sum(taxa[,5]) #number of still-living lineages (inc. cryptic) -#' length(unique(taxa[taxa[,5] == 1,6])) #number of still-living morph-dist. taxa +#' +#' ### Count the taxa/lineages ! +#' # number of still-living lineages (inc. cryptic) +#' sum(taxa[,5]) +#' # number of still-living morph-dist. taxa +#' length(unique(taxa[taxa[,5] == 1,6])) #' #' ################################################# #' #' # an example using startTaxa to have more initial taxa -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 1, -#' nTotalTaxa = 100, startTaxa = 20, plot = TRUE) +#' record <- simFossilRecord( +#' p = 0.1, +#' q = 0.1, +#' r = 0.1, +#' nruns = 1, +#' nTotalTaxa = 100, +#' startTaxa = 20, +#' plot = TRUE +#' ) #' #' ###################################################### #' #' # Using run conditions #' #' # Users can generate datasets that meet multiple conditions: -#' # such as time, number of total taxa, extant taxa, sampled taxa +#' # such as time, number of total taxa, extant taxa, sampled taxa #' # These can be set as point conditions or ranges #' #' # let's set time = 10-100 units, total taxa = 30-40, extant = 10 -#' #and look at acceptance rates with print.run +#' #and look at acceptance rates with print.run #' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 1, -#' totalTime = c(10,100), nTotalTaxa = c(30,40), nExtant = 10, -#' print.runs = TRUE, plot = TRUE) +#' totalTime = c(10,100), nTotalTaxa = c(30,40), nExtant = 10, +#' print.runs = TRUE, plot = TRUE) #' #' # let's make the constraints on totaltaxa a little tighter #' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 1, -#' totalTime = c(50,100), nTotalTaxa = 30, nExtant = 10, -#' print.runs = TRUE, plot = TRUE) +#' totalTime = c(50,100), nTotalTaxa = 30, nExtant = 10, +#' print.runs = TRUE, plot = TRUE) #' # still okay acceptance rates #' #' # alright, now let's add a constraint on sampled taxa -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 1, -#' totalTime = c(50,100), nTotalTaxa = 30, nExtant = 10, -#' nSamp = 15, print.runs = TRUE, plot = TRUE) +#' record <- simFossilRecord( +#' p = 0.1, +#' q = 0.1, +#' r = 0.1, +#' nruns = 1, +#' totalTime = c(50,100), +#' nTotalTaxa = 30, +#' nExtant = 10, +#' nSamp = 15, +#' print.runs = TRUE, +#' plot = TRUE +#' ) #' # still okay acceptance rates #' #' # we can be really odd and condition on having a single taxon #' set.seed(1) -#' record <- simFossilRecord(p = 0.1, -#' q = 0.1, r = 0.1, nTotalTaxa = 1, -#' totalTime = c(10,20), plot = TRUE) +#' record <- simFossilRecord( +#' p = 0.1, +#' q = 0.1, +#' r = 0.1, +#' nTotalTaxa = 1, +#' totalTime = c(10,20), +#' plot = TRUE +#' ) #' #' ######################################################## #' @@ -732,32 +1016,64 @@ #' #' #Typically, a user may want to condition on a precise #' # number of sampled taxa in an all-extinct simulation -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 1, -#' nTotalTaxa = c(1,100), nExtant = 0, nSamp = 20, -#' print.runs = TRUE, plot = TRUE) +#' record <- simFossilRecord( +#' p = 0.1, +#' q = 0.1, +#' r = 0.1, +#' nruns = 1, +#' nTotalTaxa = c(1,100), +#' nExtant = 0, +#' nSamp = 20, +#' print.runs = TRUE, +#' plot = TRUE +#' ) #' #' # Note that when simulations don't include #' # sampling or extant taxa, the plot #' # functionality changes -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0, nruns = 1, -#' nExtant = 0, print.runs = TRUE, plot = TRUE) +#' record <- simFossilRecord( +#' p = 0.1, +#' q = 0.1, +#' r = 0, +#' nruns = 1, +#' nExtant = 0, +#' print.runs = TRUE, +#' plot = TRUE +#' ) +#' #' # something similar happens when there is no sampling #' # and there are extant taxa but they aren't sampled -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0, nruns = 1, -#' nExtant = 10, nTotalTaxa = 100, modern.samp.prob = 0, -#' print.runs = TRUE, plot = TRUE) -#' -#' +#' record <- simFossilRecord( +#' p = 0.1, +#' q = 0.1, +#' r = 0, +#' nruns = 1, +#' nExtant = 10, +#' nTotalTaxa = 100, +#' modern.samp.prob = 0, +#' print.runs = TRUE, +#' plot = TRUE +#' ) +#' +#' ######################################### #' # We can set up a test to make sure that no extant taxa somehow get #' # returned in many simulations with extinct-only conditioning: -#' res <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, -#' nTotalTaxa = 10,nExtant = 0,nruns = 1000,plot = TRUE) +#' res <- simFossilRecord( +#' p = 0.1, +#' q = 0.1, +#' r = 0.1, +#' nTotalTaxa = 10, +#' nExtant = 0, +#' nruns = 1000, +#' plot = TRUE +#' ) #' anyLive <- any(sapply(res,function(z) -#' any(sapply(z,function(x) x[[1]][5] == 1)))) +#' any(sapply(z,function(x) x[[1]][5] == 1))) +#' ) #' # test if any are still alive #' if(anyLive){ -#' stop("Runs have extant taxa under conditioning for none?") -#' } +#' stop("Runs have extant taxa under conditioning for none?") +#' } #' #' } #' diff --git a/R/simFossilRecordMethods.R b/R/simFossilRecordMethods.R index d4cad846..9f0682d9 100644 --- a/R/simFossilRecordMethods.R +++ b/R/simFossilRecordMethods.R @@ -24,7 +24,8 @@ #' #' \code{fossilTaxa2fossilRecord} does the reverse, converting a \code{simFossilTaxa} #' table into a \code{fossilRecordSimulation} list object, -#' but returns a \code{fossilRecordSimulation} object that considers each species as un-sampled (as sampling +#' but returns a \code{fossilRecordSimulation} object that +#' considers each species as un-sampled (as sampling #' information is not contained within a \code{simFossilTaxa} table). #' #' \code{fossilRecord2fossilRanges} converts a \code{fossilRecordSimulation} object @@ -83,29 +84,50 @@ #' @examples #' #' set.seed(44) -#' record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 1, -#' nTotalTaxa = c(20,30) ,nExtant = 0, plot = TRUE) +#' record <- simFossilRecord( +#' p = 0.1, q = 0.1, r = 0.1, +#' nruns = 1, +#' nTotalTaxa = c(20,30), +#' nExtant = 0, +#' plot = TRUE +#' ) #' -#' # time-slicing +#' ################################################## +#' # time-slicing simulations at particular dates #' #' # let's try slicing this record at 940 time-units -#' slicedRecord <- timeSliceFossilRecord(fossilRecord = record, sliceTime = 940) +#' slicedRecord <- timeSliceFossilRecord( +#' fossilRecord = record, +#' sliceTime = 940 +#' ) #' # and let's plot it #' divCurveFossilRecordSim(slicedRecord) #' #' # now with shiftRoot4TimeSlice = TRUE to shift the root age -#' slicedRecord <- timeSliceFossilRecord(fossilRecord = record, sliceTime = 940, -#' shiftRoot4TimeSlice = TRUE) +#' slicedRecord <- timeSliceFossilRecord( +#' fossilRecord = record, +#' sliceTime = 940, +#' shiftRoot4TimeSlice = TRUE +#' ) #' # and let's plot it #' divCurveFossilRecordSim(slicedRecord) #' -#' # plot look a little different due to how axis limits are treated... -#' # notice that in both, 'modern' (extant) taxa are sampled with probability = 1 -#' #let's try it again, make that probability = 0 -#' +#' # the last two plots look a little different +#' # due to how axis limits are treated... +#' # notice that in both, 'modern' (extant) taxa +#' # are sampled with probability = 1 +#' +#' ######## +#' # let's try it again, make that probability = 0 #' # now with shiftRoot4TimeSlice = TRUE -#' slicedRecord <- timeSliceFossilRecord(fossilRecord = record, sliceTime = 940, -#' shiftRoot4TimeSlice = TRUE, modern.samp.prob = 0) +#' +#' slicedRecord <- timeSliceFossilRecord( +#' fossilRecord = record, +#' sliceTime = 940, +#' shiftRoot4TimeSlice = TRUE, +#' modern.samp.prob = 0 +#' ) +#' #' # and let's plot it #' divCurveFossilRecordSim(slicedRecord) #' @@ -119,21 +141,25 @@ #' ranges <- fossilRecord2fossilRanges(record) #' #' # plot diversity curves with multiDiv -#' multiDiv(list(taxa,ranges),plotMultCurves = TRUE) +#' multiDiv(list(taxa,ranges), +#' plotMultCurves = TRUE) #' # should look a lot like what we got earlier #' #' # get the cladogram we'd obtain for these taxa with taxa2cladogram -#' cladogram <- taxa2cladogram(taxa,plot = TRUE) +#' cladogram <- taxa2cladogram(taxa, +#' plot = TRUE) #' #' # now get the time-scaled phylogenies with taxa2phylo #' #' # first, with tips extending to the true times of extinction -#' treeExt <- taxa2phylo(taxa,plot = TRUE) +#' treeExt <- taxa2phylo(taxa, +#' plot = TRUE) #' #' # now, with tips extending to the first appearance dates (FADs) of taxa #' # get the FADs from the ranges #' FADs <- ranges[,1] -#' treeFAD <- taxa2phylo(taxa,FADs,plot = TRUE) +#' treeFAD <- taxa2phylo(taxa, +#' FADs,plot = TRUE) #' #' @rdname simFossilRecordMethods #' @export diff --git a/R/taxonSortPBDBocc.R b/R/taxonSortPBDBocc.R index e87e7edd..dc506792 100644 --- a/R/taxonSortPBDBocc.R +++ b/R/taxonSortPBDBocc.R @@ -125,20 +125,27 @@ #' # getting occurrence data for a genus, sorting it #' # Dicellograptus #' dicelloData <- getPBDBocc("Dicellograptus") -#' dicelloOcc2 <- taxonSortPBDBocc(dicelloData, -#' rank = "species", onlyFormal = FALSE) +#' dicelloOcc2 <- taxonSortPBDBocc( +#' data = dicelloData, +#' rank = "species", +#' onlyFormal = FALSE +#' ) #' names(dicelloOcc2) #' #' # try a PBDB API download with lots of synonymization #' #this should have only 1 species -#' # old way, using v1.1 of PBDB API: +#' # *old* way, using v1.1 of PBDB API: #' # acoData <- read.csv(paste0( #' # "http://paleobiodb.org/data1.1/occs/list.txt?", #' # "base_name = Acosarina%20minuta&show=ident,phylo")) -#' # new way - with getPBDBocc, using v1.2 of PBDB API: +#' # +#' # *new* method - with getPBDBocc, using v1.2 of PBDB API: #' acoData <- getPBDBocc("Acosarina minuta") -#' acoOcc <- taxonSortPBDBocc(acoData, -#' rank = "species", onlyFormal = FALSE) +#' acoOcc <- taxonSortPBDBocc( +#' data = acoData, +#' rank = "species", +#' onlyFormal = FALSE +#' ) #' names(acoOcc) #' #' } @@ -147,22 +154,27 @@ #' data(graptPBDB) #' #' #get formal genera -#' occGenus <- taxonSortPBDBocc(graptOccPBDB, -#' rank = "genus") +#' occGenus <- taxonSortPBDBocc( +#' data = graptOccPBDB, +#' rank = "genus" +#' ) #' length(occGenus) #' #' #get formal species -#' occSpeciesFormal <- taxonSortPBDBocc(graptOccPBDB, -#' rank = "species") +#' occSpeciesFormal <- taxonSortPBDBocc( +#' data = graptOccPBDB, +#' rank = "species") #' length(occSpeciesFormal) #' -#' #yes, there are fewer 'formal' graptolite species in the PBDB then genera +#' #yes, there are fewer 'formal' +#' # graptolite species in the PBDB then genera #' #' #get formal and informal species #' occSpeciesInformal <- taxonSortPBDBocc( -#' graptOccPBDB, -#' rank = "species", -#' onlyFormal = FALSE) +#' data = graptOccPBDB, +#' rank = "species", +#' onlyFormal = FALSE +#' ) #' length(occSpeciesInformal) #' #' #way more graptolite species are 'informal' in the PBDB @@ -170,9 +182,11 @@ #' #get formal and informal species #' #including from occurrences with uncertain taxonomy #' #basically everything and the kitchen sink -#' occSpeciesEverything <- taxonSortPBDBocc(graptOccPBDB, -#' rank = "species", -#' onlyFormal = FALSE, cleanUncertain = FALSE) +#' occSpeciesEverything <- taxonSortPBDBocc( +#' data = graptOccPBDB, +#' rank = "species", +#' onlyFormal = FALSE, +#' cleanUncertain = FALSE) #' length(occSpeciesEverything) #' #' diff --git a/R/termTaxa.R b/R/termTaxa.R index 397b028d..d82a3384 100644 --- a/R/termTaxa.R +++ b/R/termTaxa.R @@ -150,74 +150,107 @@ #' @examples #' #' set.seed(444) -#' #example for 20 taxa +#' # example for 20 taxa #' termTaxaRes <- simTermTaxa(20) #' -#' #let look at the taxa... +#' # let look at the taxa... #' taxa <- termTaxaRes$taxonRanges #' taxicDivCont(taxa) -#' #because ancestors don't even exist as taxa -#' #the true diversity curve can go to zero -#' #kinda bizarre! +#' # because ancestors don't even exist as taxa +#' # the true diversity curve can go to zero +#' # kinda bizarre! #' -#' #the tree should give a better idea +#' # the tree should give a better idea #' tree <- termTaxaRes$tree #' phyloDiv(tree) -#' #well, okay, its a tree. +#' # well, okay, its a tree. #' -#' #get the 'ideal cladogram' ala taxa2cladogram -#' #much easier with terminal-taxa simulations as no paraphyletic taxa +#' # get the 'ideal cladogram' ala taxa2cladogram +#' # much easier with terminal-taxa simulations +#' # as no paraphyletic taxa #' cladogram <- tree #' cladogram$edge.length <- NULL #' plot(cladogram) #' -#' #trying out trueTermTaxaTree -#' #random times of observation: uniform distribution -#' time.obs <- apply(taxa,1,function(x) runif(1,x[2],x[1])) -#' tree1 <- trueTermTaxaTree(termTaxaRes,time.obs) +#' # trying out trueTermTaxaTree +#' # random times of observation: uniform distribution +#' time.obs <- apply(taxa,1, +#' function(x) runif(1,x[2],x[1]) +#' ) +#' tree1 <- trueTermTaxaTree( +#' termTaxaRes, +#' time.obs +#' ) #' layout(1:2) #' plot(tree) #' plot(tree1) -#' #' layout(1) -#' +#' +#' ########################################### #' #let's look at the change in the terminal branches -#' plot(tree$edge.length,tree1$edge.length) +#' plot(tree$edge.length, +#' tree1$edge.length) #' #can see some edges are shorter on the new tree, cool #' #' #let's now simulate sampling and use FADs #' layout(1:2) -#' plot(tree);axisPhylo() -#' FADs <- sampleRanges(termTaxaRes$taxonRanges,r = 0.1)[,1] -#' tree1 <- trueTermTaxaTree(termTaxaRes,FADs) -#' plot(tree1);axisPhylo() +#' plot(tree) +#' axisPhylo() +#' FADs <- sampleRanges( +#' termTaxaRes$taxonRanges, +#' r = 0.1)[,1] +#' tree1 <- trueTermTaxaTree(termTaxaRes, FADs) +#' plot(tree1) +#' axisPhylo() #' +#' ################################################ #' #can condition on sampling some average number of taxa #' #analogous to deprecated function simFossilTaxa_SRcond #' r <- 0.1 #' avgtaxa <- 50 #' sumRate <- 0.2 #' #avg number necc for an avg number sampled -#' ntaxa_orig <- avgtaxa/(r/(r+sumRate)) -#' termTaxaRes <- simTermTaxa(ntaxa = ntaxa_orig,sumRate = sumRate) +#' ntaxa_orig <- avgtaxa / (r / (r + sumRate)) +#' termTaxaRes <- simTermTaxa( +#' ntaxa = ntaxa_orig, +#' sumRate = sumRate) +#' #' #note that conditioning must be conducted using full sumRate #' #this is because durations are functions of both rates #' #just like in bifurcation #' +#' +#' ################################# #' #use advanced version of simTermTaxa: simTermTaxaAdvanced #' #allows for extant taxa in a term-taxa simulation #' #' #with min.cond -#' termTaxaRes <- simTermTaxaAdvanced(p = 0.1,q = 0.1,mintaxa = 50, -#' maxtaxa = 100,maxtime = 100,minExtant = 10,maxExtant = 20,min.cond = TRUE) +#' termTaxaRes <- simTermTaxaAdvanced( +#' p = 0.1, +#' q = 0.1, +#' mintaxa = 50, +#' maxtaxa = 100, +#' maxtime = 100, +#' minExtant = 10, +#' maxExtant = 20, +#' min.cond = TRUE +#' ) #' #notice that arguments are similar to simFossilRecord -#' # and somewhat more similar to deprecated function simFossilTaxa ;P +#' # and even more similar to deprecated function simFossilTaxa #' plot(termTaxaRes$tree) #' Ntip(termTaxaRes$tree) #' #' #without min.cond -#' termTaxaRes <- simTermTaxaAdvanced(p = 0.1,q = 0.1,mintaxa = 50, -#' maxtaxa = 100,maxtime = 100,minExtant = 10,maxExtant = 20,min.cond = FALSE) +#' termTaxaRes <- simTermTaxaAdvanced( +#' p = 0.1, +#' q = 0.1, +#' mintaxa = 50, +#' maxtaxa = 100, +#' maxtime = 100, +#' minExtant = 10, +#' maxExtant = 20, +#' min.cond = FALSE +#' ) #' plot(termTaxaRes$tree) #' Ntip(termTaxaRes$tree) #' @@ -226,16 +259,18 @@ #' @name termTaxa #' @rdname termTaxa #' @export -simTermTaxa <- function(ntaxa,sumRate = 0.2){ +simTermTaxa <- function(ntaxa, sumRate = 0.2){ #previously known as "candle" #This function will make ideal cladist datasets #all taxa will be descendants #all evolution prior to branching or differentiation is unsampled #taxa do not differentiatiate over those times #require(ape) - tree <- deadTree(ntaxa = ntaxa,sumRate = sumRate) + tree <- deadTree(ntaxa = ntaxa, sumRate = sumRate) #taxonNames <- tree$tip.label - termEdge <- sapply(tree$edge[,2],function(x) any(x == (1:ntaxa))) + termEdge <- sapply(tree$edge[,2],function(x) + any(x == (1:ntaxa)) + ) #termAnc <- tree$edge[termEdge,1] taxonDurations <- tree$edge.length[termEdge] nodeDist <- node.depth.edgelength(tree) @@ -249,9 +284,17 @@ simTermTaxa <- function(ntaxa,sumRate = 0.2){ #' @rdname termTaxa #' @export -simTermTaxaAdvanced <- function(p = 0.1,q = 0.1,mintaxa = 1,maxtaxa = 1000,mintime = 1,maxtime = 1000, - minExtant = 0,maxExtant = NULL,min.cond = TRUE){ - #previously known as "candle" +simTermTaxaAdvanced <- function( + p = 0.1, + q = 0.1, + mintaxa = 1, maxtaxa = 1000, + mintime = 1,maxtime = 1000, + minExtant = 0,maxExtant = NULL, + min.cond = TRUE + ){ + ################################################### + #previously known as "candle" + #################################################### #This function will make ideal cladist datasets #all taxa will be monophyletic descendants #all evolution prior to branching or differentiation is unsampled @@ -259,21 +302,39 @@ simTermTaxaAdvanced <- function(p = 0.1,q = 0.1,mintaxa = 1,maxtaxa = 1000,minti #extant example #p = 0.1;q = 0.1;mintaxa = 50;maxtaxa = 100;mintime = 1;maxtime = 1000;minExtant = 10;maxExtant = 20;min.cond = FALSE #require(ape) - #taxa <- simFossilTaxa(p = p,q = q,mintaxa = mintaxa,maxtaxa = maxtaxa,mintime = mintime,maxtime = maxtime, - # minExtant = minExtant,maxExtant = maxExtant,min.cond = min.cond,nruns = 1, - # anag.rate = 0,prop.bifurc = 0,prop.cryptic = 0,count.cryptic = FALSE,print.runs = FALSE,plot = FALSE) - record <- simFossilRecord(p = p, q = q, r = 0, nruns = 1, - nTotalTaxa = c(mintaxa,maxtaxa),totalTime = c(mintime,maxtime),nExtant = c(minExtant,maxExtant), - anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0, modern.samp.prob = 1, startTaxa = 1, - nSamp = c(0, 1000), tolerance = 10^-4, maxStepTime = 0.01, - shiftRoot4TimeSlice = "withExtantOnly", count.cryptic = FALSE, - negRatesAsZero = TRUE, print.runs = FALSE, sortNames = FALSE, - plot = FALSE) + #taxa <- simFossilTaxa(p = p,q = q,mintaxa = mintaxa, + # maxtaxa = maxtaxa,mintime = mintime,maxtime = maxtime, + # minExtant = minExtant,maxExtant = maxExtant, + # min.cond = min.cond,nruns = 1, + # anag.rate = 0,prop.bifurc = 0,prop.cryptic = 0, + # count.cryptic = FALSE,print.runs = FALSE,plot = FALSE) + record <- simFossilRecord( + p = p, q = q, r = 0, nruns = 1, + nTotalTaxa = c(mintaxa,maxtaxa), + totalTime = c(mintime,maxtime), + nExtant = c(minExtant,maxExtant), + anag.rate = 0, prop.bifurc = 0, + prop.cryptic = 0, + modern.samp.prob = 1, + startTaxa = 1, + nSamp = c(0, 1000), + tolerance = 10^-4, + maxStepTime = 0.01, + shiftRoot4TimeSlice = "withExtantOnly", + count.cryptic = FALSE, + negRatesAsZero = TRUE, + print.runs = FALSE, + sortNames = FALSE, + plot = FALSE + ) + ############# taxa <- fossilRecord2fossilTaxa(record) tree <- dropZLB(taxa2phylo(taxa)) ntaxa <- Ntip(tree) #taxonNames <- tree$tip.label - termEdge <- sapply(tree$edge[,2],function(x) any(x == (1:ntaxa))) + termEdge <- sapply(tree$edge[,2], + function(x) any(x == (1:ntaxa)) + ) termNodes <- tree$edge[termEdge,2] #termAnc <- tree$edge[termEdge,1] taxonDurations <- tree$edge.length[termEdge] @@ -296,15 +357,18 @@ trueTermTaxaTree <- function(TermTaxaRes,time.obs){ #first, check if the times of observations are outside of original taxon ranges taxR <- TermTaxaRes$taxonRanges nameMatch <- match(names(time.obs),rownames(taxR)) - if(any(is.na(nameMatch))){stop("ERROR: names on time.obs and in TermTaxaRes don't match")} - if(any(sapply(1:length(time.obs),function(x) + if(any(is.na(nameMatch))){ + stop("ERROR: names on time.obs and in TermTaxaRes don't match") + } + timeObsOutsideRanges <- sapply(1:length(time.obs),function(x) if(is.na(time.obs[x])){ FALSE }else{ (time.obs[x]>taxR[nameMatch[x],1])|(time.obs[x]0){ tree1 <- drop.tip(tree1,dropTaxa) } + # #need to correct root.time if basal outgroups were removed - tree1 <- fixRootTime(treeNoDrop,tree1) #use the tree after adjusting the term branch lengths + # + #use the tree after adjusting the term branch lengths + tree1 <- fixRootTime(treeNoDrop,tree1) + # return(tree1) } @@ -348,7 +418,7 @@ deadTree <- function(ntaxa,sumRate = 0.2){ #anything else should be indep in a b-d process #lets make the phylogeny #require(ape) - tree <- rtree(ntaxa) + tree <- ape::rtree(ntaxa) tree$edge.length <- rexp(ntaxa+ntaxa-2,sumRate) tree$root.time <- max(node.depth.edgelength(tree))+200 return(tree) diff --git a/R/timePaleoPhy.R b/R/timePaleoPhy.R index b38a4389..6bf6b283 100644 --- a/R/timePaleoPhy.R +++ b/R/timePaleoPhy.R @@ -7,7 +7,8 @@ #' (Bapst, 2014, Paleobiology), this function is largely retained for legacy purposes #' and plotting applications. The time-scaling methods implemented #' by the functions listed here do \bold{not} return realistic estimates of -#' divergence dates, users should investigate other time-scaling methods such as \code{\link{cal3TimePaleoPhy}}. +#' divergence dates, users should investigate other time-scaling +#' methods such as \code{\link{cal3TimePaleoPhy}}. #' #' @details #' \emph{Time-Scaling Methods} @@ -17,10 +18,11 @@ #' Unfortunately, it can be difficult to attribute some time-scaling methods to #' specific references in the literature. #' -#' There are five main \emph{a posteriori} approaches that can be used by \code{timePaleoPhy}. Four of these -#' main types use some value of absolute time, chosen a priori, to time-scale the tree. -#' This is handled by the argument \code{vartime}, which is NULL by default and unused -#' for type "basic". +#' There are five main \emph{a posteriori} approaches that +#' can be used by \code{timePaleoPhy}. Four of these +#' main types use some value of absolute time, chosen \emph{a priori}, to time-scale the tree. +#' This is handled by the argument \code{vartime}, which is \code{NULL} by default and unused +#' for type \code{"basic"}. #' #' \describe{ @@ -111,7 +113,7 @@ #' is known from. Presumably, the first and last appearances of that taxon in #' the fossil record is at unknown dates within these bounds. #' -#' As of paleotree version 2.0. the treatment of taxon ages in +#' As of paleotree v2.0. the treatment of taxon ages in #' \code{timePaleoPhy} is handled by the argument \code{dateTreatment}. #' \emph{By default,} this argument is set to 'firstLast' which means the matrix of ages are treated #' as precise first and last appearance dates (i.e. FADs and LADs). The earlier FADs will be used @@ -142,7 +144,8 @@ #' where specimens come from the same fossil site. #' #' The input \code{timeList} object for \code{bin_timePaleoPhy} can have overlapping -#' (i.e. non-sequential) intervals, and intervals of uneven size. Taxa alive in the modern should be listed as last +#' (i.e. non-sequential) intervals, and intervals of +#' uneven size. Taxa alive in the modern should be listed as last #' occurring in a time interval that begins at time 0 and ends at time 0. If taxa #' occur only in single collections (i.e. their first and last appearance in the #' fossil record is synchronous, the argument point.occur will force all taxa @@ -180,15 +183,17 @@ #' when taxa appear in the fossil record, it is preferable to use the 'bin' #' time-scaling functions; however, see the argument \code{dateTreatment}. -#' @param type Type of time-scaling method used. Can be "basic", "equal", "equal_paleotree_legacy", "equal_date.phylo_legacy" -#' "aba", "zbla" or "mbl". Type = "basic" by default. See details below. +#' @param type Type of time-scaling method used. Can be \code{"basic"}, +#' \code{"equal"}, \code{"equal_paleotree_legacy"}, \code{"equal_date.phylo_legacy"} +#' \code{"aba"}, \code{"zbla"} or \code{"mbl"}. +#' \code{Type = "basic"} by default. See details below. -#' @param vartime Time variable; usage depends on the method 'type' argument. -#' Ignored if type = "basic". +#' @param vartime Time variable; usage depends on the \code{type} argument. +#' Ignored if \code{type = "basic"}. #' @param ntrees Number of time-scaled trees to output. If ntrees is greater #' than one and both randres is false and dateTreatment is neither -#' 'minMax' or 'randObs', the function will fail and +#' \code{'minMax'} or \code{'randObs'}, the function will fail and #' a warning is issued, as these arguments would simply produce multiple #' identical time-scaled trees. @@ -209,16 +214,16 @@ #' resolving of polytomies generally does not differ across multiple uses, #' unlike use of \code{multi2di}. -#' @param add.term If true, adds terminal ranges. By default, this function will +#' @param add.term If \code{TRUE}, adds terminal ranges. By default, this function will #' not add the ranges of taxa when time-scaling a tree, so that the tips #' correspond temporally to the first appearance datums of the given taxa. If -#' \code{add.term = T}, then the 'terminal ranges' of the taxa are added to the tips +#' \code{add.term = TRUE}, then the 'terminal ranges' of the taxa are added to the tips #' after tree is time-scaled, such that the tips now correspond to the last #' appearance datums. #' @param inc.term.adj If true, includes terminal ranges in branch length #' estimates for the various adjustment of branch lengths under all methods -#' except 'basic' (i.e. a terminal length branch will not be treated as zero +#' except \code{type = 'basic'} (i.e. a terminal length branch will not be treated as zero #' length is this argument is \code{TRUE} if the taxon at this tip has a non-zero #' duration). By default, this argument is \code{FALSE} and this function will not #' include the ranges of taxa when adjusting branch lengths, so that @@ -229,14 +234,18 @@ #' @param dateTreatment This argument controls the interpretation of \code{timeData}. #' The default setting \code{dateTreatment = "firstLast"} treats the dates -#' in \code{timeData} as a column of precise first and last appearances. +#' in \code{timeData} as a column of precise first and last appearances. +#' #' A second option is \code{dateTreatment = "minMax"}, which #' treats these dates as minimum and maximum bounds on single point dates. Under this option, #' all taxa in the analysis will be treated as being point dates, such that the first appearance -#' is also the last. These dates will be pulled under a uniform distribution. If \code{dateTreatment = "minMax"} is used, -#' \code{add.term} becomes meaningless, and the use of it will return an error message. A third option -#' is \code{dateTreatment = "randObs"}. This assumes that the dates in the matrix are first and last appearance times, -#' but that the desired time of observation is unknown. Thus, this is much like \code{dateTreatment = "firstLast"} except +#' is also the last. These dates will be pulled under a uniform distribution. +#' If \code{dateTreatment = "minMax"} is used, +#' \code{add.term} becomes meaningless, and the use of it will return an error message. +#' +#' A third option is \code{dateTreatment = "randObs"}. This assumes that the dates in the +#' matrix are first and last appearance times, but that the desired time of observation is unknown. +#' Thus, this is much like \code{dateTreatment = "firstLast"} except #' the effective time of observation (the taxon's LAD under #' \code{dateTreatment = "firstLast"}) is treated as an uncertain date, and #' is randomly sampled between the first and last appearance times. The FAD still is treated as a fixed number, used @@ -245,6 +254,7 @@ #' for clarity. This temporal uncertainty in times of observation might be useful if #' a user is interested in applying phylogeny-based approaches to studying trait evolution, but have #' per-taxon measurements of traits that come from museum specimens with uncertain temporal placement. +#' #' With both arguments \code{dateTreatment = "minMax"} and #' \code{dateTreatment = "randObs"}, the sampling of dates from random distributions should #' compel users to produce many time-scaled trees for any given analytical purpose. @@ -341,10 +351,10 @@ #' @seealso \code{\link{cal3TimePaleoPhy}}, \code{\link{binTimeData}}, #' \code{\link{multi2di}} #' -#' For an alternative time-scaling function, which includes the 'ruta' method +#' For an alternative time-scaling function, which includes the \code{'ruta'} method #' that weights the time-scaling of branches by estimates of character change -#' along with implementations of the 'basic' and 'equal' methods described here, -#' please see function \code{DatePhylo} in package \code{strap}. +#' along with implementations of the \code{'basic'} and \code{'equal'} +#' methods described here, please see function \code{DatePhylo} in package \code{strap}. #' @references #' Bapst, D. W. 2013. A stochastic rate-calibrated method for time-scaling @@ -388,16 +398,33 @@ #' taxicDivDisc(retioRanges) #' #' #Use basic time-scaling (terminal branches only go to FADs) -#' ttree <- bin_timePaleoPhy(tree = retioTree,timeList = retioRanges,type = "basic", -#' ntrees = 1, plot = TRUE) +#' ttree <- bin_timePaleoPhy( +#' tree = retioTree, +#' timeList = retioRanges, +#' type = "basic", +#' ntrees = 1, +#' plot = TRUE +#' ) #' #' #Use basic time-scaling (terminal branches go to LADs) -#' ttree <- bin_timePaleoPhy(tree = retioTree,timeList = retioRanges,type = "basic", -#' add.term = TRUE, ntrees = 1, plot = TRUE) +#' ttree <- bin_timePaleoPhy( +#' tree = retioTree, +#' timeList = retioRanges, +#' type = "basic", +#' add.term = TRUE, +#' ntrees = 1, +#' plot = TRUE +#' ) #' #' #mininum branch length time-scaling (terminal branches only go to FADs) -#' ttree <- bin_timePaleoPhy(tree = retioTree,timeList = retioRanges,type = "mbl", -#' vartime = 1, ntrees = 1, plot = TRUE) +#' ttree <- bin_timePaleoPhy( +#' tree = retioTree, +#' timeList = retioRanges, +#' type = "mbl", +#' vartime = 1, +#' ntrees = 1, +#' plot = TRUE +#' ) #' #' ################### #' @@ -405,121 +432,334 @@ #' #' #Simulate some fossil ranges with simFossilRecord #' set.seed(444) -#' record <- simFossilRecord(p = 0.1, q = 0.1, nruns = 1, -#' nTotalTaxa = c(30,40), nExtant = 0) +#' record <- simFossilRecord( +#' p = 0.1, q = 0.1, +#' nruns = 1, +#' nTotalTaxa = c(30,40), +#' nExtant = 0 +#' ) #' taxa <- fossilRecord2fossilTaxa(record) +#' #' #simulate a fossil record with imperfect sampling with sampleRanges -#' rangesCont <- sampleRanges(taxa,r = 0.5) +#' rangesCont <- sampleRanges(taxa, r = 0.5) #' #let's use taxa2cladogram to get the 'ideal' cladogram of the taxa -#' cladogram <- taxa2cladogram(taxa,plot = TRUE) +#' cladogram <- taxa2cladogram(taxa, +#' plot = TRUE) +#' #' #Now let's try timePaleoPhy using the continuous range data -#' ttree <- timePaleoPhy(cladogram,rangesCont,type = "basic",plot = TRUE) +#' ttree <- timePaleoPhy( +#' cladogram, +#' rangesCont, +#' type = "basic", +#' plot = TRUE +#' ) +#' #' #plot diversity curve #' phyloDiv(ttree) #' -#' #that tree lacked the terminal parts of ranges (tips stops at the taxon FADs) -#' #let's add those terminal ranges back on with add.term -#' ttree <- timePaleoPhy(cladogram,rangesCont,type = "basic",add.term = TRUE,plot = TRUE) +#' +#' ################################################ +#' # that tree lacked the terminal parts of ranges +#' # (tips stops at the taxon FADs) +#' # let's add those terminal ranges back on with add.term +#' ttree <- timePaleoPhy( +#' cladogram, +#' rangesCont, +#' type = "basic", +#' add.term = TRUE, +#' plot = TRUE +#' ) +#' #' #plot diversity curve #' phyloDiv(ttree) #' -#' #that tree didn't look very resolved, does it? (See Wagner and Erwin 1995 to see why) -#' #can randomly resolve trees using the argument randres -#' #each resulting tree will have polytomies randomly resolved in different ways using multi2di -#' ttree <- timePaleoPhy(cladogram,rangesCont,type = "basic",ntrees = 1,randres = TRUE, -#' add.term = TRUE,plot = TRUE) -#' #notice well the warning it prints! -#' #we would need to set ntrees to a large number to get a fair sample of trees -#' -#' #if we set ntrees>1, timePaleoPhy will make multiple time-trees -#' ttrees <- timePaleoPhy(cladogram,rangesCont,type = "basic",ntrees = 9,randres = TRUE, -#' add.term = TRUE,plot = TRUE) +#' +#' ################################################# +#' # that tree didn't look very resolved, does it? +#' # (See Wagner and Erwin 1995 to see why) +#' # can randomly resolve trees using the argument randres +#' # each resulting tree will have polytomies +#' # randomly resolved stochastically using ape::multi2di +#' ttree <- timePaleoPhy( +#' cladogram, +#' rangesCont, +#' type = "basic", +#' ntrees = 1, +#' randres = TRUE, +#' add.term = TRUE, +#' plot = TRUE +#' ) +#' +#' # Notice the warning it prints! PAY ATTENTION! +#' # We would need to set ntrees to a large number +#' # to get a fair sample of trees +#' +#' # if we set ntrees > 1, timePaleoPhy will make multiple time-trees +#' ttrees <- timePaleoPhy( +#' cladogram, +#' rangesCont, +#' type = "basic", +#' ntrees = 9, +#' randres = TRUE, +#' add.term = TRUE, +#' plot = TRUE) #' #let's compare nine of them at once in a plot -#' layout(matrix(1:9,3,3)) +#' layout(matrix(1:9, 3, 3)) #' parOrig <- par(no.readonly = TRUE) -#' par(mar = c(1,1,1,1)) -#' for(i in 1:9){plot(ladderize(ttrees[[i]]),show.tip.label = FALSE,no.margin = TRUE)} +#' par(mar = c(1, 1, 1, 1)) +#' for(i in 1:9){ +#' plot( +#' ladderize(ttrees[[i]]), +#' show.tip.label = FALSE, +#' no.margin = TRUE +#' ) +#' } #' #they are all a bit different! -#' -#' #we can also resolve the polytomies in the tree according to time of first appearance -#' #via the function timeLadderTree, by setting the argument 'timeres' to TRUE -#' ttree <- timePaleoPhy(cladogram,rangesCont,type = "basic",ntrees = 1,timeres = TRUE, -#' add.term = TRUE,plot = TRUE) +#' +#' +#' ############################################## +#' # we can also resolve the polytomies in the tree +#' # according to time of first appearance via the function timeLadderTree +#' # by setting the argument 'timeres = TRUE' +#' ttree <- timePaleoPhy( +#' cladogram, +#' rangesCont, +#' type = "basic", +#' ntrees = 1, +#' timeres = TRUE, +#' add.term = TRUE, +#' plot = TRUE +#' ) #' #' #can plot the median diversity curve with multiDiv -#' layout(1);par(parOrig) +#' layout(1) +#' par(parOrig) #' multiDiv(ttrees) #' #' #compare different methods of timePaleoPhy -#' layout(matrix(1:6,3,2)) +#' layout(matrix(1:6, 3, 2)) #' parOrig <- par(no.readonly = TRUE) -#' par(mar = c(3,2,1,2)) -#' plot(ladderize(timePaleoPhy(cladogram,rangesCont,type = "basic",vartime = NULL,add.term = TRUE))) -#' axisPhylo();text(x = 50,y = 23,"type = basic",adj = c(0,0.5),cex = 1.2) -#' plot(ladderize(timePaleoPhy(cladogram,rangesCont,type = "equal",vartime = 10,add.term = TRUE))) -#' axisPhylo();text(x = 55,y = 23,"type = equal",adj = c(0,0.5),cex = 1.2) -#' plot(ladderize(timePaleoPhy(cladogram,rangesCont,type = "aba",vartime = 1,add.term = TRUE))) -#' axisPhylo();text(x = 55,y = 23,"type = aba",adj = c(0,0.5),cex = 1.2) -#' plot(ladderize(timePaleoPhy(cladogram,rangesCont,type = "zlba",vartime = 1,add.term = TRUE))) -#' axisPhylo();text(x = 55,y = 23,"type = zlba",adj = c(0,0.5),cex = 1.2) -#' plot(ladderize(timePaleoPhy(cladogram,rangesCont,type = "mbl",vartime = 1,add.term = TRUE))) -#' axisPhylo();text(x = 55,y = 23,"type = mbl",adj = c(0,0.5),cex = 1.2) -#' layout(1);par(parOrig) +#' par(mar = c(3, 2, 1, 2)) +#' plot(ladderize(timePaleoPhy( +#' cladogram, +#' rangesCont, +#' type = "basic", +#' vartime = NULL, +#' add.term = TRUE +#' ))) +#' axisPhylo() +#' text(x = 50,y = 23,"type = basic",adj = c(0,0.5),cex = 1.2) +#' # +#' plot(ladderize(timePaleoPhy( +#' cladogram, +#' rangesCont, +#' type = "equal", +#' vartime = 10, +#' add.term = TRUE +#' ))) +#' axisPhylo() +#' text(x = 55,y = 23,"type = equal",adj = c(0,0.5),cex = 1.2) +#' # +#' plot(ladderize(timePaleoPhy( +#' cladogram, +#' rangesCont, +#' type = "aba", +#' vartime = 1, +#' add.term = TRUE +#' ))) +#' axisPhylo() +#' text(x = 55,y = 23,"type = aba",adj = c(0,0.5),cex = 1.2) +#' # +#' plot(ladderize(timePaleoPhy( +#' cladogram, +#' rangesCont, +#' type = "zlba", +#' vartime = 1, +#' add.term = TRUE +#' ))) +#' axisPhylo() +#' text(x = 55, y = 23, "type = zlba", +#' adj = c(0,0.5), cex = 1.2) +#' # +#' plot(ladderize(timePaleoPhy( +#' cladogram, +#' rangesCont, +#' type = "mbl", +#' vartime = 1, +#' add.term = TRUE +#' ))) +#' axisPhylo() +#' text(x = 55,y = 23,"type = mbl",adj = c(0,0.5),cex = 1.2) +#' layout(1) +#' par(parOrig) #' +#' +#' ############################################## #' #using node.mins -#' #let's say we have (molecular??) evidence that node #5 is at least 1200 time-units ago +#' #let's say we have (molecular??) evidence that +#' # node #5 is at least 1200 time-units ago #' #to use node.mins, first need to drop any unshared taxa +#' #' droppers <- cladogram$tip.label[is.na( -#' match(cladogram$tip.label,names(which(!is.na(rangesCont[,1])))))] +#' match(cladogram$tip.label, +#' names(which(!is.na(rangesCont[,1]))) +#' ) +#' )] #' cladoDrop <- drop.tip(cladogram, droppers) +#' #' # now make vector same length as number of nodes #' nodeDates <- rep(NA, Nnode(cladoDrop)) #' nodeDates[5] <- 1200 -#' ttree1 <- timePaleoPhy(cladoDrop,rangesCont,type = "basic", -#' randres = FALSE,node.mins = nodeDates,plot = TRUE) -#' ttree2 <- timePaleoPhy(cladoDrop,rangesCont,type = "basic", -#' randres = TRUE,node.mins = nodeDates,plot = TRUE) +#' ttree1 <- timePaleoPhy( +#' cladoDrop,rangesCont, +#' type = "basic", +#' randres = FALSE, +#' node.mins = nodeDates, +#' plot = TRUE) +#' ttree2 <- timePaleoPhy( +#' cladoDrop, +#' rangesCont, +#' type = "basic", +#' randres = TRUE, +#' node.mins = nodeDates, +#' plot = TRUE) #' +#' +#' #################################################### +#' ################################################### +#' #################################################### #' #Using bin_timePaleoPhy to time-scale with discrete interval data +#' #' #first let's use binTimeData() to bin in intervals of 1 time unit #' rangesDisc <- binTimeData(rangesCont,int.length = 1) -#' ttreeB1 <- bin_timePaleoPhy(cladogram,rangesDisc,type = "basic",ntrees = 1,randres = TRUE, -#' add.term = TRUE,plot = FALSE) +#' +#' ttreeB1 <- bin_timePaleoPhy( +#' cladogram, +#' rangesDisc, +#' type = "basic", +#' ntrees = 1, +#' randres = TRUE, +#' add.term = TRUE, +#' plot = FALSE +#' ) +#' #' #notice the warning it prints! #' phyloDiv(ttreeB1) +#' #' #with time-order resolving via timeLadderTree -#' ttreeB2 <- bin_timePaleoPhy(cladogram,rangesDisc,type = "basic",ntrees = 1,timeres = TRUE, -#' add.term = TRUE,plot = FALSE) +#' ttreeB2 <- bin_timePaleoPhy( +#' cladogram, +#' rangesDisc, +#' type = "basic", +#' ntrees = 1, +#' timeres = TRUE, +#' add.term = TRUE, +#' plot = FALSE +#' ) +#' #' phyloDiv(ttreeB2) +#' +#' #' #can also force the appearance timings not to be chosen stochastically -#' ttreeB3 <- bin_timePaleoPhy(cladogram,rangesDisc,type = "basic",ntrees = 1, -#' nonstoch.bin = TRUE,randres = TRUE,add.term = TRUE,plot = FALSE) +#' ttreeB3 <- bin_timePaleoPhy( +#' cladogram,rangesDisc, +#' type = "basic", +#' ntrees = 1, +#' nonstoch.bin = TRUE, +#' randres = TRUE, +#' add.term = TRUE, +#' plot = FALSE +#' ) +#' #' phyloDiv(ttreeB3) -#' +#' +#' #' # testing node.mins in bin_timePaleoPhy -#' ttree <- bin_timePaleoPhy(cladoDrop,rangesDisc,type = "basic",ntrees = 1, -#' add.term = TRUE,randres = FALSE,node.mins = nodeDates,plot = TRUE) +#' ttree <- bin_timePaleoPhy( +#' cladoDrop, +#' rangesDisc, +#' type = "basic", +#' ntrees = 1, +#' add.term = TRUE, +#' randres = FALSE, +#' node.mins = nodeDates, +#' plot = TRUE +#' ) +#' #' # with randres = TRUE -#' ttree <- bin_timePaleoPhy(cladoDrop,rangesDisc,type = "basic",ntrees = 1, -#' add.term = TRUE,randres = TRUE,node.mins = nodeDates,plot = TRUE) +#' ttree <- bin_timePaleoPhy( +#' cladoDrop, +#' rangesDisc, +#' type = "basic",ntrees = 1, +#' add.term = TRUE, +#' randres = TRUE, +#' node.mins = nodeDates, +#' plot = TRUE +#' ) #' #' \donttest{ #' #simple three taxon example for testing inc.term.adj -#' ranges1 <- cbind(c(3,4,5),c(2,3,1));rownames(ranges1) <- paste("t",1:3,sep = "") -#' clado1 <- read.tree(file = NA,text = "(t1,(t2,t3));") -#' ttree1 <- timePaleoPhy(clado1,ranges1,type = "mbl",vartime = 1) -#' ttree2 <- timePaleoPhy(clado1,ranges1,type = "mbl",vartime = 1,add.term = TRUE) -#' ttree3 <- timePaleoPhy(clado1,ranges1,type = "mbl",vartime = 1,add.term = TRUE,inc.term.adj = TRUE) -#' layout(1:3) -#' ttree1$root.time;plot(ttree1);axisPhylo() -#' ttree2$root.time;plot(ttree2);axisPhylo() -#' ttree3$root.time;plot(ttree3);axisPhylo() +#' ranges1 <- cbind(c(3,4,5), c(2,3,1)) +#' rownames(ranges1) <- paste("t", 1:3, sep = "") +#' clado1 <- read.tree(file = NA, +#' text = "(t1,(t2,t3));") +#' ttree1 <- timePaleoPhy( +#' clado1, +#' ranges1, +#' type = "mbl", +#' vartime = 1 +#' ) +#' ttree2 <- timePaleoPhy( +#' clado1, +#' ranges1, +#' type = "mbl", +#' vartime = 1, +#' add.term = TRUE +#' ) +#' ttree3 <- timePaleoPhy( +#' clado1, +#' ranges1, +#' type = "mbl", +#' vartime = 1, +#' add.term = TRUE, +#' inc.term.adj = TRUE +#' ) +#' +#' # see differences in root times +#' ttree1$root.time +#' ttree2$root.time +#' ttree3$root.time #' -apply(ranges1,1,diff) +#' +#' layout(1:3) +#' plot(ttree1) +#' axisPhylo() +#' plot(ttree2) +#' axisPhylo() +#' plot(ttree3) +#' axisPhylo() +#' #' } #' + + #' @export -timePaleoPhy <- function(tree,timeData,type = "basic",vartime = NULL,ntrees = 1,randres = FALSE,timeres = FALSE,add.term = FALSE, - inc.term.adj = FALSE,dateTreatment = "firstLast",node.mins = NULL,noisyDrop = TRUE,plot = FALSE){ +timePaleoPhy <- function( + tree, + timeData, + type = "basic", + vartime = NULL, + ntrees = 1, + randres = FALSE, + timeres = FALSE, + add.term = FALSE, + inc.term.adj = FALSE, + dateTreatment = "firstLast", + node.mins = NULL, + noisyDrop = TRUE, + plot = FALSE + ){ + ########################################################### #fast time calibration for phylogenies of fossil taxa; basic methods #this code inspired by similar code from G. Lloyd and G. Hunt #INITIAL: @@ -533,22 +773,37 @@ timePaleoPhy <- function(tree,timeData,type = "basic",vartime = NULL,ntrees = 1, #will make multiple randomly resolved trees if ntrees>1 and randres = T; polytomies resolved with multi2di() from ape #not any reason to do this unless you have polytomies #do !not! !ever! trust a single tree like that!! ever!! - #TYPES - #if (type = "basic") just gives initial raw time-scaled tree (vartime is ignored), many zero-length branches - #if (type = "aba") then adds vartime to all branches (All Branch Additive) - #if (type = "zlba") then adds vartime to zero length branches (Zero Length Branch Additive) - #if (type = "mbl") scales up all branches greater than vartime and subtracts from lower (Min Branch Length) - #if (type = "equal") "equal" method of G. Lloyd, recreated here; vartime is used as time added to root - #ADDING TERMINAL BRANCHES TO PHYLOGENY - #if addterm != FALSE, then observed taxon ranges (LAD-FAD) are added to the tree, with LADs as the location of the tips - #to allow for tips to be at range midpoints (recc. for trait evol analyses), replace LADs in timeData with mid-range dates - #root.time - #ALL TREES ARE OUTPUT WITH ELEMENTs "$root.time" - #this is the time of the root on the tree, which is important for comparing across trees - #this must be calculated prior to adding anything to terminal branches - #tree <- rtree(10);tree$edge.length <- NULL;type = "basic";vartime = NULL;add.term = FALSE;node.mins = NULL - #timeData <- runif(10,30,200);timeData <- cbind(timeData,timeData-runif(10,1,20));rownames(timeData) <- tree$tip.label + # TYPES + # if (type = "basic") + # just gives initial raw time-scaled tree (vartime is ignored), many zero-length branches + # if (type = "aba") + # then adds vartime to all branches (All Branch Additive) + # if (type = "zlba") + # then adds vartime to zero length branches (Zero Length Branch Additive) + # if (type = "mbl") + # scales up all branches greater than vartime and subtracts from lower (Min Branch Length) + # if (type = "equal") + # "equal" method of G. Lloyd, recreated here; vartime is used as time added to root + # ADDING TERMINAL BRANCHES TO PHYLOGENY + # if addterm != FALSE, + # then observed taxon ranges (LAD-FAD) are added to the tree + # with LADs as the location of the tips + # to allow for tips to be at range midpoints (recc. for trait evol analyses) + # replace LADs in timeData with mid-range dates + # root.time + # ALL TREES ARE OUTPUT WITH ELEMENTs "$root.time" + # this is the time of the root on the tree, which is important for comparing across trees + # this must be calculated prior to adding anything to terminal branches + # + ##################################################### + # tree <- rtree(10);tree$edge.length <- NULL;type = "basic" + # vartime = NULL;add.term = FALSE;node.mins = NULL + # timeData <- runif(10,30,200) + # timeData <- cbind(timeData,timeData-runif(10,1,20)) + # rownames(timeData) <- tree$tip.label #node.mins <- runif(9,50,300) + # + ################################### #require(ape) if(!inherits(tree, "phylo")){ stop("tree is not of class phylo")} @@ -562,156 +817,295 @@ timePaleoPhy <- function(tree,timeData,type = "basic",vartime = NULL,ntrees = 1, if(!is.character(tree$tip.label)){ stop("tree tip labels are not a character vector") } - if(ntrees<1){stop("ntrees<1")} + if(ntrees<1){ + stop("ntrees<1") + } if(!any(dateTreatment == c("firstLast","minMax","randObs"))){ - stop("dateTreatment must be one of 'firstLast', 'minMax' or 'randObs'!")} - if(!any(type == c("basic","mbl","equal","equal_paleotree_legacy","equal_date.phylo_legacy","aba","zlba"))){ - stop("type must be one of the types listed in the help file for timePaleoPhy")} - if(!add.term & dateTreatment == "randObs"){stop( - "Inconsistent arguments: randomized observation times are treated as LAST appearance times, so add.term must be true for dateTreatment selection to have any effect on output!" - )} - if(add.term & dateTreatment == "minMax"){stop( - "Inconsistent arguments: randomized dates (dateTreatment = minMax) are treated as point occurrences, so there are effectively no terminal ranges for add.term to add!" - )} - if(ntrees>1 & !randres & dateTreatment == "firstLast"){stop("Time-scale more trees without randomly resolving or random dates?!")} - if(ntrees == 1 & randres){message("Warning: Do not interpret a single randomly-resolved tree")} - if(ntrees == 1 & dateTreatment == "randObs"){message("Warning: Do not interpret a single tree with randomly-placed observation times")} - if(ntrees == 1 & dateTreatment == "minMax"){message("Warning: Do not interpret a single tree with randomly-placed taxon dates")} - if(randres & timeres){stop( - "Inconsistent arguments: You cannot randomly resolve polytomies and resolve with respect to time simultaneously!")} - if(!add.term & inc.term.adj){stop( - "Inconsistent arguments: Terminal ranges cannot be used in adjustment of branch lengths if not added to tree!")} - if(type == "basic" & inc.term.adj){stop( - "Inconsistent arguments: Terminal range adjustment of branch lengths does not affect basic time-scaling method!")} + stop("dateTreatment must be one of 'firstLast', 'minMax' or 'randObs'!") + } + allowedTypesTPP <- c( + "basic","mbl", + "equal","equal_paleotree_legacy","equal_date.phylo_legacy", + "aba","zlba" + ) + if(!any(type == allowedTypesTPP)){ + stop("type must be one of the types listed in the help file for timePaleoPhy") + } + if(!add.term & dateTreatment == "randObs"){ + stop(paste0( + "Inconsistent arguments: randomized observation times are treated as LAST appearance times,\n", + " so add.term must be true for dateTreatment selection to have any effect on output!" + )) + } + if(add.term & dateTreatment == "minMax"){ + stop(paste0( + "Inconsistent arguments: randomized dates (dateTreatment = minMax) are treated as point occurrences,\n", + " so there are effectively no terminal ranges for add.term to add!" + )) + } + if(ntrees>1 & !randres & dateTreatment == "firstLast"){ + stop("Time-scale more trees without randomly resolving or random dates?!") + } + if(ntrees == 1 & randres){ + message("Warning: Do not interpret a single randomly-resolved tree") + } + if(ntrees == 1 & dateTreatment == "randObs"){ + message("Warning: Do not interpret a single tree with randomly-placed observation times") + } + if(ntrees == 1 & dateTreatment == "minMax"){ + message("Warning: Do not interpret a single tree with randomly-placed taxon dates") + } + if(randres & timeres){ + stop(paste0( + "Inconsistent arguments:\n", + " You cannot randomly resolve polytomies and resolve with respect to time simultaneously!" + )) + } + if(!add.term & inc.term.adj){ + stop(paste0( + "Inconsistent arguments:\n", + " Terminal ranges cannot be used in adjustment of branch lengths if not added to tree!" + )) + } + if(type == "basic" & inc.term.adj){ + stop(paste0( + "Inconsistent arguments:\n", + "Terminal range adjustment of branch lengths does not affect basic time-scaling method!")) + } originalInputTree <- tree #remove taxa that are NA or missing in timeData - droppers <- tree$tip.label[is.na(match(tree$tip.label,names(which(!is.na(timeData[,1])))))] + droppers <- tree$tip.label[ + is.na( + match(tree$tip.label, + names(which(!is.na(timeData[,1]))) + ) + ) + ] if(length(droppers)>0){ - if(length(droppers) == Ntip(tree)){stop("Absolutely NO valid taxa shared between the tree and temporal data!")} - if(noisyDrop){message(paste("Warning: Following taxa dropped from tree:",paste0(droppers,collapse = ", ")))} + if(length(droppers) == Ntip(tree)){ + stop("Absolutely NO valid taxa shared between the tree and temporal data!" + )} + if(noisyDrop){ + message(paste( + "Warning: Following taxa dropped from tree:", + paste0(droppers,collapse = ", ") + ))} tree <- drop.tip(tree,droppers) - if(is.null(tree)){stop("Absolutely NO valid taxa shared between the tree and temporal data!")} - timeData[which(!sapply(rownames(timeData),function(x) any(x == tree$tip.label))),1] <- NA + if(is.null(tree)){ + stop("Absolutely NO valid taxa shared between the tree and temporal data!" + )} + timeData[which(!sapply(rownames(timeData), + function(x) any(x == tree$tip.label))),1] <- NA } timeData <- timeData[!is.na(timeData[,1]),] - if(any(is.na(timeData))){stop("Weird NAs in Data??")} - if(any(timeData[,1]0){ - stop("node.mins not compatible with datasets where some taxa are dropped; drop before analysis instead")} + stop(paste0( + "node.mins not compatible with datasets where some taxa are dropped\n", + "Drop unshared taxa and recalculate location of constrained nodes before analysis instead" + )) + } if((!ape::is.binary.phylo(originalInputTree) | !is.rooted(tree)) & randres){ - origDesc <- lapply(prop.part(originalInputTree),function(x) sort(originalInputTree$tip.label[x])) - treeDesc <- lapply(prop.part(tree),function(x) sort(tree$tip.label[x])) - node_changes <- match(origDesc,treeDesc) + origDesc <- lapply(prop.part(originalInputTree), + function(x) sort(originalInputTree$tip.label[x]) + ) + treeDesc <- lapply(prop.part(tree), + function(x) sort(tree$tip.label[x]) + ) + node_changes <- match(origDesc, treeDesc) node.mins1 <- rep(NA,Nnode(tree)) node.mins1[node_changes] <- node.mins }else{ node.mins1 <- node.mins } + # #require(phangorn) - for(i in (Ntip(tree)+1):length(ntime)){ #all internal nodes + # + #all internal nodes + for(i in (Ntip(tree)+1):length(ntime)){ desc_all <- unlist(Descendants(tree,i,type = "all")) desc_nodes <- c(desc_all[desc_all>Ntip(tree)],i)-Ntip(tree) #INCLUDING ITSELF node_times <- node.mins1[desc_nodes] ntime[i] <- max(ntime[i],node_times[!is.na(node_times)]) } } - if((type == "equal"|type == "equal_paleotree_legacy") & !is.null(vartime)){ #add to root, if method = "equal" + # + #add to root, if method = "equal" + if((type == "equal"|type == "equal_paleotree_legacy") & !is.null(vartime)){ ntime[Ntip(tree)+1] <- vartime+ntime[Ntip(tree)+1] #anchor_adjust <- vartime+anchor_adjust } ttree <- tree ttree$edge.length <- sapply(1:Nedge(ttree),function(x) - ntime[ttree$edge[x,1]]-ntime[ttree$edge[x,2]]) #finds each edge length easy peasy, based on G. Lloyd's code + #finds each edge length easy peasy, based on G. Lloyd's code + ntime[ttree$edge[x,1]]-ntime[ttree$edge[x,2]] + ) #okay, now time to do add terminal branch lengths if inc.term.adj if(add.term & inc.term.adj){ obs_ranges <- timeData[,1]-timeData[,2] term_id <- ttree$tip.label[ttree$edge[ttree$edge[,2] <= Ntip(ttree),2]] term_add <- sapply(term_id,function(x) obs_ranges[x]) - ttree$edge.length[ttree$edge[,2] <= Ntip(ttree)] <- ttree$edge.length[ttree$edge[,2] <= Ntip(ttree)]+term_add + new_edge_length_with_term <- ttree$edge.length[ + ttree$edge[,2] <= Ntip(ttree)]+term_add + ttree$edge.length[ttree$edge[,2] <= Ntip(ttree)] <- new_edge_length_with_term } #ttree_basic <- ttree ##if type = basic, I don't have to do anything but set root.time - if(type == "aba"){ #if (type = "aba") then adds vartime to all branches (All Branch Additive) - if(is.na(vartime)){stop("No All Branch Additive Value Given!")} + if(type == "aba"){ + #if (type = "aba") then adds vartime to all branches (All Branch Additive) + if(is.na(vartime)){ + stop("No All Branch Additive Value Given!") + } ttree$edge.length <- ttree$edge.length+vartime } if(type == "zlba"){ #if (type = "zlba") then adds vartime to zero length branches (Zero Length Branch Additive) - if(is.na(vartime)){stop("No Branch Additive Value Given!")} - ttree$edge.length[ttree$edge.length<0.0001] <- ttree$edge.length[ttree$edge.length<0.0001]+vartime + if(is.na(vartime)){ + stop("No Branch Additive Value Given!") + } + new_edge_lengths <- ttree$edge.length[ttree$edge.length<0.0001]+vartime + ttree$edge.length[ttree$edge.length<0.0001] <- new_edge_lengths } if(type == "mbl"){ #if (type = "mbl") scales up all branches greater than vartime and subtracts from lower #as long as there are branches smaller than vartime - if(is.na(vartime)){stop("No Minimum Branch Length Value Given!")} + if(is.na(vartime)){ + stop("No Minimum Branch Length Value Given!") + } ttree <- minBranchLength(tree = ttree, mbl = vartime) } - if(type == "equal"|type == "equal_paleotree_legacy"|type == "equal_date.phylo_legacy"){ #G. Lloyd's "equal" method(s) + if(type == "equal" | type == "equal_paleotree_legacy" | type == "equal_date.phylo_legacy"){ + #G. Lloyd's "equal" method(s) if(type == "equal"){ #Newest of the NEW 08-19-14 - the most logical choice - #get a vector of zero-length branches ordered by the number of nodes separating the edge from the root - zbr <- cbind(1:Nedge(ttree),-node.depth.edgelength(unitLengthTree(ttree))[ttree$edge[,2]]) #Get branch list; 1st col = end-node, 2nd = # of nodes from root + #get a vector of zero-length branches ordered by + # the number of nodes separating the edge from the root + # + #Get branch list; 1st col = end-node, 2nd = # of nodes from root + zbr <- cbind(1:Nedge(ttree), + -node.depth.edgelength(unitLengthTree(ttree))[ttree$edge[,2]]) + } if(type == "equal_paleotree_legacy"){ #OLD #get a depth-ordered vector that identifies zero-length branches - zbr <- cbind(1:Nedge(ttree),node.depth(ttree)[ttree$edge[,2]]) #Get branch list; 1st col = end-node, 2nd = depth + # + #Get branch list; 1st col = end-node, 2nd = depth + zbr <- cbind(1:Nedge(ttree),node.depth(ttree)[ttree$edge[,2]]) } if(type == "equal_date.phylo_legacy"){ #NEW 02-03-04 - #get a TIME-TO-ROOT-ordered vector that identifies zero-length branches, as Graeme's DatePhylo originally worked prior to August 2014 - zbr <- cbind(1:Nedge(ttree),node.depth.edgelength(ttree)[ttree$edge[,2]]) #Get branch list; 1st col = end-node, 2nd = abs distance (time) from root + #get a TIME-TO-ROOT-ordered vector that identifies zero-length branches + # as Graeme's DatePhylo originally worked prior to August 2014 + # + #Get branch list; 1st col = end-node, 2nd = abs distance (time) from root + zbr <- cbind(1:Nedge(ttree),node.depth.edgelength(ttree)[ttree$edge[,2]]) } - zbr <- zbr[ttree$edge.length == 0,] #Parses zbr to just zero-length branches - zbr <- zbr[order(zbr[,2]),1] #order zbr by depth + # + #Parses zbr to just zero-length branches + zbr <- zbr[ttree$edge.length == 0,] + #order zbr by depth + zbr <- zbr[order(zbr[,2]),1] #if the edge lengths leading away from the root are somehow ZERO issue a warning if(is.null(vartime) & any(ttree$edge.length[ttree$edge[,1] == (Ntip(ttree)+1)] == 0)){ - stop("The equal method requires the edges leading away from the root to have non-zero length to begin with, perhaps increase vartime?")} - for(i in zbr){if (ttree$edge.length[i] == 0) { #starting with most shallow zlb, is this branch a zlb? - #if zlb, make a vector of mom-zlbs, going down the tree - brs <- ttree$edge[i,2] #branches to rescale, starting with picked branch - mom <- which(ttree$edge[i,1] == ttree$edge[,2]) - while(ttree$edge[mom,1] != (Ntip(ttree)+1) & ttree$edge.length[mom] == 0){ #keep going while preceding edge is zero len and isn't the root - brs[length(brs)+1] <- ttree$edge[mom,2] #keep adding these branches to brs - mom <- which(ttree$edge[mom,1] == ttree$edge[,2]) #reset mom + stop(paste0( + "The equal method requires edges leading away from root to initially have non-zero length\n", + " This expectation is current not met. Perhaps increase vartime?" + )) + } + # + + for(i in zbr){ + #starting with most shallow zlb, is this branch a zlb? + if (ttree$edge.length[i] == 0) { + #if zlb, make a vector of mom-zlbs, going down the tree + # + #branches to rescale, starting with picked branch + brs <- ttree$edge[i,2] + mom <- which(ttree$edge[i,1] == ttree$edge[,2]) + while(ttree$edge[mom,1] != (Ntip(ttree)+1) & ttree$edge.length[mom] == 0){ + #keep going while preceding edge is zero len and isn't the root + # + #keep adding these branches to brs + brs[length(brs)+1] <- ttree$edge[mom,2] + #reset mom + mom <- which(ttree$edge[mom,1] == ttree$edge[,2]) + } + #Add final branch (which isn't zlb) + brs[length(brs)+1] <- ttree$edge[mom,2] + #Amount of time to be shared + totbl <- sum(ttree$edge.length[match(brs,ttree$edge[,2])]) + ntime[brs[-1]] <- ntime[brs[-1]] +cumsum( + rep(totbl / length(brs), length(brs) - 1) + ) + #update branch lengths using ntime + ttree$edge.length <- sapply( + 1:Nedge(ttree),function(x) + ntime[ttree$edge[x,1]] - ntime[ttree$edge[x,2]] + ) } - brs[length(brs)+1] <- ttree$edge[mom,2] #Add final branch (which isn't zlb) - totbl <- sum(ttree$edge.length[match(brs,ttree$edge[,2])]) #Amount of time to be shared - ntime[brs[-1]] <- ntime[brs[-1]]+cumsum(rep(totbl/length(brs),length(brs)-1)) - ttree$edge.length <- sapply(1:Nedge(ttree),function(x) - ntime[ttree$edge[x,1]]-ntime[ttree$edge[x,2]]) #update branch lengths using ntime - }} + } } - #if add.term != FALSE, then taxon observed ranges are added to the tree, with the LADs becoming the location of the tips + # + #if add.term != FALSE + # then taxon observed ranges are added to the tree + # with the LADs becoming the location of the tips if(add.term & !inc.term.adj){ obs_ranges <- timeData[,1]-timeData[,2] term_id <- ttree$tip.label[ttree$edge[ttree$edge[,2] <= Ntip(ttree),2]] term_add <- sapply(term_id,function(x) obs_ranges[x]) - ttree$edge.length[ttree$edge[,2] <= Ntip(ttree)] <- ttree$edge.length[ttree$edge[,2] <= Ntip(ttree)]+term_add + new_term_edge_lengths <- ttree$edge.length[ttree$edge[,2] <= Ntip(ttree)]+term_add + ttree$edge.length[ttree$edge[,2] <= Ntip(ttree)] <- new_term_edge_lengths } #now add root.time: if(add.term){ @@ -743,21 +1137,31 @@ bin_timePaleoPhy <- function(tree,timeList,type = "basic",vartime = NULL,ntrees nonstoch.bin = FALSE,randres = FALSE,timeres = FALSE, sites = NULL,point.occur = FALSE,add.term = FALSE,inc.term.adj = FALSE, dateTreatment = "firstLast",node.mins = NULL,noisyDrop = TRUE,plot = FALSE){ - #wrapper for applying non-SRC time-scaling to timeData where FADs and LADs are given as bins - #see timePaleoPhy function for more details + # wrapper for applying non-SRC time-scaling to timeData + # where FADs and LADs are given as bins + # see timePaleoPhy function for more details #input is a list with (1) interval times matrix and (2) species FOs and LOs - #sites is a matrix, used to indicate if binned FADs or LADs of multiple species were obtained from the locality / time point - #i.e. the first appearance of species A, B and last appearance of C are all from the same lagerstatten - #this will fix these to always have the same date relative to each other across many trees - #this will assume that species listed for a site all are listed as being from the same interval... - #this function also assumes that the sites matrix is ordered exactly as the timeList data is - #if rand.obs = TRUE, the the function assumes that the LADs in timeList aren't where you actually want the tips (OLD) + # + #sites is a matrix, used to indicate if binned FADs or LADs of + # multiple species were obtained from the locality / time point + #i.e. the first appearance of species A, B and last appearance of C are all from the same lagerstatten + #this will fix these to always have the same date relative to each other across many trees + #this will assume that species listed for a site all are listed as being from the same interval... + #this function also assumes that the sites matrix is ordered exactly as the timeList data is + # + ################ + # (OLD) + #if rand.obs = TRUE, the the function assumes that the LADs in timeList aren't + # where you actually want the tips #instead, tips will be randomly placed anywhere in that taxon's range with uniform probability #thus, tip locations will differ slightly for each tree in the sample #this is useful when you have a specimen or measurement but you don't know its placement in the species' range + # + ###################### #default options #type = "basic";vartime = NULL;ntrees = 1;nonstoch.bin = FALSE;randres = FALSE;timeres = FALSE - #sites = NULL;point.occur = FALSE;add.term = FALSE;inc.term.adj = FALSE;rand.obs = FALSE;node.mins = NULL;plot = FALSE + # sites = NULL;point.occur = FALSE;add.term = FALSE;inc.term.adj = FALSE + # rand.obs = FALSE;node.mins = NULL;plot = FALSE #require(ape) if(!inherits(tree, "phylo")){ stop("tree is not of class phylo") @@ -777,105 +1181,187 @@ bin_timePaleoPhy <- function(tree,timeList,type = "basic",vartime = NULL,ntrees } } if(ntrees == 1 & !nonstoch.bin){ - message("Warning: Do not interpret a single tree; dates are stochastically pulled from uniform distributions")} - if(ntrees<1){stop("ntrees<1")} - if(!is.null(sites) & point.occur){stop("Inconsistent arguments, point.occur = TRUE would replace input 'sites' matrix\n Why not just make site assignments for first and last appearance the same in your input site matrix?")} + message( + "Warning: Do not interpret a single tree; dates are stochastically pulled from uniform distributions" + ) + } + if(ntrees<1){ + stop("ntrees<1") + } + if(!is.null(sites) & point.occur){ + stop( + "Inconsistent arguments, point.occur = TRUE would replace input 'sites' matrix\n Why not just make site assignments for first and last appearance the same in your input site matrix?" + ) + } if(!any(dateTreatment == c("firstLast","randObs"))){ - stop("dateTreatment must be one of 'firstLast' or 'randObs'!")} + stop("dateTreatment must be one of 'firstLast' or 'randObs'!") + } #clean out all taxa which are NA or missing for timeData - if(ntrees == 1 & randres){message("Warning: Do not interpret a single randomly-resolved tree")} - if(randres & timeres){stop( - "Inconsistent arguments: You cannot randomly resolve polytomies and resolve with respect to time simultaneously!")} - if(!is.null(sites) & point.occur){stop("Inconsistent arguments, point.occur = TRUE will replace input 'sites' matrix")} + if(ntrees == 1 & randres){ + message("Warning: Do not interpret a single randomly-resolved tree") + } + if(randres & timeres){ + stop( + "Inconsistent arguments: You cannot randomly resolve polytomies and resolve with respect to time simultaneously!" + ) + } + if(!is.null(sites) & point.occur){ + stop("Inconsistent arguments, point.occur = TRUE will replace input 'sites' matrix") + } droppers <- tree$tip.label[is.na(match(tree$tip.label,names(which(!is.na(timeList[[2]][,1])))))] if(length(droppers)>0){ - if(length(droppers) == Ntip(tree)){stop("Absolutely NO valid taxa shared between the tree and temporal data!")} + if(length(droppers) == Ntip(tree)){ + stop("Absolutely NO valid taxa shared between the tree and temporal data!") + } if(noisyDrop){ - message(paste("Warning: Following taxa dropped from tree:",paste0(droppers,collapse = ", ")))} + message(paste( + "Warning: Following taxa dropped from tree:",paste0(droppers,collapse = ", ") + )) + } tree <- drop.tip(tree,droppers) - if(Ntip(tree)<2){stop("Less than two valid taxa shared between the tree and temporal data!")} + if(Ntip(tree)<2){ + stop("Less than two valid taxa shared between the tree and temporal data!") + } timeList[[2]][which(!sapply(rownames(timeList[[2]]),function(x) any(x == tree$tip.label))),1] <- NA } if(!is.null(node.mins)){ if(length(droppers)>0){ #then... the tree has changed unpredictably, node.mins unusable - stop("node.mins not compatible with datasets where some taxa are drop; drop before analysis instead")} + stop( + "node.mins not compatible with datasets where some taxa are drop; drop before analysis instead" + ) + } if(Nnode(tree) != length(node.mins)){ - stop("node.mins must be same length as number of nodes in the input tree!")} + stop( + "node.mins must be same length as number of nodes in the input tree!" + ) + } } #best to drop taxa from timeList that aren't represented on the tree notTree <- rownames(timeList[[2]])[is.na(match(rownames(timeList[[2]]),tree$tip.label))] if(length(notTree)>0){ if(is.null(sites)){ if(noisyDrop){ - message(paste("Warning: Following taxa dropped from timeList:",paste0(notTree,collapse = ", ")))} + message(paste( + "Warning: Following taxa dropped from timeList:",paste0(notTree,collapse = ", ") + )) + } timeList[[2]] <- timeList[[2]][!is.na(match(rownames(timeList[[2]]),tree$tip.label)),] }else{ - stop("Some taxa in timeList not included on tree: no automatic taxon drop if 'sites' are given. Please remove from both sites and timeList and try again.") + stop( + "Some taxa in timeList not included on tree: no automatic taxon drop if 'sites' are given. Please remove from both sites and timeList and try again." + ) } } timeList[[2]] <- timeList[[2]][!is.na(timeList[[2]][,1]),] # if(ncol(timeList[[1]]) != 2 | ncol(timeList[[2]]) != 2){ - stop("Both timeList[[1]] and timeList[[2]] should have only two columns")} + stop("Both timeList[[1]] and timeList[[2]] should have only two columns") + } if(any(is.na(timeList[[1]])) | any(is.na(timeList[[2]]))){ - stop("Unexpected NAs in timeList")} + stop("Unexpected NAs in timeList") + } if(any(is.character(timeList[[1]])) | any(is.character(timeList[[2]]))){ - stop("Unexpected character-type data in timeList")} + stop("Unexpected character-type data in timeList") + } # - if(any(apply(timeList[[1]],1,diff)>0)){stop("timeList[[1]] not in intervals in time relative to modern")} - if(any(timeList[[1]][,2]<0)){stop("Some dates in timeList[[1]] <0 ?")} - if(any(apply(timeList[[2]],1,diff)<0)){stop("timeList[[2]] not in intervals numbered from first to last (1 to infinity)")} - if(any(timeList[[2]][,2]<0)){stop("Some dates in timeList[[2]] <0 ?")} - if(length(unique(rownames(timeList[[2]]))) != length(rownames(timeList[[2]]))){stop("Duplicate taxa in timeList[[2]]")} - if(length(unique(tree$tip.label)) != length(tree$tip.label)){stop("Duplicate tip taxon names in tree$tip.label")} + if(any(apply(timeList[[1]],1,diff)>0)){ + stop("timeList[[1]] not in intervals in time relative to modern") + } + if(any(timeList[[1]][,2]<0)){ + stop("Some dates in timeList[[1]] <0 ?") + } + if(any(apply(timeList[[2]],1,diff)<0)){ + stop("timeList[[2]] not in intervals numbered from first to last (1 to infinity)") + } + if(any(timeList[[2]][,2]<0)){ + stop("Some dates in timeList[[2]] <0 ?") + } + if(length(unique(rownames(timeList[[2]]))) != length(rownames(timeList[[2]]))){ + stop("Duplicate taxa in timeList[[2]]") + } + if(length(unique(tree$tip.label)) != length(tree$tip.label)){ + stop("Duplicate tip taxon names in tree$tip.label") + } if(length(rownames(timeList[[2]])) != length(tree$tip.label)){ - stop("Odd irreconcilable mismatch between timeList[[2]] and tree$tip.labels")} + stop("Odd irreconcilable mismatch between timeList[[2]] and tree$tip.labels") + } if(is.null(sites)){ if(point.occur){ if(any(timeList[[2]][,1] != timeList[[2]][,2])){ - stop("point.occur = TRUE but some taxa have FADs and LADs listed in different intervals?!")} + stop( + "point.occur = TRUE but some taxa have FADs and LADs listed in different intervals?!" + ) + } sites <- matrix(c(1:Ntip(tree),1:Ntip(tree)),Ntip(tree),2) }else{ sites <- matrix(1:(Ntip(tree)*2),,2) } }else{ #make sites a bunch of nicely behaved sorted integers - sites[,1] <- sapply(sites[,1],function(x) which(x == sort(unique(as.vector(sites))))) - sites[,2] <- sapply(sites[,2],function(x) which(x == sort(unique(as.vector(sites))))) + sites[,1] <- sapply(sites[,1],function(x) + which(x == sort(unique(as.vector(sites)))) + ) + sites[,2] <- sapply(sites[,2],function(x) + which(x == sort(unique(as.vector(sites)))) + ) } ttrees <- rmtree(ntrees,3) #get time ranges for sites siteTime <- matrix(,max(sites),2) - for (i in unique(as.vector(sites))){ #build two-col matrix of site's FADs and LADs - go <- timeList[[2]][which(sites == i)[1]] #find an interval for this site + # + for (i in unique(as.vector(sites))){ + #build two-col matrix of site's FADs and LADs + #find an interval for this site + go <- timeList[[2]][which(sites == i)[1]] siteTime[i,] <- timeList[[1]][go,] } + # #now let's stochastically draw new dates from the site times for(ntrb in 1:ntrees){ if(!nonstoch.bin){ bad_sites <- unique(as.vector(sites)) siteDates <- apply(siteTime,1,function(x) runif(1,x[2],x[1])) while(length(bad_sites)>0){ - siteDates[bad_sites] <- apply(siteTime[bad_sites,],1,function(x) runif(1,x[2],x[1])) - bad_sites <- unique(as.vector(sites[(siteDates[sites[,1]]-siteDates[sites[,2]])<0,])) + siteDates[bad_sites] <- apply(siteTime[bad_sites,],1, + function(x) runif(1,x[2],x[1]) + ) + bad_sites <- unique( + as.vector(sites[(siteDates[sites[,1]]-siteDates[sites[,2]])<0,]) + ) #print(length(bad_sites)) } - timeData <- cbind(siteDates[sites[,1]],siteDates[sites[,2]]) + timeData <- cbind(siteDates[sites[,1]], + siteDates[sites[,2]]) }else{ - timeData <- cbind(siteTime[sites[,1],1],siteTime[sites[,2],2]) + timeData <- cbind(siteTime[sites[,1],1], + siteTime[sites[,2],2]) } rownames(timeData) <- rownames(timeList[[2]]) # okay now send to timePaleoPhy - tree1 <- suppressMessages(timePaleoPhy(tree = tree,timeData = timeData, - type = type,vartime = vartime,ntrees = 1, - randres = randres,timeres = timeres, - add.term = add.term,inc.term.adj = inc.term.adj, - dateTreatment = dateTreatment, - node.mins = node.mins,plot = plot)) + tree1 <- suppressMessages( + timePaleoPhy( + tree = tree, + timeData = timeData, + type = type, + vartime = vartime, + ntrees = 1, + randres = randres, + timeres = timeres, + add.term = add.term, + inc.term.adj = inc.term.adj, + dateTreatment = dateTreatment, + node.mins = node.mins, + plot = plot + ) + ) colnames(timeData) <- c("FAD","LAD") tree1$ranges.used <- timeData names(tree1$edge.length) <- NULL ttrees[[ntrb]] <- tree1 } - if(ntrees == 1){ttrees <- ttrees[[1]]} + # + if(ntrees == 1){ + ttrees <- ttrees[[1]] + } + # return(ttrees) } \ No newline at end of file diff --git a/cran-comments.md b/cran-comments.md index 649aba39..7ad0b07d 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,3 +1,7 @@ ##### -10-01-18: Some new features, passes R CHECK both locally and on win builder. \ No newline at end of file +06-04-19: +Lots of revision to pre-existing code, passes R CHECK locally, and on win builder. + +10-01-18: +Some new features, passes R CHECK both locally and on win builder. \ No newline at end of file diff --git a/man/binTimeData.Rd b/man/binTimeData.Rd index 91026207..d402def0 100644 --- a/man/binTimeData.Rd +++ b/man/binTimeData.Rd @@ -8,9 +8,9 @@ binTimeData(timeData, int.length = 1, start = NA, int.times = NULL) } \arguments{ \item{timeData}{Two-column matrix of simulated first and last occurrences in -absolute continuous time} +absolute continuous time.} -\item{int.length}{Time interval length, default is 1 time unit} +\item{int.length}{Time interval length, default is 1 time-unit.} \item{start}{Starting time for calculating the intervals.} @@ -18,11 +18,11 @@ absolute continuous time} intervals to be used.} } \value{ -A list containing: \item{int.times}{A 2 column matrix with the start +A list containing: \item{\code{int.times}}{A 2-column matrix with the start and end times of the intervals used; time decreases relative to the -present.} \item{taxon.times}{A 2 column matrix with the first and last -occurrences of taxa in the intervals listed in int.times, with numbers -referring to the row of int.times.} +present.} \item{\code{taxon.times}}{A 2-column matrix with the first and last +occurrences of taxa in the intervals listed in \code{int.times}, with numbers +referring to the row of \code{int.times}.} } \description{ Converts a matrix of simulated continuous-time first occurrences and last @@ -59,19 +59,21 @@ decreasing, i.e. the present day is zero. However, the numbering of intervals giving in the output increases with time, as these are numbered relative to each other, from first to last. -As of version 1.7 of paleotree, taxa which are extant as indicated in timeData as being -in a time interval bounded (0,0), unless time-bins are preset using -argument int.times (prior to version 1.5 they were erroneously listed as +As of version 1.7 of paleotree, taxa which are +extant as indicated in timeData as being +in a time interval bounded \code{(0, 0)}, unless time-bins are preset using +argument \code{int.times} (prior to version 1.5 they were erroneously listed as NA). } \examples{ #Simulate some fossil ranges with simFossilRecord set.seed(444) -record <- simFossilRecord(p = 0.1, q = 0.1, nruns = 1, - nTotalTaxa = c(30,40), nExtant = 0) +record <- simFossilRecord( + p = 0.1, q = 0.1, nruns = 1, + nTotalTaxa = c(30,40), nExtant = 0) taxa <- fossilRecord2fossilTaxa(record) -#simulate a fossil record with imperfect sampling with sampleRanges() +#simulate a fossil record with imperfect sampling via sampleRanges rangesCont <- sampleRanges(taxa,r = 0.5) #Now let's use binTimeData() to bin in intervals of 1 time unit rangesDisc <- binTimeData(rangesCont,int.length = 1) @@ -79,27 +81,48 @@ rangesDisc <- binTimeData(rangesCont,int.length = 1) equalDiscInt <- taxicDivDisc(rangesDisc) #example with pre-set intervals input (including overlapping) -presetIntervals <- cbind(c(1000,990,970,940),c(980,970,950,930)) -rangesDisc1 <- binTimeData(rangesCont,int.times = presetIntervals) +presetIntervals <- cbind( + c(1000, 990, 970, 940), + c(980, 970, 950, 930) + ) +rangesDisc1 <- binTimeData(rangesCont, + int.times = presetIntervals) + +# plot the diversity curve with these uneven bins taxicDivDisc(rangesDisc1) -#now let's plot diversity with (different) equal length intervals used above -taxicDivDisc(rangesDisc1,int.times = equalDiscInt[,1:2]) +# now let's plot the diversity from these unequal-length bins + # with the original equal length intervals from above +taxicDivDisc(rangesDisc1, int.times = equalDiscInt[,1:2]) + + +#################################### #example with extant taxa set.seed(444) -record <- simFossilRecord(p = 0.1, q = 0.1, nruns = 1, - nTotalTaxa = c(30,40)) +record <- simFossilRecord( + p = 0.1, q = 0.1, nruns = 1, + nTotalTaxa = c(30,40)) taxa <- fossilRecord2fossilTaxa(record) -#simulate a fossil record with imperfect sampling with sampleRanges() -rangesCont <- sampleRanges(taxa,r = 0.5,,modern.samp.prob = 1) -#Now let's use binTimeData() to bin in intervals of 1 time unit -rangesDisc <- binTimeData(rangesCont,int.length = 1) +#simulate a fossil record with imperfect sampling via sampleRanges +rangesCont <- sampleRanges( + taxa, r = 0.5, + modern.samp.prob = 1) +#Now let's use binTimeDat to bin into intervals of 1 time-unit +rangesDisc <- binTimeData(rangesCont, + int.length = 1) #plot with taxicDivDisc() taxicDivDisc(rangesDisc) -#example with pre-set intervals input (including overlapping) -presetIntervals <- cbind(c(40,30,20,10),c(30,20,10,0)) -rangesDisc1 <- binTimeData(rangesCont,int.times = presetIntervals) + +################################################# +#example with pre-set intervals input + # (including overlapping) +presetIntervals <- cbind( + c(40, 30, 20, 10), + c(30, 20, 10, 0) + ) +rangesDisc1 <- binTimeData(rangesCont, + int.times = presetIntervals) taxicDivDisc(rangesDisc1) } diff --git a/man/createMrBayesTipDatingNexus.Rd b/man/createMrBayesTipDatingNexus.Rd index 5e4d1cd6..92ee1ebc 100644 --- a/man/createMrBayesTipDatingNexus.Rd +++ b/man/createMrBayesTipDatingNexus.Rd @@ -33,7 +33,8 @@ the first and last appearances has uncertainty bounds of zero.} All taxa not listed in the outgroup will be constrained to be a monophyletic ingroup, for sake of rooting the resulting dated tree. Either \code{treeConstraints} or \code{outgroupTaxa} must be defined, but \emph{not both}. -If the outgroup-ingroup split is not present on the supplied \code{treeConstraints}, add that split to \code{treeConstraints} manually.} +If the outgroup-ingroup split is not present on the supplied +\code{treeConstraints}, add that split to \code{treeConstraints} manually.} \item{treeConstraints}{An object of class \code{phylo}, from which (if \code{treeConstraints} is supplied) the set topological constraints are derived, as @@ -71,9 +72,9 @@ the resulting posterior trees may place the OTU assigned to the last occurrence first occurrence in temporal order (but the assignment, in that case, was entirely arbitrary). When \code{whichAppearance = "rangeThrough"}, each taxon will be duplicated into as many OTUs as each -interval that a taxon ranges through (in a timeList format, see other -paleotree functions), with the corresponding age uncertainties for those intervals. -If the input tipTimes is not a list of length = 2, however, the function will +interval that a taxon ranges through (in a \code{timeList} format, see other +\code{paleotree} functions), with the corresponding age uncertainties for those intervals. +If the input \code{tipTimes} is not a list of \code{length = 2}, however, the function will return an error under this option.} \item{treeAgeOffset}{A parameter given by the user controlling the offset @@ -148,7 +149,7 @@ is defined, then an error will be returned.} \item{morphModel}{This argument can be used to switch between two end-member models of morphological evolution in MrBayes, here named 'strong' and 'relaxed', for the 'strong assumptions' -and 'relaxed assumptions' models described by Bapst, Schreiber and Carlson (Systematic Biology). +and 'relaxed assumptions' models described by Bapst et al. (2018, Syst. Biol.). The default is a model which makes very 'strong' assumptions about the process of morphological evolution, while the 'relaxed' alternative allows for considerably more heterogeneity in the rate of morphological evolution across characters, and in the forward and reverse transition @@ -212,15 +213,21 @@ Users must supply a data set of tip ages (in various formats), which are used to construct age calibrations commands on the tip taxa (via paleotree function \code{\link{createMrBayesTipCalibrations}}). The user must also supply some topological constraint: -either a set of taxa designated as the outgroup, which is then converted into a command constraining -the monophyly on the ingroup taxa, which is presumed to be all taxa \emph{not} listed in the outgroup. -Alternatively, a user may supply a tree which is then converted into a series of hard topological -constraints (via function \code{\link{createMrBayesConstraints}}. Both types of topological constraints -cannot be applied. Many of the options available with \code{\link{createMrBayesTipCalibrations}} are available with this function, -allowing users to choose between fixed calibrations or uniform priors that approximate stratigraphic uncertainty. +either a set of taxa designated as the outgroup, which +is then converted into a command constraining +the monophyly on the ingroup taxa, which is presumed to be +all taxa \emph{not} listed in the outgroup. +Alternatively, a user may supply a tree which is then +converted into a series of hard topological constraints +(via function \code{\link{createMrBayesConstraints}}. +Both types of topological constraints cannot be applied. + +Many of the options available with \code{\link{createMrBayesTipCalibrations}} +are available with this function, allowing users to choose between fixed +calibrations or uniform priors that approximate stratigraphic uncertainty. In addition, the user may also supply a path to a text file -presumed to be a NEXUS file containing character data formatted for use with \emph{MrBayes}. - +presumed to be a NEXUS file containing character +data formatted for use with \emph{MrBayes}. The taxa listed in \code{tipTimes} must match the taxa in \code{treeConstraints}, if such is supplied. If supplied, the taxa in \code{outgroupTaxa} @@ -249,8 +256,6 @@ patterns of ancestor-descendant relationships in \emph{cal3}, than in tip-dating } \examples{ -# let's do some examples - # load retiolitid dataset data(retiolitinae) @@ -339,8 +344,9 @@ in fossil Canidae: the importance of tree priors. \emph{Biology Letters} 12(8). The rationale behind the two alternative morphological models are described in more detail here: -Bapst, D. W., H. A. Schreiber, and S. J. Carlson. In press. Combined analysis of extant Rhynchonellida -(Brachiopoda) using morphological and molecular data. \emph{Systematic Biology} doi: 10.1093/sysbio/syx049 +Bapst, D. W., H. A. Schreiber, and S. J. Carlson. 2018. +Combined Analysis of Extant Rhynchonellida (Brachiopoda) using Morphological +and Molecular Data. \emph{Systematic Biology} 67(1):32-48. doi: 10.1093/sysbio/syx049 } \seealso{ This function wraps various aspects of the functions \code{\link{createMrBayesConstraints}} diff --git a/man/exhaustionFunctions.Rd b/man/exhaustionFunctions.Rd index a058c6a3..143ee069 100644 --- a/man/exhaustionFunctions.Rd +++ b/man/exhaustionFunctions.Rd @@ -18,7 +18,8 @@ charExhaustPlot(exhaustion_info, changesType, xlab = "Total Characters", ylab = NULL, main = NULL, xsize = 3) } \arguments{ -\item{phyloTree}{A phylogenetic tree of class \code{phylo} as used by package \code{ape}.} +\item{phyloTree}{A phylogenetic tree of class \code{phylo} +as used by package \code{ape}.} \item{charData}{A \code{data.frame} of morphological character codings (a morphological 'matrix'), with taxon units @@ -42,8 +43,7 @@ same number of taxa (rows) as in character coding value, by default \code{"?"}.} \item{inapplicableValue}{The string value indicating an -inapplicable character coding value, by default \code{"-"}. -the extraction of this is automated if you use .} +inapplicable character coding value, by default \code{"-"}.} \item{exhaustion_info}{The list of results output from function \code{accioExhaustionCurve}.} @@ -60,8 +60,10 @@ modeling the size and distribution of ideal for modeling the size and distribution of \emph{character} space).} -\item{models}{A vector of type \code{character} naming models to be fit. -Default is \code{c("exponential","gamma","lognormal","zipf")}.} +\item{models}{A vector of type \code{character} +naming various models to be fit. +The default fits the models \code{"exponential"}, \code{"gamma"}, +\code{"lognormal"}, and \code{"zipf"}.} \item{xlab}{Label for the X axis; \code{"Total Characters"} by default.} diff --git a/man/fixRootTime.Rd b/man/fixRootTime.Rd index a1248cba..825ed5e7 100644 --- a/man/fixRootTime.Rd +++ b/man/fixRootTime.Rd @@ -24,9 +24,9 @@ not greater than the new \code{$root.time} appended to the output tree.} \item{fixingMethod}{must be an character value, with a length of 1. -If \code{fixingMethod = "matchCladeTransferNodeAge"}, -the default option, the -\code{$root.time} of the new tree is determined by + +The default option \code{fixingMethod = "matchCladeTransferNodeAge"}, +will determine the \code{$root.time} of the new tree by comparing the clades of taxa between the two input trees. The new root age assigned is the age of (\emph{1}) the \code{treeOrig} clade that contains \emph{all} @@ -34,6 +34,7 @@ taxa present in \code{treeNew} and, if the set of (1) contains multiple clades, (\emph{2}) the clade in the first set that contains the fewest taxa not in \code{treeNew}. + If \code{fixingMethod = "rescaleUsingTipToRootDist"}, the \code{root.time} assigned to \code{treeNew} is the \code{$root.time} of \code{treeOrig}, adjusted diff --git a/man/multiDiv.Rd b/man/multiDiv.Rd index af98ab33..250a7da1 100644 --- a/man/multiDiv.Rd +++ b/man/multiDiv.Rd @@ -124,50 +124,96 @@ the standard error. \examples{ set.seed(444) -record <- simFossilRecord(p = 0.1, q = 0.1, nruns = 1, - nTotalTaxa = c(30,40), nExtant = 0) +record <- simFossilRecord( + p = 0.1, q = 0.1, + nruns = 1, + nTotalTaxa = c(30,40), + nExtant = 0) taxa <- fossilRecord2fossilTaxa(record) rangesCont <- sampleRanges(taxa, r = 0.5) rangesDisc <- binTimeData(rangesCont, int.length = 1) cladogram <- taxa2cladogram(taxa, plot = TRUE) #using multiDiv with very different data types -ttree <- timePaleoPhy(cladogram, rangesCont, type = "basic", add.term = TRUE, plot = FALSE) +ttree <- timePaleoPhy( + cladogram, + rangesCont, + type = "basic", + add.term = TRUE, + plot = FALSE + ) + input <- list(rangesCont, rangesDisc, ttree) multiDiv(input, plot = TRUE) #using fixed interval times -multiDiv(input, int.times = rangesDisc[[1]], plot = TRUE) +multiDiv(input, + int.times = rangesDisc[[1]], + plot = TRUE) #using multiDiv with samples of trees -ttrees <- timePaleoPhy(cladogram, rangesCont, type = "basic", - randres = TRUE, ntrees = 10, add.term = TRUE) +ttrees <- timePaleoPhy( + cladogram, + rangesCont, + type = "basic", + randres = TRUE, + ntrees = 10, + add.term = TRUE + ) + multiDiv(ttrees) -#uncertainty in diversity history is solely due to - #the random resolution of polytomies + +# uncertainty in diversity history is solely due to + # the random resolution of polytomies -#multiDiv can also take output from simFossilRecord, via fossilRecord2fossilTaxa -#what do many simulations run under some set of conditions 'look' like on average? +################ +# multiDiv can also take output from simFossilRecord + # via fossilRecord2fossilTaxa + +# what do many simulations run under some set of + # conditions 'look' like on average? set.seed(444) -records <- simFossilRecord(p = 0.1, q = 0.1, nruns = 10, - totalTime = 30, plot = TRUE) -taxa <- sapply(records,fossilRecord2fossilTaxa) +records <- simFossilRecord( + p = 0.1, + q = 0.1, + nruns = 10, + totalTime = 30, + plot = TRUE + ) + +taxa <- sapply(records, fossilRecord2fossilTaxa) + multiDiv(taxa) #increasing cone of diversity! -#Even better on a log scale: + +#Its even better on a log scale: multiDiv(taxa, plotLogRich = TRUE) +####################################### #pure-birth example with simFossilRecord #note that conditioning is tricky + set.seed(444) -recordsPB <- simFossilRecord(p = 0.1, q = 0, nruns = 10, - totalTime = 30,plot = TRUE) -taxaPB <- sapply(recordsPB,fossilRecord2fossilTaxa) -multiDiv(taxaPB,plotLogRich = TRUE) +recordsPB <- simFossilRecord( + p = 0.1, + q = 0, + nruns = 10, + totalTime = 30, + plot = TRUE + ) + +taxaPB <- sapply(recordsPB, fossilRecord2fossilTaxa) +multiDiv(taxaPB, plotLogRich = TRUE) #compare many discrete diversity curves -discreteRanges <- lapply(taxa,function(x) - binTimeData(sampleRanges(x, r = 0.5, - min.taxa = 1), int.length = 7)) +discreteRanges <- lapply(taxa, function(x) + binTimeData( + sampleRanges(x, + r = 0.5, + min.taxa = 1 + ), + int.length = 7) + ) + multiDiv(discreteRanges) layout(1) diff --git a/man/occData2timeList.Rd b/man/occData2timeList.Rd index e1b941f3..b4da2a24 100644 --- a/man/occData2timeList.Rd +++ b/man/occData2timeList.Rd @@ -17,8 +17,10 @@ the 'pbdb' vocab or 'com' (compact) vocab for early and late age bounds.} "zoneOverlap". Please see details below.} } \value{ -Returns a standard timeList data object, as used by many other paleotree functions, like -\code{\link{bin_timePaleoPhy}}, \code{\link{bin_cal3TimePaleoPhy}} and \code{\link{taxicDivDisc}} +Returns a standard timeList data object, as used by +many other paleotree functions, like +\code{\link{bin_timePaleoPhy}}, \code{\link{bin_cal3TimePaleoPhy}} +and \code{\link{taxicDivDisc}} } \description{ This function converts occurrence data, given as a list where each element @@ -76,13 +78,20 @@ endpoints of a taxon's range without ignoring uncertainty from any particular se \examples{ data(graptPBDB) -graptOccSpecies <- taxonSortPBDBocc(graptOccPBDB,rank = "species",onlyFormal = FALSE) +graptOccSpecies <- taxonSortPBDBocc( + data = graptOccPBDB, + rank = "species", + onlyFormal = FALSE) graptTimeSpecies <- occData2timeList(occList = graptOccSpecies) head(graptTimeSpecies[[1]]) head(graptTimeSpecies[[2]]) -graptOccGenus <- taxonSortPBDBocc(graptOccPBDB,rank = "genus",onlyFormal = FALSE) +graptOccGenus <- taxonSortPBDBocc( + data = graptOccPBDB, + rank = "genus", + onlyFormal = FALSE + ) graptTimeGenus <- occData2timeList(occList = graptOccGenus) layout(1:2) @@ -91,47 +100,53 @@ taxicDivDisc(graptTimeGenus) # the default interval calculation is "dateRange" # let's compare to the other option, "occRange" - # for species + # but now for graptolite *species* -graptOccRange <- occData2timeList(occList = graptOccSpecies, intervalType = "occRange") +graptOccRange <- occData2timeList( + occList = graptOccSpecies, + intervalType = "occRange" + ) #we would expect no change in the diversity curve - #because there are only changes in th - #earliest bound for the FAD - #latest bound for the LAD + #because there are only changes in th + #earliest bound for the FAD + #latest bound for the LAD #so if we are depicting ranges within maximal bounds - #dateRanges has no effect + #dateRanges has no effect layout(1:2) taxicDivDisc(graptTimeSpecies) taxicDivDisc(graptOccRange) -#yep, identical +#yep, identical! #so how much uncertainty was gained by using dateRange? -# write a simple function for getting uncertainty in first and last - # appearance dates from a timeList object +# write a function for getting uncertainty in first and last + # appearance dates from a timeList object sumAgeUncert <- function(timeList){ - fourDate <- timeList2fourDate(timeList) - perOcc <- (fourDate[,1]-fourDate[,2])+(fourDate[,3]-fourDate[,4]) - sum(perOcc) - } + fourDate <- timeList2fourDate(timeList) + perOcc <- (fourDate[,1] - fourDate[,2]) + + (fourDate[,3] - fourDate[,4]) + sum(perOcc) + } #total amount of uncertainty in occRange dataset sumAgeUncert(graptOccRange) #total amount of uncertainty in dateRange dataset sumAgeUncert(graptTimeSpecies) #the difference -sumAgeUncert(graptOccRange)-sumAgeUncert(graptTimeSpecies) +sumAgeUncert(graptOccRange) - sumAgeUncert(graptTimeSpecies) #as a proportion -1-(sumAgeUncert(graptTimeSpecies)/sumAgeUncert(graptOccRange)) +1 - (sumAgeUncert(graptTimeSpecies) / sumAgeUncert(graptOccRange)) #a different way of doing it -dateChange <- timeList2fourDate(graptTimeSpecies)-timeList2fourDate(graptOccRange) -apply(dateChange,2,sum) +dateChange <- timeList2fourDate(graptTimeSpecies) - + timeList2fourDate(graptOccRange) +apply(dateChange, 2, sum) #total amount of uncertainty removed by dateRange algorithm sum(abs(dateChange)) layout(1) + } \seealso{ Occurrence data as commonly used with \code{paleotree} functions can diff --git a/man/parentChild2TaxonTree.Rd b/man/parentChild2TaxonTree.Rd index 008ae54c..b23eedf1 100644 --- a/man/parentChild2TaxonTree.Rd +++ b/man/parentChild2TaxonTree.Rd @@ -27,10 +27,12 @@ is done and users should beware, as such trees can lead to hard-crashes of R.} } \value{ -A phylogeny of class 'phylo', with tip taxa as controlled by argument \code{tipSet}. +A phylogeny of class 'phylo', with tip taxa as +controlled by argument \code{tipSet}. The output tree is returned with no edge lengths. -The names of higher taxa than the tips should be appended as the element $node.label for the internal nodes. +The names of higher taxa than the tips should be appended +as the element $node.label for the internal nodes. } \description{ This function takes a two-column matrix of taxon names, @@ -49,64 +51,84 @@ is printed. \examples{ #let's create a small, really cheesy example -pokexample <- rbind(cbind("Squirtadae",c("Squirtle","Blastoise","Wartortle")), - c("Shelloidea","Lapras"),c("Shelloidea","Squirtadae"), - c("Pokezooa","Shelloidea"),c("Pokezooa","Parasect"), - c("Rodentapokemorpha","Linoone"),c("Rodentapokemorpha","Sandshrew"), - c("Rodentapokemorpha","Pikachu"),c("Hirsutamona","Ursaring"), - c("Hirsutamona","Rodentapokemorpha"),c("Pokezooa","Hirsutamona")) +pokexample <- rbind( + cbind("Squirtadae", c("Squirtle","Blastoise","Wartortle")), + c("Shelloidea","Lapras"), c("Shelloidea","Squirtadae"), + c("Pokezooa","Shelloidea"), c("Pokezooa","Parasect"), + c("Rodentapokemorpha","Linoone"), c("Rodentapokemorpha","Sandshrew"), + c("Rodentapokemorpha","Pikachu"), c("Hirsutamona","Ursaring"), + c("Hirsutamona","Rodentapokemorpha"), c("Pokezooa","Hirsutamona") + ) #Default: tipSet = 'nonParents' -pokeTree <- parentChild2taxonTree(pokexample, tipSet = "nonParents") -plot(pokeTree);nodelabels(pokeTree$node.label) +pokeTree <- parentChild2taxonTree( + parentChild = pokexample, + tipSet = "nonParents") +plot(pokeTree) +nodelabels(pokeTree$node.label) #Get ALL taxa as tips with tipSet = 'all' -pokeTree <- parentChild2taxonTree(pokexample, tipSet = "all") -plot(pokeTree);nodelabels(pokeTree$node.label) +pokeTree <- parentChild2taxonTree( + parentChild = pokexample, + tipSet = "all") +plot(pokeTree) +nodelabels(pokeTree$node.label) \dontrun{ -# let's try a dataset where not all the taxon relationships lead to a common root +# let's try a dataset where not all the + # taxon relationships lead to a common root -pokexample_bad <- rbind(cbind("Squirtadae",c("Squirtle","Blastoise","Wartortle")), - c("Shelloidea","Lapras"),c("Shelloidea","Squirtadae"), - c("Pokezooa","Shelloidea"),c("Pokezooa","Parasect"), - c("Rodentapokemorpha","Linoone"),c("Rodentapokemorpha","Sandshrew"), - c("Rodentapokemorpha","Pikachu"),c("Hirsutamona","Ursaring"), - c("Hirsutamona","Rodentapokemorpha"),c("Pokezooa","Hirsutamona"), - c("Umbrarcheota","Gengar")) +pokexample_bad <- rbind( + cbind("Squirtadae", c("Squirtle","Blastoise","Wartortle")), + c("Shelloidea","Lapras"), c("Shelloidea","Squirtadae"), + c("Pokezooa","Shelloidea"), c("Pokezooa","Parasect"), + c("Rodentapokemorpha","Linoone"), c("Rodentapokemorpha","Sandshrew"), + c("Rodentapokemorpha","Pikachu"), c("Hirsutamona","Ursaring"), + c("Hirsutamona","Rodentapokemorpha"), c("Pokezooa","Hirsutamona"), + c("Umbrarcheota","Gengar") + ) -#this should return an error, as Gengar doesn't share common root -pokeTree <- parentChild2taxonTree(pokexample_bad) +# this should return an error + # as Gengar doesn't share common root +pokeTree <- parentChild2taxonTree(parentChild = pokexample_bad) # another example, where a taxon is listed as both parent and child -pokexample_bad2 <- rbind(cbind("Squirtadae",c("Squirtle","Blastoise","Wartortle")), - c("Shelloidea",c("Lapras","Squirtadae","Shelloidea")), - c("Pokezooa","Shelloidea"),c("Pokezooa","Parasect"), - c("Rodentapokemorpha","Linoone"),c("Rodentapokemorpha","Sandshrew"), - c("Rodentapokemorpha","Pikachu"),c("Hirsutamona","Ursaring"), - c("Hirsutamona","Rodentapokemorpha"),c("Pokezooa","Hirsutamona"), - c("Umbrarcheota","Gengar")) +pokexample_bad2 <- rbind( + cbind("Squirtadae", c("Squirtle","Blastoise","Wartortle")), + c("Shelloidea", c("Lapras","Squirtadae","Shelloidea")), + c("Pokezooa","Shelloidea"), c("Pokezooa","Parasect"), + c("Rodentapokemorpha","Linoone"), c("Rodentapokemorpha","Sandshrew"), + c("Rodentapokemorpha","Pikachu"), c("Hirsutamona","Ursaring"), + c("Hirsutamona","Rodentapokemorpha"), c("Pokezooa","Hirsutamona"), + c("Umbrarcheota","Gengar") + ) #this should return an error, as Shelloidea is its own parent -pokeTree <- parentChild2taxonTree(pokexample_bad2) +pokeTree <- parentChild2taxonTree(parentChild = pokexample_bad2) } -# note that we should even be able to do this with ancestor-descendent pairs from - # simulated datasets from simFossilRecord, like so: +# note that we should even be able to do this + # with ancestor-descendent pairs from + # simulated datasets from simFossilRecord, like so: set.seed(444) -record <- simFossilRecord(p = 0.1, q = 0.1, nruns = 1, - nTotalTaxa = c(30,40), nExtant = 0) +record <- simFossilRecord( + p = 0.1, q = 0.1, nruns = 1, + nTotalTaxa = c(30, 40), + nExtant = 0 + ) taxa <- fossilRecord2fossilTaxa(record) -# need to reorder the columns so parents (ancestors) first, then children -parentChild2taxonTree(taxa[,2:1]) -# now note that it issues a warning that the input wasn't type character - # and it will be coerced to be such +# need to reorder the columns so parents + # (ancestors) first, then children +parentChild2taxonTree(parentChild = taxa[,2:1]) +# now note that it issues a warning that + # the input wasn't type character + # and it will be coerced to be such } \seealso{ diff --git a/man/pqr2Ps.Rd b/man/pqr2Ps.Rd index 81f6c2fe..4deb3873 100644 --- a/man/pqr2Ps.Rd +++ b/man/pqr2Ps.Rd @@ -62,26 +62,50 @@ solution is the default. As it would be very simple for any user to look this up in the code anyway, here's the unpublished equation for the exact solution: \eqn{Ps = 1-(((p+q+r)-(sqrt(((p+q+r)^2)-(4*p*q))))/(2*p))} + +The above exact solution was independent derived and published by Dider et al. (2017). +Also see Wagner (2019) for additional discussion of this value and its importance for +understanding the timing of branching events in an imperfect fossil record. } \examples{ #with exact solution -pqr2Ps(p = 0.1,q = 0.1,r = 0.1,useExact = TRUE) +pqr2Ps( + p = 0.1, + q = 0.1, + r = 0.1, + useExact = TRUE + ) + #with inexact solution -pqr2Ps(p = 0.1,q = 0.1,r = 0.1,useExact = TRUE) +pqr2Ps( + p = 0.1, + q = 0.1, + r = 0.1, + useExact = TRUE + ) + } \references{ -Bapst, D. W., E. A. King and M. W. Pennell. In prep. Probability models +Bapst, D. W., E. A. King and M. W. Pennell. Unpublished. Probability models for branch lengths of paleontological phylogenies. Bapst, D. W. 2013. A stochastic rate-calibrated method for time-scaling phylogenies of fossil taxa. \emph{Methods in Ecology and Evolution}. 4(8):724-733. + +Didier, G., M. Fau, and M. Laurin. 2017. Likelihood of Tree Topologies +with Fossils and Diversification Rate Estimation. \emph{Systematic Biology} +66(6):964-987. + +Wagner, P. J. 2019. On the probabilities of branch durations and +stratigraphic gaps in phylogenies of fossil taxa when rates of +diversification and sampling vary over time. \emph{Paleobiology} 45(1):30-55. } \seealso{ \code{\link{SamplingConv}} } \author{ -This function is entirely the product of a joint effort between the package author +This function is entirely the product of a joint unpublished effort between the package author (David W. Bapst), Emily A. King and Matthew W. Pennell. In particular, Emily King solved a nasty bit of calculus to get the inexact solution and later re-derived the function with a quadratic methodology to get the exact solution. Some elements diff --git a/man/simFossilRecord.Rd b/man/simFossilRecord.Rd index 1a56f1aa..ff261ed4 100644 --- a/man/simFossilRecord.Rd +++ b/man/simFossilRecord.Rd @@ -13,29 +13,43 @@ simFossilRecord(p, q, r = 0, anag.rate = 0, prop.bifurc = 0, sortNames = FALSE, plot = FALSE) } \arguments{ -\item{p, q, r, anag.rate}{These parameters control the instantaneous ('per-capita') rates of branching, extinction, -sampling and anagenesis, respectively. These can be given as a number equal to or greater than zero, or as a -character string which will be interpreted as an algebraic equation. These equations can make use of three -quantities which will/may change throughout the simulation: the standing richness is \code{N}, the -current time passed since the start of the simulation is \code{T}, the present duration of a given still-living +\item{p, q, r, anag.rate}{These parameters control the instantaneous +('per-capita') rates of branching, extinction, +sampling and anagenesis, respectively. These can be given as a number +equal to or greater than zero, or as a +character string which will be interpreted as an algebraic equation. +These equations can make use of three +quantities which will/may change throughout the simulation: the +standing richness is \code{N}, the current time passed since the start +of the simulation is \code{T}, the present duration of a given still-living lineage since its origination time is code{D}, and the current branching rate is \code{P} (corresponding to the argument name \code{p}). Note that \code{P} cannot be used in equations for the branching rate itself; it is for making other rates relative to the branching rate. -By default, the rates \code{r} and \code{anag.rate} are set to zero, so that the default simulator is a birth-death -simulator. -Rates set to \code{ = Inf} are treated as if 0. When a rate is set to 0, this event type will not occur in the simulation. -Setting certain processes to zero, like sampling, may increase simulation efficiency, if the goal is a birth-death or + +By default, the rates \code{r} and \code{anag.rate} are set to zero, +so that the default simulator is a birth-death simulator. +Rates set to \code{ = Inf} are treated as if 0. When a rate is set +to 0, this event type will not occur in the simulation. +Setting certain processes to zero, like sampling, may increase +simulation efficiency, if the goal is a birth-death or pure-birth model. -See documentation for argument \code{negRatesAsZero} about the treatment of rates that decrease below zero. +See documentation for argument \code{negRatesAsZero} about the +treatment of rates that decrease below zero. Notation of branching, extinction and sampling rates as \code{p, q, r} -follows what is typical for the paleobiology literature (e.g. Foote, 1997), not the Greek letters \code{lambda, mu, phi} -found more typically in the biological literature (e.g. Stadler, 2009; Heath et al., 2014; Gavryushkina et al., 2014).} - -\item{prop.cryptic, prop.bifurc}{These parameters control (respectively) the proportion of branching events that have -morphological differentiation, versus those that are cryptic (\code{prop.cryptic}) and the proportion of morphological -branching events that are bifurcating, as opposed to budding. Both of these proportions must be a number between 0 and 1. -By default, both are set to zero, meaning all branching events are events of budding cladogenesis. See description of +follows what is typical for the paleobiology literature +(e.g. Foote, 1997), not the Greek letters \code{lambda, mu, phi} +found more typically in the biological literature (e.g. Stadler, 2009; +Heath et al., 2014; Gavryushkina et al., 2014).} + +\item{prop.cryptic, prop.bifurc}{These parameters control +(respectively) the proportion of branching events that have +morphological differentiation, versus those that are cryptic +(\code{prop.cryptic}) and the proportion of morphological +branching events that are bifurcating, as opposed to budding. +Both of these proportions must be a number between 0 and 1. +By default, both are set to zero, meaning all branching events +are events of budding cladogenesis. See description of the available models of morphological differentiation in the \emph{Description} section.} \item{modern.samp.prob}{The probability that a taxon is sampled at the modern time @@ -43,42 +57,61 @@ the available models of morphological differentiation in the \emph{Description} slice). Must be a number between 0 and 1. If 1, all taxa that survive to the modern day (to the \code{sliceTime}) are sampled, if 0, none are.} -\item{startTaxa}{Number of initial taxa to begin a simulation with. All will have the simulation start date +\item{startTaxa}{Number of initial taxa to begin a simulation with. +All will have the simulation start date listed as their time of origination.} -\item{nruns}{Number of simulation datasets to accept, save and output. If \code{nruns = 1}, output will be a single -object of class \code{fossilRecordSimulation}, and if \code{nruns} is greater than 1, a list will be output composed of +\item{nruns}{Number of simulation datasets to accept, save and output. +If \code{nruns = 1}, output will be a single +object of class \code{fossilRecordSimulation}, and +if \code{nruns} is greater than 1, a list will be output composed of \code{nruns} objects of class \code{fossilRecordSimulation}.} \item{maxAttempts}{Number of simulation attempts allowed before the simulation process is halted with an error message. Default is \code{Inf}.} -\item{totalTime, nTotalTaxa, nExtant, nSamp}{These arguments represent stopping and -acceptance conditions for simulation runs. They are respectively \code{totalTime}, the -total length of the simulation in time-units, \code{nTotalTaxa}, the total number of taxa -over the past evolutionary history of the clade, \code{nExtant}, the total number of extant taxa at -the end of the simulation and \code{nSamp} the total number of sampled taxa (not counting extant -taxa sampled at the modern day). These are used to determine when to end simulation runs, and whether to accept -or reject them as output. They can be input as a vector of two numbers, representing minimum -and maximum values of a range for accepted simulation runs (i.e. the simulation length can be between 0 and -1000 time-steps, by default), or as a single number, representing a point condition (i.e. if -\code{nSamp = 100} then the only simulation runs with exactly 100 taxa sampled will be output). -Note that it is easy to set combinations of parameters and run conditions that are impossible -to produce satisfactory input under, in which case \code{simFossilRecord} would run in a nonstop loop. -How cryptic taxa are counted for the sake of these conditions is controlled by argument \code{count.cryptic}.} - -\item{tolerance}{A small number which defines a tiny interval for the sake of placing run-sampling dates before events and +\item{totalTime, nTotalTaxa, nExtant, nSamp}{These arguments represent +stopping and acceptance conditions for simulation runs. They are +respectively \code{totalTime}, the total length of the simulation +in time-units, \code{nTotalTaxa}, the total number of taxa over the +past evolutionary history of the clade, \code{nExtant}, the total +number of extant taxa at the end of the simulation and \code{nSamp} +the total number of sampled taxa (not counting extant taxa sampled +at the modern day). These are used to determine when to end simulation +runs, and whether to accept or reject them as output. They can be input +as a vector of two numbers, representing minimum and maximum values of +a range for accepted simulation runs (i.e. the simulation length can be +between 0 and 1000 time-steps, by default), or as a single number, +representing a point condition (i.e. if \code{nSamp = 100} then the only +simulation runs with exactly 100 taxa sampled will be output). Note that it +is easy to set combinations of parameters and run conditions that are +impossible to produce satisfactory input under, in which case +\code{simFossilRecord} would run in a nonstop loop. +How cryptic taxa are counted for the sake of these +conditions is controlled by argument \code{count.cryptic}.} + +\item{tolerance}{A small number which defines a tiny interval for +the sake of placing run-sampling dates before events and for use in determining whether a taxon is extant in simFossilRecordMethods.} -\item{maxStepTime}{When rates are time-dependent (i.e. when parameters 'D' or 'T' are used in equations input for one of -the four rate arguments), then protocol used by \code{simFossilRecord} of drawing waiting times to the next event could -produce a serious mismatch of resulting process to the defined model, because the possibility of new events is only -considered at the end of these waiting times. Instead, any time a waiting time greater than \code{maxStepTime} is -selected, then instead \emph{no} event occurs and a time-step equal to \code{maxStepTime} occurs instead, thus effectively -discretizing the progression of time in the simulations run by \code{simFossilRecord}. Decreasing this value will increase -accuracy (as the time-scale is effectively more discretized) but increase computation time, as the computer will need -to stop and check rates to see if an event happened more often. Users should toggle this value relative to the time-dependent -rate equations they input, relative to the rate of change in rates expected in time-dependent rates.} +\item{maxStepTime}{When rates are time-dependent (i.e. when +parameters 'D' or 'T' are used in equations input for one of +the four rate arguments), then protocol used by +\code{simFossilRecord} of drawing waiting times to the next event could +produce a serious mismatch of resulting process to the defined model, +because the possibility of new events is only +considered at the end of these waiting times. Instead, any +time a waiting time greater than \code{maxStepTime} is +selected, then instead \emph{no} event occurs and a +time-step equal to \code{maxStepTime} occurs instead, thus effectively +discretizing the progression of time in the simulations +run by \code{simFossilRecord}. Decreasing this value will increase +accuracy (as the time-scale is effectively more discretized) +but increase computation time, as the computer will need +to stop and check rates to see if an event happened more often. +Users should toggle this value relative to the time-dependent +rate equations they input, relative to the rate of change in +rates expected in time-dependent rates.} \item{shiftRoot4TimeSlice}{Should the dating of events be shifted, so that the date given for \code{sliceTime} is now 0, or should the dates not be shifted, @@ -93,9 +126,12 @@ conditioning limits that count a number of taxon units, such as \code{nTotalTaxa complex (i.e. each distinguishable morphotaxon) is treated as a single taxon. See examples.} -\item{negRatesAsZero}{A logical. Should rates calculated as a negative number cause the simulation to fail -with an error message (\code{ = FALSE}) or should these be treated as zero (\code{" = TRUE"}, the default). This -is equivalent to saying that the \code{rate.as.used = } \code{max(0, rate.as.given)}.} +\item{negRatesAsZero}{A logical. Should rates calculated as a +negative number cause the simulation to fail +with an error message (\code{ = FALSE}) or should these be +treated as zero (\code{" = TRUE"}, the default). This +is equivalent to saying that +the \code{rate.as.used = } \code{max(0, rate.as.given)}.} \item{print.runs}{If TRUE, prints the proportion of simulations accepted for output to the terminal.} @@ -148,9 +184,11 @@ simulations is always decreasing toward the modern; i.e. an absolute date of 50 means a point in time which is 50 time-units before the present-day, if the present-day is zero (the default, but see argument \code{shiftRoot4TimeSlice}). -Each individual element of a \code{fossilRecordSimulation} list object is named, generally of -the form "t1" and "t2", where the number is the \code{taxon.id}. Cryptic taxa are instead -named in the form of "t1.2" and "t5.3", where the first number is the taxon which they are a +Each individual element of a \code{fossilRecordSimulation} list object +is named, generally of the form "t1" and "t2", +where the number is the \code{taxon.id}. +Cryptic taxa are instead named in the form of "t1.2" and "t5.3", +where the first number is the taxon which they are a cryptic descendant of (\code{looks.like}) and the second number, after the period, is the order of appearance of lineage units in that cryptic complex. For example, for "t5.3", the first number is the \code{taxon.id} and the second number communicates @@ -289,69 +327,129 @@ then the \emph{start-time} of the run will always be this maximum value for set.seed(2) -# quick birth-death-sampling run with 1 run, 50 taxa +# quick birth-death-sampling run + # with 1 run, 50 taxa -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 1, - nTotalTaxa = 50, plot = TRUE) +record <- simFossilRecord( + p = 0.1, + q = 0.1, + r = 0.1, + nruns = 1, + nTotalTaxa = 50, + plot = TRUE + ) \donttest{ # examining multiple runs of simulations -#example of repeated pure birth simulations over 50 time-units -records <- simFossilRecord(p = 0.1, q = 0, nruns = 10, - totalTime = 50, plot = TRUE) -#plot multiple diversity curves on a log scale -records <- lapply(records,fossilRecord2fossilTaxa) -multiDiv(records,plotMultCurves = TRUE,plotLogRich = TRUE) -#histogram of total number of taxa -hist(sapply(records,nrow)) - -#example of repeated birth-death-sampling simulations over 50 time-units -records <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 10, - totalTime = 50, plot = TRUE) -records <- lapply(records,fossilRecord2fossilTaxa) -multiDiv(records,plotMultCurves = TRUE) - -#like above, but conditioned instead on having 10 extant taxa - # between 1 and 100 time-units +# example of repeated pure birth simulations over 50 time-units +records <- simFossilRecord( + p = 0.1, + q = 0, + nruns = 10, + totalTime = 50, + plot = TRUE + ) + +# plot multiple diversity curves on a log scale +records <- lapply(records, + fossilRecord2fossilTaxa) +multiDiv(records, + plotMultCurves = TRUE, + plotLogRich = TRUE + ) + +# histogram of total number of taxa +hist(sapply(records, nrow)) + + +############################################## +# example of repeated birth-death-sampling + # simulations over 50 time-units +records <- simFossilRecord( + p = 0.1, q = 0.1, r = 0.1, + nruns = 10, + totalTime = 50, + plot = TRUE) + +records <- lapply(records, + fossilRecord2fossilTaxa) + +multiDiv(records, + plotMultCurves = TRUE) + +# like above... + # but conditioned instead on having 10 extant taxa + # between 1 and 100 time-units + set.seed(4) -records <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 10, - totalTime = c(1,300), nExtant = 10, plot = TRUE) -records <- lapply(records,fossilRecord2fossilTaxa) -multiDiv(records,plotMultCurves = TRUE) + +records <- simFossilRecord( + p = 0.1, + q = 0.1, + r = 0.1, + nruns = 10, + totalTime = c(1,300), + nExtant = 10, + plot = TRUE + ) + +records <- lapply(records, + fossilRecord2fossilTaxa) + +multiDiv(records, + plotMultCurves = TRUE + ) ################################################ # How probable were the runs I accepted? - # The effect of conditions + # The effect of conditions set.seed(1) # Let's look at an example of a birth-death process - # with high extinction relative to branching + # with high extinction relative to branching # use default run conditions (barely any conditioning) # use print.runs to look at acceptance probability -records <- simFossilRecord(p = 0.1, q = 0.8, nruns = 10, - print.runs = TRUE, plot = TRUE) + +records <- simFossilRecord( + p = 0.1, + q = 0.8, + nruns = 10, + print.runs = TRUE, + plot = TRUE + ) + # 10 runs accepted from a total of 10 ! # now let's give much more stringent run conditions - # require 3 extant taxa at minimum, 5 taxa total minimum -records <- simFossilRecord(p = 0.1, q = 0.8, nruns = 10, - nExtant = c(3,100), nTotalTaxa = c(5,100), - print.runs = TRUE, plot = TRUE) + # require 3 extant taxa at minimum, 5 taxa total minimum + +records <- simFossilRecord( + p = 0.1, + q = 0.8, + nruns = 10, + nExtant = c(3,100), + nTotalTaxa = c(5,100), + print.runs = TRUE, + plot = TRUE + ) + # thousands of simulations to just obtail 10 accepable runs! - # most ended in extinction before minimums were hit + # most ended in extinction before minimums were hit # beware analysis of simulated where acceptance conditions - # are too stringent: your data will be a 'special case' - # of the simulation parameters + # are too stringent: your data will be a 'special case' + # of the simulation parameters # it will also take you a long time to generate reasonable - # numbers of replicates for whatever analysis you are doing + # numbers of replicates for whatever analysis you are doing # TLDR: You should look at print.runs = TRUE ###################### +############################## +###################################### # Using the rate equation-input for complex diversification models @@ -360,75 +458,97 @@ records <- simFossilRecord(p = 0.1, q = 0.8, nruns = 10, # first, let's write the rate equation # We'll use the diversity dependent rate equation model - # from Ettienne et al. 2012 as an example here - # Under this equation, p = q at carrying capacity K + # from Ettienne et al. 2012 as an example here + # Under this equation, p = q at carrying capacity K # Many others are possible! # Note that we don't need to use max(0,rate) as negative rates - # are converted to zero by default, as controlled by - # the argument negRatesAsZero + # are converted to zero by default, as controlled by + # the argument negRatesAsZero # From Ettiene et al. -# lambda = lambda0 - (lambda0 - mu)*(n/K) + # lambda = lambda0 - (lambda0 - mu)*(n/K) # lambda and mu are branching rate and extinction rate - # lambda and mu == p and q in paleotree (i.e. Foote convention) + # lambda and mu == p and q in paleotree (i.e. Foote convention) # lambda0 is the branching rate at richness = 0 # K is the carrying capacity # n is the richness # 'N' is the algebra symbol for standing taxonomic richness - # for simFossilRecord's simulation capabilities + # for simFossilRecord's simulation capabilities # also branching rate cannot reference extinction rate # we'll have to set lambda0, mu and K in the rate equation directly -lambda0 <- 0.3 # branching rate at 0 richness in Ltu -K <- 40 # carrying capacity -mu <- 0.1 # extinction rate will 0.1 Ltu ( = 1/3 of lambda0) +lambda0 <- 0.3 # branching rate at 0 richness in Ltu +K <- 40 # carrying capacity +mu <- 0.1 # extinction rate will 0.1 Ltu ( = 1/3 of lambda0 ) # technically, mu here represents the lambda at richness = K - # i.e. lambdaK + # i.e. lambdaK # Ettienne et al. are just implicitly saying that the carrying capacity - # is the richness at which lambda == mu + # is the richness at which lambda == mu # construct the equation programmatically using paste0 -branchingRateEq <- paste0(lambda0,"-(",lambda0,"-",mu,")*(N/",K,")") +branchingRateEq <- paste0(lambda0, "-(", lambda0, "-", mu, ")*(N/", K, ")") # and take a look at it... branchingRateEq -# its a thing of beauty, folks +# its a thing of beauty, folks! # now let's try it -records <- simFossilRecord(p = branchingRateEq, q = mu, nruns = 3, - totalTime = 100, plot = TRUE, print.runs = TRUE) -records <- lapply(records,fossilRecord2fossilTaxa) -multiDiv(records,plotMultCurves = TRUE) +records <- simFossilRecord( + p = branchingRateEq, + q = mu, + nruns = 3, + totalTime = 100, + plot = TRUE, + print.runs = TRUE + ) + +records <- lapply(records, + fossilRecord2fossilTaxa) + +multiDiv(records, + plotMultCurves = TRUE) + # those are some happy little diversity plateaus! # now let's do diversity-dependent extinction # let's slightly modify the model from Ettiene et al. -# mu = mu0 + (mu0 - muK)*(n/K) + # mu = mu0 + (mu0 - muK)*(n/K) -mu0 <- 0.001 # mu at n = 0 -muK <- 0.1 # mu at n = K (should be equal to lambda at K) -K <- 40 -lambda <- muK # equal to muK +mu0 <- 0.001 # mu at n = 0 +muK <- 0.1 # mu at n = K (should be equal to lambda at K) +K <- 40 # carrying capacity (like above) +lambda <- muK # equal to muK # construct the equation programmatically using paste0 -extRateEq <- paste0(mu0,"-(",mu0,"-",muK,")*(N/",K,")") +extRateEq <- paste0(mu0, "-(", mu0, "-", muK, ")*(N/" ,K, ")") extRateEq # now let's try it -records <- simFossilRecord(p = lambda, q = extRateEq, nruns = 3, - totalTime = 100, plot = TRUE, print.runs = TRUE) -records <- lapply(records,fossilRecord2fossilTaxa) -multiDiv(records,plotMultCurves = TRUE) +records <- simFossilRecord( + p = lambda, + q = extRateEq, + nruns = 3, + totalTime = 100, + plot = TRUE, + print.runs = TRUE) + +records <- lapply(records, + fossilRecord2fossilTaxa) + +multiDiv(records, + plotMultCurves = TRUE) # these plateaus looks a little more spiky - #( maybe there is more turnover at K? ) + #( maybe there is more turnover at K? ) # also, it took a longer for the rapid rise to occur +####################################################### +############################### # Now let's try an example with time-dependent origination - # and extinction constrained to equal origination + # and extinction constrained to equal origination # Note! Use of time-dependent parameters "D" and "T" may # result in slower than normal simulation run times @@ -436,31 +556,53 @@ multiDiv(records,plotMultCurves = TRUE) # info for argument maxTimeStep above # First, let's define a time-dependent rate equation - # "T" is the symbol for time passed + # "T" is the symbol for time passed timeEquation <- "0.4-(0.007*T)" #in this equation, 0.4 is the rate at time = 0 - # and it will decrease by 0.007 with every time-unit - # at time = 50, the final rate will be 0.05 + # and it will decrease by 0.007 with every time-unit + # at time = 50, the final rate will be 0.05 # We can easily make it so extinction is always equal to branching rate # "P" is the algebraic equivalent for "branching rate" in simFossilRecord # now let's try it -records <- simFossilRecord(p = timeEquation, q = "P", nruns = 3, - totalTime = 50, plot = TRUE, print.runs = TRUE) -records <- lapply(records,fossilRecord2fossilTaxa) -multiDiv(records,plotMultCurves = TRUE) +records <- simFossilRecord( + p = timeEquation, + q = "P", + nruns = 3, + totalTime = 50, + plot = TRUE, + print.runs = TRUE + ) + +records <- lapply(records, + fossilRecord2fossilTaxa) + +multiDiv(records, + plotMultCurves = TRUE) + # high variability that seems to then smooth out as turnover decreases # And duration what about duration-dependent processes? - # let's do a duration-dep extinction equation: + # let's do a duration-dep extinction equation: durDepExt <- "0.01+(0.01*D)" # okay, let's take it for a spin -records <- simFossilRecord(p = 0.1, q = durDepExt, nruns = 3, - totalTime = 50, plot = TRUE, print.runs = TRUE) -records <- lapply(records,fossilRecord2fossilTaxa) -multiDiv(records,plotMultCurves = TRUE) +records <- simFossilRecord( + p = 0.1, + q = durDepExt, + nruns = 3, + totalTime = 50, + plot = TRUE, + print.runs = TRUE + ) + +records <- lapply(records, + fossilRecord2fossilTaxa) + +multiDiv(records, + plotMultCurves = TRUE) + # creates runs full of short lived taxa # Some more stuff to do with rate formulae! @@ -477,28 +619,30 @@ record <- simFossilRecord( nTotalTaxa = 50, plot = TRUE) # Setting up specific time-variable rates can be laborious though - # e.g. one rate during this 10 unit interval, - # another during this interval, etc + # e.g. one rate during this 10 unit interval, + # another during this interval, etc # The problem is setting this up within a fixed function + +############################################################# # Worked Example # What if we want to draw a new rate from a - # lognormal distribution every 10 time units? + # lognormal distribution every 10 time units? # Need to randomly draw these rates *before* running simFossilTaxa # This means also that we will need to individually do each simFossilTaxa run - # since the rates are drawn outside of simFossilTaxa + # since the rates are drawn outside of simFossilTaxa # Get some reasonable log normal rates: rates <- 0.1+rlnorm(100,meanlog = 1,sdlog = 1)/100 # Now paste it into a formulae that describes a function that - # will change the rate output every 10 time units + # will change the rate output every 10 time units rateEquation <- paste0("c(",paste0(rates,collapse = ","),")[1+(T\%/\%10)]") # and let's run it record <- simFossilRecord(p = rateEquation, q = 0.1, r = 0.1, nruns = 1, - totalTime = c(30,40), plot = TRUE) + totalTime = c(30,40), plot = TRUE) ########################################################## @@ -507,78 +651,120 @@ record <- simFossilRecord(p = rateEquation, q = 0.1, r = 0.1, nruns = 1, # Some examples of varying the 'speciation modes' in simFossilRecord # The default is pure budding cladogenesis - # anag.rate = prop.bifurc = prop.cryptic = 0 + # anag.rate = prop.bifurc = prop.cryptic = 0 # let's just set those for the moment anyway record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, - anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0, - nruns = 1, nTotalTaxa = c(20,30) ,nExtant = 0, plot = TRUE) + anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0, + nruns = 1, nTotalTaxa = c(20,30) ,nExtant = 0, plot = TRUE) #convert and plot phylogeny - # note this will not reflect the 'budding' pattern - # branching events will just appear like bifurcation - # its a typical convention for phylogeny plotting + # note this will not reflect the 'budding' pattern + # branching events will just appear like bifurcation + # its a typical convention for phylogeny plotting converted <- fossilRecord2fossilTaxa(record) tree <- taxa2phylo(converted,plot = TRUE) #now, an example of pure bifurcation record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, - anag.rate = 0, prop.bifurc = 1, prop.cryptic = 0, - nruns = 1, nTotalTaxa = c(20,30) ,nExtant = 0) + anag.rate = 0, prop.bifurc = 1, prop.cryptic = 0, + nruns = 1, nTotalTaxa = c(20,30) ,nExtant = 0) tree <- taxa2phylo(fossilRecord2fossilTaxa(record),plot = TRUE) # all the short branches are due to ancestors that terminate - # via pseudoextinction at bifurcation events + # via pseudoextinction at bifurcation events # an example with anagenesis = branching -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, - anag.rate = 0.1, prop.bifurc = 0, prop.cryptic = 0, - nruns = 1, nTotalTaxa = c(20,30) ,nExtant = 0) -tree <- taxa2phylo(fossilRecord2fossilTaxa(record),plot = TRUE) +record <- simFossilRecord( + p = 0.1, q = 0.1, r = 0.1, + anag.rate = 0.1, + prop.bifurc = 0, + prop.cryptic = 0, + nruns = 1, + nTotalTaxa = c(20,30), + nExtant = 0 + ) +tree <- taxa2phylo(fossilRecord2fossilTaxa(record), + plot = TRUE) # lots of pseudoextinction # an example with anagenesis, pure bifurcation -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, - anag.rate = 0.1, prop.bifurc = 1, prop.cryptic = 0, - nruns = 1, nTotalTaxa = c(20,30) ,nExtant = 0) -tree <- taxa2phylo(fossilRecord2fossilTaxa(record),plot = TRUE) +record <- simFossilRecord( + p = 0.1, q = 0.1, r = 0.1, + anag.rate = 0.1, + prop.bifurc = 1, + prop.cryptic = 0, + nruns = 1, + nTotalTaxa = c(20,30) , + nExtant = 0 + ) +tree <- taxa2phylo(fossilRecord2fossilTaxa(record), + plot = TRUE) # lots and lots of pseudoextinction # an example with half cryptic speciation -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, - anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0.5, - nruns = 1, nTotalTaxa = c(20,30), nExtant = 0) -tree <- taxa2phylo(fossilRecord2fossilTaxa(record),plot = TRUE) +record <- simFossilRecord( + p = 0.1, q = 0.1, r = 0.1, + anag.rate = 0, + prop.bifurc = 0, + prop.cryptic = 0.5, + nruns = 1, + nTotalTaxa = c(20,30), + nExtant = 0 + ) +tree <- taxa2phylo(fossilRecord2fossilTaxa(record), + plot = TRUE) # notice that the tree has many more than the maximum of 30 tips: - # that's because the cryptic taxa are not counted as - # separate taxa by default, as controlled by count.cryptic + # that's because the cryptic taxa are not counted as + # separate taxa by default, as controlled by count.cryptic # an example with anagenesis, bifurcation, cryptic speciation -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, - anag.rate = 0.1, prop.bifurc = 0.5, prop.cryptic = 0.5, - nruns = 1, nTotalTaxa = c(20,30), nExtant = 0) -tree <- taxa2phylo(fossilRecord2fossilTaxa(record),plot = TRUE) +record <- simFossilRecord( + p = 0.1, q = 0.1, r = 0.1, + anag.rate = 0.1, + prop.bifurc = 0.5, + prop.cryptic = 0.5, + nruns = 1, + nTotalTaxa = c(20,30), + nExtant = 0 + ) +tree <- taxa2phylo(fossilRecord2fossilTaxa(record), + plot = TRUE) # note in this case, 50\% of branching is cryptic - # 25\% is bifurcation, 25\% is budding + # 25\% is bifurcation, 25\% is budding # an example with anagenesis, pure cryptic speciation - # morphotaxon identity will thus be entirely indep of branching! - # I wonder if this is what is really going on, sometimes... -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, - anag.rate = 0.1, prop.bifurc = 0, prop.cryptic = 1, - nruns = 1, nTotalTaxa = c(20,30), nExtant = 0) -tree <- taxa2phylo(fossilRecord2fossilTaxa(record),plot = TRUE) + # morphotaxon identity will thus be entirely indep of branching! + # I wonder if this is what is really going on, sometimes... +record <- simFossilRecord( + p = 0.1, q = 0.1, r = 0.1, + anag.rate = 0.1, + prop.bifurc = 0, + prop.cryptic = 1, + nruns = 1, + nTotalTaxa = c(20,30), + nExtant = 0 + ) +tree <- taxa2phylo(fossilRecord2fossilTaxa(record), + plot = TRUE) # merging cryptic taxa when all speciation is cryptic set.seed(1) -record <- simFossilRecord(p = 0.1, - q = 0.1, r = 0.1, - prop.crypt = 1, - totalTime = 50, plot = TRUE) +record <- simFossilRecord( + p = 0.1, + q = 0.1, + r = 0.1, + prop.crypt = 1, + totalTime = 50, + plot = TRUE + ) + # there looks like there is only a single taxon, but... -length(record) #actual number of cryptic lineages +length(record) +#the above is the *actual* number of cryptic lineages -############# +###################################### +############################### # playing with count.cryptic with simulations of pure cryptic speciation @@ -586,100 +772,198 @@ length(record) #actual number of cryptic lineages #or total taxa including cryptic taxa with count.cryptic = FALSE # an example with pure cryptic speciation with count.cryptic = TRUE -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, - anag.rate = 0, prop.bifurc = 0, prop.cryptic = 1, - nruns = 1, totalTime = 50, nTotalTaxa = c(10,100), count.cryptic = TRUE) +record <- simFossilRecord( + p = 0.1, q = 0.1, r = 0.1, + anag.rate = 0, + prop.bifurc = 0, + prop.cryptic = 1, + nruns = 1, + totalTime = 50, + nTotalTaxa = c(10,100), + count.cryptic = TRUE + ) tree <- taxa2phylo(fossilRecord2fossilTaxa(record)) -plot(tree);axisPhylo() -# notice how the tip labels indicate all are the same morphotaxon -# we'll replace the # of taxa constraints with a time constraint - # or else the count.cryptic = FALSE simulation will never end! +# plot the tree +plot(tree) +axisPhylo() +# notice how the tip labels indicate all are the same morphotaxon? +################# # an example with pure cryptic speciation with count.cryptic = FALSE -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, - anag.rate = 0, prop.bifurc = 0, prop.cryptic = 1, - nruns = 1, totalTime = 50, count.cryptic = FALSE) + # Need to be careful with this! +# We'll have to replace the # of taxa constraints with a time constraint + # or else the count.cryptic = FALSE simulation will never end! + +record <- simFossilRecord( + p = 0.1, q = 0.1, r = 0.1, + anag.rate = 0, + prop.bifurc = 0, + prop.cryptic = 1, + nruns = 1, + totalTime = 50, + count.cryptic = FALSE + ) tree <- taxa2phylo(fossilRecord2fossilTaxa(record)) -plot(tree);axisPhylo() +# plot it +plot(tree) +axisPhylo() + +########################################### #let's look at numbers of taxa returned when varying count.cryptic - # with prop.cryptic = 0.5 + # with prop.cryptic = 0.5 #simple simulation going for 50 total taxa #first, count.cryptic = FALSE (default) -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, - anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0.5, - nruns = 1, nTotalTaxa = 50, count.cryptic = FALSE) +record <- simFossilRecord( + p = 0.1, + q = 0.1, + r = 0.1, + anag.rate = 0, + prop.bifurc = 0, + prop.cryptic = 0.5, + nruns = 1, + nTotalTaxa = 50, + count.cryptic = FALSE + ) taxa <- fossilRecord2fossilTaxa(record) -nrow(taxa) #number of lineages (inc. cryptic) -length(unique(taxa[,6])) #number of morph-distinguishable taxa -# and count.cryptic = TRUE -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, - anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0.5, - nruns = 1, nTotalTaxa = 50, count.cryptic = TRUE) +#### Count the taxa/lineages ! +# number of lineages (inc. cryptic) +nrow(taxa) +# number of morph-distinguishable taxa +length(unique(taxa[,6])) + +################### +# Now let' try with count.cryptic = TRUE +record <- simFossilRecord( + p = 0.1, + q = 0.1, + r = 0.1, + anag.rate = 0, + prop.bifurc = 0, + prop.cryptic = 0.5, + nruns = 1, + nTotalTaxa = 50, + count.cryptic = TRUE + ) taxa <- fossilRecord2fossilTaxa(record) -nrow(taxa) #number of lineages (inc. cryptic) -length(unique(taxa[,6])) #number of morph-distinguishable taxa + +### Count the taxa/lineages ! +# number of lineages (inc. cryptic) +nrow(taxa) +# number of morph-distinguishable taxa +length(unique(taxa[,6])) # okay... -# now let's try with 50 extant taxa -#first, count.cryptic = FALSE (default) -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, - anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0.5, - nruns = 1, nExtant = 10, totalTime = c(1,100), count.cryptic = FALSE) +######################## +####################### +# now let's try cryptic speciation *with* 50 extant taxa! + +# first, count.cryptic = FALSE (default) +record <- simFossilRecord( + p = 0.1, + q = 0.1, + r = 0.1, + anag.rate = 0, + prop.bifurc = 0, + prop.cryptic = 0.5, + nruns = 1, + nExtant = 10, + totalTime = c(1,100), + count.cryptic = FALSE + ) taxa <- fossilRecord2fossilTaxa(record) -sum(taxa[,5]) #number of still-living lineages (inc. cryptic) -length(unique(taxa[taxa[,5] == 1,6])) #number of still-living morph-dist. taxa +### Count the taxa/lineages ! +# number of still-living lineages (inc. cryptic) +sum(taxa[,5]) +# number of still-living morph-dist. taxa +length(unique(taxa[taxa[,5] == 1,6])) + +############## # and count.cryptic = TRUE -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, - anag.rate = 0, prop.bifurc = 0, prop.cryptic = 0.5, - nruns = 1, nExtant = 10, totalTime = c(1,100), count.cryptic = TRUE) +record <- simFossilRecord( + p = 0.1, + q = 0.1, + r = 0.1, + anag.rate = 0, + prop.bifurc = 0, + prop.cryptic = 0.5, + nruns = 1, + nExtant = 10, + totalTime = c(1,100), + count.cryptic = TRUE + ) taxa <- fossilRecord2fossilTaxa(record) -sum(taxa[,5]) #number of still-living lineages (inc. cryptic) -length(unique(taxa[taxa[,5] == 1,6])) #number of still-living morph-dist. taxa + +### Count the taxa/lineages ! +# number of still-living lineages (inc. cryptic) +sum(taxa[,5]) +# number of still-living morph-dist. taxa +length(unique(taxa[taxa[,5] == 1,6])) ################################################# # an example using startTaxa to have more initial taxa -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 1, - nTotalTaxa = 100, startTaxa = 20, plot = TRUE) +record <- simFossilRecord( + p = 0.1, + q = 0.1, + r = 0.1, + nruns = 1, + nTotalTaxa = 100, + startTaxa = 20, + plot = TRUE + ) ###################################################### # Using run conditions # Users can generate datasets that meet multiple conditions: - # such as time, number of total taxa, extant taxa, sampled taxa + # such as time, number of total taxa, extant taxa, sampled taxa # These can be set as point conditions or ranges # let's set time = 10-100 units, total taxa = 30-40, extant = 10 - #and look at acceptance rates with print.run + #and look at acceptance rates with print.run record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 1, - totalTime = c(10,100), nTotalTaxa = c(30,40), nExtant = 10, - print.runs = TRUE, plot = TRUE) + totalTime = c(10,100), nTotalTaxa = c(30,40), nExtant = 10, + print.runs = TRUE, plot = TRUE) # let's make the constraints on totaltaxa a little tighter record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 1, - totalTime = c(50,100), nTotalTaxa = 30, nExtant = 10, - print.runs = TRUE, plot = TRUE) + totalTime = c(50,100), nTotalTaxa = 30, nExtant = 10, + print.runs = TRUE, plot = TRUE) # still okay acceptance rates # alright, now let's add a constraint on sampled taxa -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 1, - totalTime = c(50,100), nTotalTaxa = 30, nExtant = 10, - nSamp = 15, print.runs = TRUE, plot = TRUE) +record <- simFossilRecord( + p = 0.1, + q = 0.1, + r = 0.1, + nruns = 1, + totalTime = c(50,100), + nTotalTaxa = 30, + nExtant = 10, + nSamp = 15, + print.runs = TRUE, + plot = TRUE + ) # still okay acceptance rates # we can be really odd and condition on having a single taxon set.seed(1) -record <- simFossilRecord(p = 0.1, - q = 0.1, r = 0.1, nTotalTaxa = 1, - totalTime = c(10,20), plot = TRUE) +record <- simFossilRecord( + p = 0.1, + q = 0.1, + r = 0.1, + nTotalTaxa = 1, + totalTime = c(10,20), + plot = TRUE + ) ######################################################## @@ -687,32 +971,64 @@ record <- simFossilRecord(p = 0.1, #Typically, a user may want to condition on a precise # number of sampled taxa in an all-extinct simulation -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 1, - nTotalTaxa = c(1,100), nExtant = 0, nSamp = 20, - print.runs = TRUE, plot = TRUE) +record <- simFossilRecord( + p = 0.1, + q = 0.1, + r = 0.1, + nruns = 1, + nTotalTaxa = c(1,100), + nExtant = 0, + nSamp = 20, + print.runs = TRUE, + plot = TRUE + ) # Note that when simulations don't include # sampling or extant taxa, the plot # functionality changes -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0, nruns = 1, - nExtant = 0, print.runs = TRUE, plot = TRUE) +record <- simFossilRecord( + p = 0.1, + q = 0.1, + r = 0, + nruns = 1, + nExtant = 0, + print.runs = TRUE, + plot = TRUE + ) + # something similar happens when there is no sampling # and there are extant taxa but they aren't sampled -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0, nruns = 1, - nExtant = 10, nTotalTaxa = 100, modern.samp.prob = 0, - print.runs = TRUE, plot = TRUE) - - +record <- simFossilRecord( + p = 0.1, + q = 0.1, + r = 0, + nruns = 1, + nExtant = 10, + nTotalTaxa = 100, + modern.samp.prob = 0, + print.runs = TRUE, + plot = TRUE + ) + +######################################### # We can set up a test to make sure that no extant taxa somehow get # returned in many simulations with extinct-only conditioning: -res <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, - nTotalTaxa = 10,nExtant = 0,nruns = 1000,plot = TRUE) +res <- simFossilRecord( + p = 0.1, + q = 0.1, + r = 0.1, + nTotalTaxa = 10, + nExtant = 0, + nruns = 1000, + plot = TRUE + ) anyLive <- any(sapply(res,function(z) - any(sapply(z,function(x) x[[1]][5] == 1)))) + any(sapply(z,function(x) x[[1]][5] == 1))) + ) # test if any are still alive if(anyLive){ - stop("Runs have extant taxa under conditioning for none?") - } + stop("Runs have extant taxa under conditioning for none?") + } } diff --git a/man/simFossilRecordMethods.Rd b/man/simFossilRecordMethods.Rd index fe0131f4..2dffcd5e 100644 --- a/man/simFossilRecordMethods.Rd +++ b/man/simFossilRecordMethods.Rd @@ -82,7 +82,8 @@ to the flat table format of taxon data as was originally output by deprecated fu \code{fossilTaxa2fossilRecord} does the reverse, converting a \code{simFossilTaxa} table into a \code{fossilRecordSimulation} list object, -but returns a \code{fossilRecordSimulation} object that considers each species as un-sampled (as sampling +but returns a \code{fossilRecordSimulation} object that +considers each species as un-sampled (as sampling information is not contained within a \code{simFossilTaxa} table). \code{fossilRecord2fossilRanges} converts a \code{fossilRecordSimulation} object @@ -93,29 +94,50 @@ to the flat table format of observed taxon ranges, as is typically output by pro \examples{ set.seed(44) -record <- simFossilRecord(p = 0.1, q = 0.1, r = 0.1, nruns = 1, - nTotalTaxa = c(20,30) ,nExtant = 0, plot = TRUE) +record <- simFossilRecord( + p = 0.1, q = 0.1, r = 0.1, + nruns = 1, + nTotalTaxa = c(20,30), + nExtant = 0, + plot = TRUE + ) -# time-slicing +################################################## +# time-slicing simulations at particular dates # let's try slicing this record at 940 time-units -slicedRecord <- timeSliceFossilRecord(fossilRecord = record, sliceTime = 940) +slicedRecord <- timeSliceFossilRecord( + fossilRecord = record, + sliceTime = 940 + ) # and let's plot it divCurveFossilRecordSim(slicedRecord) # now with shiftRoot4TimeSlice = TRUE to shift the root age -slicedRecord <- timeSliceFossilRecord(fossilRecord = record, sliceTime = 940, - shiftRoot4TimeSlice = TRUE) +slicedRecord <- timeSliceFossilRecord( + fossilRecord = record, + sliceTime = 940, + shiftRoot4TimeSlice = TRUE + ) # and let's plot it divCurveFossilRecordSim(slicedRecord) -# plot look a little different due to how axis limits are treated... -# notice that in both, 'modern' (extant) taxa are sampled with probability = 1 - #let's try it again, make that probability = 0 - +# the last two plots look a little different + # due to how axis limits are treated... +# notice that in both, 'modern' (extant) taxa + # are sampled with probability = 1 + +######## +# let's try it again, make that probability = 0 # now with shiftRoot4TimeSlice = TRUE -slicedRecord <- timeSliceFossilRecord(fossilRecord = record, sliceTime = 940, - shiftRoot4TimeSlice = TRUE, modern.samp.prob = 0) + +slicedRecord <- timeSliceFossilRecord( + fossilRecord = record, + sliceTime = 940, + shiftRoot4TimeSlice = TRUE, + modern.samp.prob = 0 + ) + # and let's plot it divCurveFossilRecordSim(slicedRecord) @@ -129,21 +151,25 @@ taxa <- fossilRecord2fossilTaxa(record) ranges <- fossilRecord2fossilRanges(record) # plot diversity curves with multiDiv -multiDiv(list(taxa,ranges),plotMultCurves = TRUE) +multiDiv(list(taxa,ranges), + plotMultCurves = TRUE) # should look a lot like what we got earlier # get the cladogram we'd obtain for these taxa with taxa2cladogram -cladogram <- taxa2cladogram(taxa,plot = TRUE) +cladogram <- taxa2cladogram(taxa, + plot = TRUE) # now get the time-scaled phylogenies with taxa2phylo # first, with tips extending to the true times of extinction -treeExt <- taxa2phylo(taxa,plot = TRUE) +treeExt <- taxa2phylo(taxa, + plot = TRUE) # now, with tips extending to the first appearance dates (FADs) of taxa # get the FADs from the ranges FADs <- ranges[,1] -treeFAD <- taxa2phylo(taxa,FADs,plot = TRUE) +treeFAD <- taxa2phylo(taxa, + FADs,plot = TRUE) } \seealso{ diff --git a/man/taxonSortPBDBocc.Rd b/man/taxonSortPBDBocc.Rd index 14127766..3b28c0ae 100644 --- a/man/taxonSortPBDBocc.Rd +++ b/man/taxonSortPBDBocc.Rd @@ -115,20 +115,27 @@ with using read.csv to convert API-downloaded data. # getting occurrence data for a genus, sorting it # Dicellograptus dicelloData <- getPBDBocc("Dicellograptus") -dicelloOcc2 <- taxonSortPBDBocc(dicelloData, - rank = "species", onlyFormal = FALSE) +dicelloOcc2 <- taxonSortPBDBocc( + data = dicelloData, + rank = "species", + onlyFormal = FALSE + ) names(dicelloOcc2) # try a PBDB API download with lots of synonymization #this should have only 1 species -# old way, using v1.1 of PBDB API: +# *old* way, using v1.1 of PBDB API: # acoData <- read.csv(paste0( # "http://paleobiodb.org/data1.1/occs/list.txt?", # "base_name = Acosarina\%20minuta&show=ident,phylo")) -# new way - with getPBDBocc, using v1.2 of PBDB API: +# +# *new* method - with getPBDBocc, using v1.2 of PBDB API: acoData <- getPBDBocc("Acosarina minuta") -acoOcc <- taxonSortPBDBocc(acoData, - rank = "species", onlyFormal = FALSE) +acoOcc <- taxonSortPBDBocc( + data = acoData, + rank = "species", + onlyFormal = FALSE + ) names(acoOcc) } @@ -137,22 +144,27 @@ names(acoOcc) data(graptPBDB) #get formal genera -occGenus <- taxonSortPBDBocc(graptOccPBDB, - rank = "genus") +occGenus <- taxonSortPBDBocc( + data = graptOccPBDB, + rank = "genus" + ) length(occGenus) #get formal species -occSpeciesFormal <- taxonSortPBDBocc(graptOccPBDB, - rank = "species") +occSpeciesFormal <- taxonSortPBDBocc( + data = graptOccPBDB, + rank = "species") length(occSpeciesFormal) -#yes, there are fewer 'formal' graptolite species in the PBDB then genera +#yes, there are fewer 'formal' + # graptolite species in the PBDB then genera #get formal and informal species occSpeciesInformal <- taxonSortPBDBocc( - graptOccPBDB, - rank = "species", - onlyFormal = FALSE) + data = graptOccPBDB, + rank = "species", + onlyFormal = FALSE + ) length(occSpeciesInformal) #way more graptolite species are 'informal' in the PBDB @@ -160,9 +172,11 @@ length(occSpeciesInformal) #get formal and informal species #including from occurrences with uncertain taxonomy #basically everything and the kitchen sink -occSpeciesEverything <- taxonSortPBDBocc(graptOccPBDB, - rank = "species", - onlyFormal = FALSE, cleanUncertain = FALSE) +occSpeciesEverything <- taxonSortPBDBocc( + data = graptOccPBDB, + rank = "species", + onlyFormal = FALSE, + cleanUncertain = FALSE) length(occSpeciesEverything) diff --git a/man/termTaxa.Rd b/man/termTaxa.Rd index 45d65a78..329739e6 100644 --- a/man/termTaxa.Rd +++ b/man/termTaxa.Rd @@ -161,74 +161,107 @@ decreasing, i.e. the present day is zero. \examples{ set.seed(444) -#example for 20 taxa +# example for 20 taxa termTaxaRes <- simTermTaxa(20) -#let look at the taxa... +# let look at the taxa... taxa <- termTaxaRes$taxonRanges taxicDivCont(taxa) -#because ancestors don't even exist as taxa - #the true diversity curve can go to zero - #kinda bizarre! +# because ancestors don't even exist as taxa + # the true diversity curve can go to zero + # kinda bizarre! -#the tree should give a better idea +# the tree should give a better idea tree <- termTaxaRes$tree phyloDiv(tree) -#well, okay, its a tree. +# well, okay, its a tree. -#get the 'ideal cladogram' ala taxa2cladogram - #much easier with terminal-taxa simulations as no paraphyletic taxa +# get the 'ideal cladogram' ala taxa2cladogram + # much easier with terminal-taxa simulations + # as no paraphyletic taxa cladogram <- tree cladogram$edge.length <- NULL plot(cladogram) -#trying out trueTermTaxaTree -#random times of observation: uniform distribution -time.obs <- apply(taxa,1,function(x) runif(1,x[2],x[1])) -tree1 <- trueTermTaxaTree(termTaxaRes,time.obs) +# trying out trueTermTaxaTree +# random times of observation: uniform distribution +time.obs <- apply(taxa,1, + function(x) runif(1,x[2],x[1]) + ) +tree1 <- trueTermTaxaTree( + termTaxaRes, + time.obs + ) layout(1:2) plot(tree) plot(tree1) - layout(1) +########################################### #let's look at the change in the terminal branches -plot(tree$edge.length,tree1$edge.length) +plot(tree$edge.length, + tree1$edge.length) #can see some edges are shorter on the new tree, cool #let's now simulate sampling and use FADs layout(1:2) -plot(tree);axisPhylo() -FADs <- sampleRanges(termTaxaRes$taxonRanges,r = 0.1)[,1] -tree1 <- trueTermTaxaTree(termTaxaRes,FADs) -plot(tree1);axisPhylo() +plot(tree) +axisPhylo() +FADs <- sampleRanges( + termTaxaRes$taxonRanges, + r = 0.1)[,1] +tree1 <- trueTermTaxaTree(termTaxaRes, FADs) +plot(tree1) +axisPhylo() +################################################ #can condition on sampling some average number of taxa #analogous to deprecated function simFossilTaxa_SRcond r <- 0.1 avgtaxa <- 50 sumRate <- 0.2 #avg number necc for an avg number sampled -ntaxa_orig <- avgtaxa/(r/(r+sumRate)) -termTaxaRes <- simTermTaxa(ntaxa = ntaxa_orig,sumRate = sumRate) +ntaxa_orig <- avgtaxa / (r / (r + sumRate)) +termTaxaRes <- simTermTaxa( + ntaxa = ntaxa_orig, + sumRate = sumRate) + #note that conditioning must be conducted using full sumRate #this is because durations are functions of both rates #just like in bifurcation + +################################# #use advanced version of simTermTaxa: simTermTaxaAdvanced #allows for extant taxa in a term-taxa simulation #with min.cond -termTaxaRes <- simTermTaxaAdvanced(p = 0.1,q = 0.1,mintaxa = 50, - maxtaxa = 100,maxtime = 100,minExtant = 10,maxExtant = 20,min.cond = TRUE) +termTaxaRes <- simTermTaxaAdvanced( + p = 0.1, + q = 0.1, + mintaxa = 50, + maxtaxa = 100, + maxtime = 100, + minExtant = 10, + maxExtant = 20, + min.cond = TRUE + ) #notice that arguments are similar to simFossilRecord - # and somewhat more similar to deprecated function simFossilTaxa ;P + # and even more similar to deprecated function simFossilTaxa plot(termTaxaRes$tree) Ntip(termTaxaRes$tree) #without min.cond -termTaxaRes <- simTermTaxaAdvanced(p = 0.1,q = 0.1,mintaxa = 50, - maxtaxa = 100,maxtime = 100,minExtant = 10,maxExtant = 20,min.cond = FALSE) +termTaxaRes <- simTermTaxaAdvanced( + p = 0.1, + q = 0.1, + mintaxa = 50, + maxtaxa = 100, + maxtime = 100, + minExtant = 10, + maxExtant = 20, + min.cond = FALSE + ) plot(termTaxaRes$tree) Ntip(termTaxaRes$tree) diff --git a/man/timePaleoPhy.Rd b/man/timePaleoPhy.Rd index ac6eeaf3..13acbfbe 100644 --- a/man/timePaleoPhy.Rd +++ b/man/timePaleoPhy.Rd @@ -29,15 +29,17 @@ in time when taxa first and last appear. If there is stratigraphic uncertainty i when taxa appear in the fossil record, it is preferable to use the 'bin' time-scaling functions; however, see the argument \code{dateTreatment}.} -\item{type}{Type of time-scaling method used. Can be "basic", "equal", "equal_paleotree_legacy", "equal_date.phylo_legacy" -"aba", "zbla" or "mbl". Type = "basic" by default. See details below.} +\item{type}{Type of time-scaling method used. Can be \code{"basic"}, +\code{"equal"}, \code{"equal_paleotree_legacy"}, \code{"equal_date.phylo_legacy"} +\code{"aba"}, \code{"zbla"} or \code{"mbl"}. +\code{Type = "basic"} by default. See details below.} -\item{vartime}{Time variable; usage depends on the method 'type' argument. -Ignored if type = "basic".} +\item{vartime}{Time variable; usage depends on the \code{type} argument. +Ignored if \code{type = "basic"}.} \item{ntrees}{Number of time-scaled trees to output. If ntrees is greater than one and both randres is false and dateTreatment is neither -'minMax' or 'randObs', the function will fail and +\code{'minMax'} or \code{'randObs'}, the function will fail and a warning is issued, as these arguments would simply produce multiple identical time-scaled trees.} @@ -58,16 +60,16 @@ See that functions help page for more information; the result of time-order resolving of polytomies generally does not differ across multiple uses, unlike use of \code{multi2di}.} -\item{add.term}{If true, adds terminal ranges. By default, this function will +\item{add.term}{If \code{TRUE}, adds terminal ranges. By default, this function will not add the ranges of taxa when time-scaling a tree, so that the tips correspond temporally to the first appearance datums of the given taxa. If -\code{add.term = T}, then the 'terminal ranges' of the taxa are added to the tips +\code{add.term = TRUE}, then the 'terminal ranges' of the taxa are added to the tips after tree is time-scaled, such that the tips now correspond to the last appearance datums.} \item{inc.term.adj}{If true, includes terminal ranges in branch length estimates for the various adjustment of branch lengths under all methods -except 'basic' (i.e. a terminal length branch will not be treated as zero +except \code{type = 'basic'} (i.e. a terminal length branch will not be treated as zero length is this argument is \code{TRUE} if the taxon at this tip has a non-zero duration). By default, this argument is \code{FALSE} and this function will not include the ranges of taxa when adjusting branch lengths, so that @@ -78,14 +80,18 @@ options.} \item{dateTreatment}{This argument controls the interpretation of \code{timeData}. The default setting \code{dateTreatment = "firstLast"} treats the dates -in \code{timeData} as a column of precise first and last appearances. +in \code{timeData} as a column of precise first and last appearances. + A second option is \code{dateTreatment = "minMax"}, which treats these dates as minimum and maximum bounds on single point dates. Under this option, all taxa in the analysis will be treated as being point dates, such that the first appearance -is also the last. These dates will be pulled under a uniform distribution. If \code{dateTreatment = "minMax"} is used, -\code{add.term} becomes meaningless, and the use of it will return an error message. A third option -is \code{dateTreatment = "randObs"}. This assumes that the dates in the matrix are first and last appearance times, -but that the desired time of observation is unknown. Thus, this is much like \code{dateTreatment = "firstLast"} except +is also the last. These dates will be pulled under a uniform distribution. +If \code{dateTreatment = "minMax"} is used, +\code{add.term} becomes meaningless, and the use of it will return an error message. + +A third option is \code{dateTreatment = "randObs"}. This assumes that the dates in the +matrix are first and last appearance times, but that the desired time of observation is unknown. +Thus, this is much like \code{dateTreatment = "firstLast"} except the effective time of observation (the taxon's LAD under \code{dateTreatment = "firstLast"}) is treated as an uncertain date, and is randomly sampled between the first and last appearance times. The FAD still is treated as a fixed number, used @@ -94,6 +100,7 @@ was called in \code{timePaleoPhy} using the argument \code{rand.obs}, which has for clarity. This temporal uncertainty in times of observation might be useful if a user is interested in applying phylogeny-based approaches to studying trait evolution, but have per-taxon measurements of traits that come from museum specimens with uncertain temporal placement. + With both arguments \code{dateTreatment = "minMax"} and \code{dateTreatment = "randObs"}, the sampling of dates from random distributions should compel users to produce many time-scaled trees for any given analytical purpose. @@ -167,7 +174,8 @@ phylogenies of fossil taxa can have biasing effects on macroevolutionary analyse (Bapst, 2014, Paleobiology), this function is largely retained for legacy purposes and plotting applications. The time-scaling methods implemented by the functions listed here do \bold{not} return realistic estimates of -divergence dates, users should investigate other time-scaling methods such as \code{\link{cal3TimePaleoPhy}}. +divergence dates, users should investigate other time-scaling +methods such as \code{\link{cal3TimePaleoPhy}}. } \details{ \emph{Time-Scaling Methods} @@ -177,10 +185,11 @@ discussed \emph{a posteriori} methods for time-scaling phylogenies of fossil tax Unfortunately, it can be difficult to attribute some time-scaling methods to specific references in the literature. -There are five main \emph{a posteriori} approaches that can be used by \code{timePaleoPhy}. Four of these -main types use some value of absolute time, chosen a priori, to time-scale the tree. -This is handled by the argument \code{vartime}, which is NULL by default and unused -for type "basic". +There are five main \emph{a posteriori} approaches that +can be used by \code{timePaleoPhy}. Four of these +main types use some value of absolute time, chosen \emph{a priori}, to time-scale the tree. +This is handled by the argument \code{vartime}, which is \code{NULL} by default and unused +for type \code{"basic"}. \describe{ \item{"basic"}{This most simple of time-scaling methods ignores \code{vartime} and @@ -264,7 +273,7 @@ and maximum absolute ages for the fossil collections that a taxon is known is known from. Presumably, the first and last appearances of that taxon in the fossil record is at unknown dates within these bounds. -As of paleotree version 2.0. the treatment of taxon ages in +As of paleotree v2.0. the treatment of taxon ages in \code{timePaleoPhy} is handled by the argument \code{dateTreatment}. \emph{By default,} this argument is set to 'firstLast' which means the matrix of ages are treated as precise first and last appearance dates (i.e. FADs and LADs). The earlier FADs will be used @@ -295,7 +304,8 @@ and there are more options available for certain types of age uncertainty, such where specimens come from the same fossil site. The input \code{timeList} object for \code{bin_timePaleoPhy} can have overlapping -(i.e. non-sequential) intervals, and intervals of uneven size. Taxa alive in the modern should be listed as last +(i.e. non-sequential) intervals, and intervals of +uneven size. Taxa alive in the modern should be listed as last occurring in a time interval that begins at time 0 and ends at time 0. If taxa occur only in single collections (i.e. their first and last appearance in the fossil record is synchronous, the argument point.occur will force all taxa @@ -339,16 +349,33 @@ plot(retioTree) taxicDivDisc(retioRanges) #Use basic time-scaling (terminal branches only go to FADs) -ttree <- bin_timePaleoPhy(tree = retioTree,timeList = retioRanges,type = "basic", - ntrees = 1, plot = TRUE) +ttree <- bin_timePaleoPhy( + tree = retioTree, + timeList = retioRanges, + type = "basic", + ntrees = 1, + plot = TRUE + ) #Use basic time-scaling (terminal branches go to LADs) -ttree <- bin_timePaleoPhy(tree = retioTree,timeList = retioRanges,type = "basic", - add.term = TRUE, ntrees = 1, plot = TRUE) +ttree <- bin_timePaleoPhy( + tree = retioTree, + timeList = retioRanges, + type = "basic", + add.term = TRUE, + ntrees = 1, + plot = TRUE + ) #mininum branch length time-scaling (terminal branches only go to FADs) -ttree <- bin_timePaleoPhy(tree = retioTree,timeList = retioRanges,type = "mbl", - vartime = 1, ntrees = 1, plot = TRUE) +ttree <- bin_timePaleoPhy( + tree = retioTree, + timeList = retioRanges, + type = "mbl", + vartime = 1, + ntrees = 1, + plot = TRUE + ) ################### @@ -356,116 +383,313 @@ ttree <- bin_timePaleoPhy(tree = retioTree,timeList = retioRanges,type = "mbl", #Simulate some fossil ranges with simFossilRecord set.seed(444) -record <- simFossilRecord(p = 0.1, q = 0.1, nruns = 1, - nTotalTaxa = c(30,40), nExtant = 0) +record <- simFossilRecord( + p = 0.1, q = 0.1, + nruns = 1, + nTotalTaxa = c(30,40), + nExtant = 0 + ) taxa <- fossilRecord2fossilTaxa(record) + #simulate a fossil record with imperfect sampling with sampleRanges -rangesCont <- sampleRanges(taxa,r = 0.5) +rangesCont <- sampleRanges(taxa, r = 0.5) #let's use taxa2cladogram to get the 'ideal' cladogram of the taxa -cladogram <- taxa2cladogram(taxa,plot = TRUE) +cladogram <- taxa2cladogram(taxa, + plot = TRUE) + #Now let's try timePaleoPhy using the continuous range data -ttree <- timePaleoPhy(cladogram,rangesCont,type = "basic",plot = TRUE) +ttree <- timePaleoPhy( + cladogram, + rangesCont, + type = "basic", + plot = TRUE + ) + #plot diversity curve phyloDiv(ttree) -#that tree lacked the terminal parts of ranges (tips stops at the taxon FADs) -#let's add those terminal ranges back on with add.term -ttree <- timePaleoPhy(cladogram,rangesCont,type = "basic",add.term = TRUE,plot = TRUE) + +################################################ +# that tree lacked the terminal parts of ranges + # (tips stops at the taxon FADs) +# let's add those terminal ranges back on with add.term +ttree <- timePaleoPhy( + cladogram, + rangesCont, + type = "basic", + add.term = TRUE, + plot = TRUE + ) + #plot diversity curve phyloDiv(ttree) -#that tree didn't look very resolved, does it? (See Wagner and Erwin 1995 to see why) -#can randomly resolve trees using the argument randres -#each resulting tree will have polytomies randomly resolved in different ways using multi2di -ttree <- timePaleoPhy(cladogram,rangesCont,type = "basic",ntrees = 1,randres = TRUE, - add.term = TRUE,plot = TRUE) -#notice well the warning it prints! -#we would need to set ntrees to a large number to get a fair sample of trees - -#if we set ntrees>1, timePaleoPhy will make multiple time-trees -ttrees <- timePaleoPhy(cladogram,rangesCont,type = "basic",ntrees = 9,randres = TRUE, - add.term = TRUE,plot = TRUE) + +################################################# +# that tree didn't look very resolved, does it? + # (See Wagner and Erwin 1995 to see why) +# can randomly resolve trees using the argument randres +# each resulting tree will have polytomies + # randomly resolved stochastically using ape::multi2di +ttree <- timePaleoPhy( + cladogram, + rangesCont, + type = "basic", + ntrees = 1, + randres = TRUE, + add.term = TRUE, + plot = TRUE + ) + +# Notice the warning it prints! PAY ATTENTION! +# We would need to set ntrees to a large number + # to get a fair sample of trees + +# if we set ntrees > 1, timePaleoPhy will make multiple time-trees +ttrees <- timePaleoPhy( + cladogram, + rangesCont, + type = "basic", + ntrees = 9, + randres = TRUE, + add.term = TRUE, + plot = TRUE) #let's compare nine of them at once in a plot -layout(matrix(1:9,3,3)) +layout(matrix(1:9, 3, 3)) parOrig <- par(no.readonly = TRUE) -par(mar = c(1,1,1,1)) -for(i in 1:9){plot(ladderize(ttrees[[i]]),show.tip.label = FALSE,no.margin = TRUE)} +par(mar = c(1, 1, 1, 1)) +for(i in 1:9){ + plot( + ladderize(ttrees[[i]]), + show.tip.label = FALSE, + no.margin = TRUE + ) + } #they are all a bit different! -#we can also resolve the polytomies in the tree according to time of first appearance - #via the function timeLadderTree, by setting the argument 'timeres' to TRUE -ttree <- timePaleoPhy(cladogram,rangesCont,type = "basic",ntrees = 1,timeres = TRUE, - add.term = TRUE,plot = TRUE) + +############################################## +# we can also resolve the polytomies in the tree + # according to time of first appearance via the function timeLadderTree + # by setting the argument 'timeres = TRUE' +ttree <- timePaleoPhy( + cladogram, + rangesCont, + type = "basic", + ntrees = 1, + timeres = TRUE, + add.term = TRUE, + plot = TRUE + ) #can plot the median diversity curve with multiDiv -layout(1);par(parOrig) +layout(1) +par(parOrig) multiDiv(ttrees) #compare different methods of timePaleoPhy -layout(matrix(1:6,3,2)) +layout(matrix(1:6, 3, 2)) parOrig <- par(no.readonly = TRUE) -par(mar = c(3,2,1,2)) -plot(ladderize(timePaleoPhy(cladogram,rangesCont,type = "basic",vartime = NULL,add.term = TRUE))) - axisPhylo();text(x = 50,y = 23,"type = basic",adj = c(0,0.5),cex = 1.2) -plot(ladderize(timePaleoPhy(cladogram,rangesCont,type = "equal",vartime = 10,add.term = TRUE))) - axisPhylo();text(x = 55,y = 23,"type = equal",adj = c(0,0.5),cex = 1.2) -plot(ladderize(timePaleoPhy(cladogram,rangesCont,type = "aba",vartime = 1,add.term = TRUE))) - axisPhylo();text(x = 55,y = 23,"type = aba",adj = c(0,0.5),cex = 1.2) -plot(ladderize(timePaleoPhy(cladogram,rangesCont,type = "zlba",vartime = 1,add.term = TRUE))) - axisPhylo();text(x = 55,y = 23,"type = zlba",adj = c(0,0.5),cex = 1.2) -plot(ladderize(timePaleoPhy(cladogram,rangesCont,type = "mbl",vartime = 1,add.term = TRUE))) - axisPhylo();text(x = 55,y = 23,"type = mbl",adj = c(0,0.5),cex = 1.2) -layout(1);par(parOrig) - +par(mar = c(3, 2, 1, 2)) +plot(ladderize(timePaleoPhy( + cladogram, + rangesCont, + type = "basic", + vartime = NULL, + add.term = TRUE + ))) +axisPhylo() +text(x = 50,y = 23,"type = basic",adj = c(0,0.5),cex = 1.2) +# +plot(ladderize(timePaleoPhy( + cladogram, + rangesCont, + type = "equal", + vartime = 10, + add.term = TRUE + ))) +axisPhylo() +text(x = 55,y = 23,"type = equal",adj = c(0,0.5),cex = 1.2) +# +plot(ladderize(timePaleoPhy( + cladogram, + rangesCont, + type = "aba", + vartime = 1, + add.term = TRUE + ))) +axisPhylo() +text(x = 55,y = 23,"type = aba",adj = c(0,0.5),cex = 1.2) +# +plot(ladderize(timePaleoPhy( + cladogram, + rangesCont, + type = "zlba", + vartime = 1, + add.term = TRUE + ))) +axisPhylo() +text(x = 55, y = 23, "type = zlba", + adj = c(0,0.5), cex = 1.2) +# +plot(ladderize(timePaleoPhy( + cladogram, + rangesCont, + type = "mbl", + vartime = 1, + add.term = TRUE + ))) +axisPhylo() +text(x = 55,y = 23,"type = mbl",adj = c(0,0.5),cex = 1.2) +layout(1) +par(parOrig) + + +############################################## #using node.mins -#let's say we have (molecular??) evidence that node #5 is at least 1200 time-units ago +#let's say we have (molecular??) evidence that + # node #5 is at least 1200 time-units ago #to use node.mins, first need to drop any unshared taxa + droppers <- cladogram$tip.label[is.na( - match(cladogram$tip.label,names(which(!is.na(rangesCont[,1])))))] + match(cladogram$tip.label, + names(which(!is.na(rangesCont[,1]))) + ) + )] cladoDrop <- drop.tip(cladogram, droppers) + # now make vector same length as number of nodes nodeDates <- rep(NA, Nnode(cladoDrop)) nodeDates[5] <- 1200 -ttree1 <- timePaleoPhy(cladoDrop,rangesCont,type = "basic", - randres = FALSE,node.mins = nodeDates,plot = TRUE) -ttree2 <- timePaleoPhy(cladoDrop,rangesCont,type = "basic", - randres = TRUE,node.mins = nodeDates,plot = TRUE) - +ttree1 <- timePaleoPhy( + cladoDrop,rangesCont, + type = "basic", + randres = FALSE, + node.mins = nodeDates, + plot = TRUE) +ttree2 <- timePaleoPhy( + cladoDrop, + rangesCont, + type = "basic", + randres = TRUE, + node.mins = nodeDates, + plot = TRUE) + + +#################################################### +################################################### +#################################################### #Using bin_timePaleoPhy to time-scale with discrete interval data + #first let's use binTimeData() to bin in intervals of 1 time unit rangesDisc <- binTimeData(rangesCont,int.length = 1) -ttreeB1 <- bin_timePaleoPhy(cladogram,rangesDisc,type = "basic",ntrees = 1,randres = TRUE, - add.term = TRUE,plot = FALSE) + +ttreeB1 <- bin_timePaleoPhy( + cladogram, + rangesDisc, + type = "basic", + ntrees = 1, + randres = TRUE, + add.term = TRUE, + plot = FALSE + ) + #notice the warning it prints! phyloDiv(ttreeB1) + #with time-order resolving via timeLadderTree -ttreeB2 <- bin_timePaleoPhy(cladogram,rangesDisc,type = "basic",ntrees = 1,timeres = TRUE, - add.term = TRUE,plot = FALSE) +ttreeB2 <- bin_timePaleoPhy( + cladogram, + rangesDisc, + type = "basic", + ntrees = 1, + timeres = TRUE, + add.term = TRUE, + plot = FALSE + ) + phyloDiv(ttreeB2) + + #can also force the appearance timings not to be chosen stochastically -ttreeB3 <- bin_timePaleoPhy(cladogram,rangesDisc,type = "basic",ntrees = 1, - nonstoch.bin = TRUE,randres = TRUE,add.term = TRUE,plot = FALSE) +ttreeB3 <- bin_timePaleoPhy( + cladogram,rangesDisc, + type = "basic", + ntrees = 1, + nonstoch.bin = TRUE, + randres = TRUE, + add.term = TRUE, + plot = FALSE + ) + phyloDiv(ttreeB3) + # testing node.mins in bin_timePaleoPhy -ttree <- bin_timePaleoPhy(cladoDrop,rangesDisc,type = "basic",ntrees = 1, - add.term = TRUE,randres = FALSE,node.mins = nodeDates,plot = TRUE) +ttree <- bin_timePaleoPhy( + cladoDrop, + rangesDisc, + type = "basic", + ntrees = 1, + add.term = TRUE, + randres = FALSE, + node.mins = nodeDates, + plot = TRUE + ) + # with randres = TRUE -ttree <- bin_timePaleoPhy(cladoDrop,rangesDisc,type = "basic",ntrees = 1, - add.term = TRUE,randres = TRUE,node.mins = nodeDates,plot = TRUE) +ttree <- bin_timePaleoPhy( + cladoDrop, + rangesDisc, + type = "basic",ntrees = 1, + add.term = TRUE, + randres = TRUE, + node.mins = nodeDates, + plot = TRUE + ) \donttest{ #simple three taxon example for testing inc.term.adj -ranges1 <- cbind(c(3,4,5),c(2,3,1));rownames(ranges1) <- paste("t",1:3,sep = "") -clado1 <- read.tree(file = NA,text = "(t1,(t2,t3));") -ttree1 <- timePaleoPhy(clado1,ranges1,type = "mbl",vartime = 1) -ttree2 <- timePaleoPhy(clado1,ranges1,type = "mbl",vartime = 1,add.term = TRUE) -ttree3 <- timePaleoPhy(clado1,ranges1,type = "mbl",vartime = 1,add.term = TRUE,inc.term.adj = TRUE) -layout(1:3) -ttree1$root.time;plot(ttree1);axisPhylo() -ttree2$root.time;plot(ttree2);axisPhylo() -ttree3$root.time;plot(ttree3);axisPhylo() +ranges1 <- cbind(c(3,4,5), c(2,3,1)) +rownames(ranges1) <- paste("t", 1:3, sep = "") +clado1 <- read.tree(file = NA, + text = "(t1,(t2,t3));") +ttree1 <- timePaleoPhy( + clado1, + ranges1, + type = "mbl", + vartime = 1 + ) +ttree2 <- timePaleoPhy( + clado1, + ranges1, + type = "mbl", + vartime = 1, + add.term = TRUE + ) +ttree3 <- timePaleoPhy( + clado1, + ranges1, + type = "mbl", + vartime = 1, + add.term = TRUE, + inc.term.adj = TRUE + ) + +# see differences in root times +ttree1$root.time +ttree2$root.time +ttree3$root.time -apply(ranges1,1,diff) + +layout(1:3) +plot(ttree1) +axisPhylo() +plot(ttree2) +axisPhylo() +plot(ttree3) +axisPhylo() + } } @@ -502,10 +726,10 @@ evolutionary patterns. Blackwell Scientific, Oxford. \code{\link{cal3TimePaleoPhy}}, \code{\link{binTimeData}}, \code{\link{multi2di}} -For an alternative time-scaling function, which includes the 'ruta' method +For an alternative time-scaling function, which includes the \code{'ruta'} method that weights the time-scaling of branches by estimates of character change -along with implementations of the 'basic' and 'equal' methods described here, -please see function \code{DatePhylo} in package \code{strap}. +along with implementations of the \code{'basic'} and \code{'equal'} +methods described here, please see function \code{DatePhylo} in package \code{strap}. } \author{ David W. Bapst, heavily inspired by code supplied by Graeme Lloyd diff --git a/revdep/.cache.rds b/revdep/.cache.rds new file mode 100644 index 00000000..07f5c1b7 Binary files /dev/null and b/revdep/.cache.rds differ diff --git a/tests/testthat/test_cal3_dateTreatment_Armin.R b/tests/testthat/test_cal3_dateTreatment_Armin.R index a2701c63..9aa1d98a 100644 --- a/tests/testthat/test_cal3_dateTreatment_Armin.R +++ b/tests/testthat/test_cal3_dateTreatment_Armin.R @@ -1,5 +1,5 @@ # testing consistency issues with date treatment under cal3 (05-31-19) - # making sure date treatment issues as reported by Armin Essler in October 2018 + # making sure date treatment issues as reported by Armin Elsler in October 2018 # are avoided in current code base test_that("cal3 datetreatment is consistent with arguments", {