Skip to content

Commit

Permalink
Add target.stats capability and control$estimate.tapered.bias
Browse files Browse the repository at this point in the history
Add the ability to use target.stats
and not double fit the MPLE when estimate="MPLE"
  • Loading branch information
handcock committed May 14, 2023
1 parent f2c1e13 commit b6b5524
Show file tree
Hide file tree
Showing 12 changed files with 954 additions and 19 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,4 @@ dkms.conf
.Rproj.user
*.print
R/ergm.tapered.R.withcomments
R/ergm.tapered.R.nosancall
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Suggests: testthat
License: GPL-3
Roxygen: list(markdown = TRUE)
Encoding: UTF-8
RoxygenNote: 7.2.1
RoxygenNote: 7.2.3
Remotes: github::statnet/statnet.common@master,
github::statnet/network@master,
github::statnet/ergm@tapered
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ S3method(summary,ergm.tapered)
export(control.ergm.tapered)
export(ergm.mvtapered)
export(ergm.tapered)
export(ergm.tapered.formula)
export(llik.fun.Kpenalty)
export(llik.fun.obs.Kpenalty)
export(llik.grad.Kpenalty)
Expand All @@ -14,6 +15,7 @@ export(llik.grad.obs.Kpenalty.numDeriv)
export(llik.hessian.Kpenalty)
export(llik.hessian.Kpenalty.numDeriv)
export(llik.hessian.obs.Kpenalty.numDeriv)
export(simulate_ergm.tapered)
import(ergm)
import(network)
import(statnet.common)
Expand Down
5 changes: 4 additions & 1 deletion R/control.ergm.tapered.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
#' errors are below the precision bound, and the Hummel step length is 1 for
#' two consecutive iterations. This is an experimental feature [ergm()].
#' The default value in [ergm.tapered()] is 0.15, higher than in [ergm()].
#' @param estimate.tapered.bias logical. It \code{TRUE} the bias of the estimated estimates due to tapering is estimated.
#' If this is \code{FALSE} the MPLE of the untapered model is not computed, saving computational time.
#' @param loglik list List of additional control arguments for the loglik. See \code{\link{control.logLik.ergm.tapered}}.
#' @param \dots Additional arguments, passed to other functions This argument
#' is helpful because it collects any control parameters that have been
Expand Down Expand Up @@ -71,6 +73,7 @@ control.ergm.tapered<-function(
MCMLE.kurtosis.penalty=2.0,
MCMLE.termination=c("precision","confidence", "Hummel", "Hotelling", "none"),
MCMLE.MCMC.precision=0.005,
estimate.tapered.bias=TRUE,
# MCMC.effectiveSize.maxruns=8,
loglik=control.logLik.ergm.tapered(),
...
Expand All @@ -88,4 +91,4 @@ control.ergm.tapered<-function(
set.control.class(c("control.ergm", "control.ergm.tapered"))
}

STATIC_TAPERING_CONTROLS <- c("MCMLE.kurtosis.location", "MCMLE.kurtosis.scale", "MCMLE.kurtosis.penalty")
STATIC_TAPERING_CONTROLS <- c("MCMLE.kurtosis.location", "MCMLE.kurtosis.scale", "MCMLE.kurtosis.penalty", "estimate.tapered.bias")
90 changes: 74 additions & 16 deletions R/ergm.tapered.R
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,23 @@
#' @param response {Name of the edge attribute whose value is to be
#' modeled in the valued ERGM framework. Defaults to \code{NULL} for
#' simple presence or absence, modeled via a binary ERGM.}
#' @param reference {A one-sided formula specifying
#' the reference measure (\eqn{h(y)}) to be used.
#' See help for \link[=ergm-references]{ERGM reference measures} implemented in the
#' \strong{\link[=ergm-package]{ergm}} package.}
#' @param constraints {A formula specifying one or more constraints
#' on the support of the distribution of the networks being modeled,
#' using syntax similar to the \code{formula} argument, on the
#' right-hand side. See \link[=ergm-constraints]{ERGM constraints}
#' \code{\link{ergm}} for details.}
#' @param control An object of class control.ergm. Passed to the ergm function.
#' right-hand side. See \link[=ergm-constraints]{ERGM constraints}.}
#' @param reference {A one-sided formula specifying
#' the reference measure (\eqn{h(y)}) to be used.
#' See help for \link[=ergm-references]{ERGM reference measures} implemented in the
#' \strong{\link[=ergm-package]{ergm}} package for details.}
#' @param estimate {If "MPLE," then the maximum pseudolikelihood estimator
#' is returned. If "MLE" (the default), then an approximate maximum likelihood
#' estimator is returned. For certain models, the MPLE and MLE are equivalent,
#' in which case this argument is ignored. (To force MCMC-based approximate
#' likelihood calculation even when the MLE and MPLE are the same, see the
#' \code{force.main} argument of \code{\link{control.ergm}}. If "CD" (\emph{EXPERIMENTAL}),
#' the Monte-Carlo contrastive divergence estimate is returned. )
#' }
#' @param control An object of class control.erg.tapered. Passed to the ergm function.
#' @param fixed A `logical`: if this is \code{TRUE}, the tapering is fixed at the passed values.
#' If this is \code{FALSE}, the tapering is estimated using a kurtosis penalized likelihood.
#' See Blackburn and Handcock (2022) for an explanation.
Expand Down Expand Up @@ -66,6 +73,7 @@
ergm.tapered <- function(formula, r=2, beta=NULL, tau=NULL, tapering.centers=NULL, target.stats=NULL,
family="taper", taper.terms="all",
response=NULL, constraints=~., reference=~Bernoulli,
estimate=c("MLE", "MPLE", "CD"),
control = control.ergm.tapered(), fixed=TRUE, verbose=FALSE, eval.loglik=TRUE, ...){

if(!methods::is(control,"control.ergm.tapered")){
Expand Down Expand Up @@ -93,6 +101,9 @@ ergm.tapered <- function(formula, r=2, beta=NULL, tau=NULL, tapering.centers=NUL

if(is.null(tapering.centers)) tapering.centers <- target.stats

estimate <- match.arg(estimate)
arg_list <- list(...)
arg_list$estimate <- NULL

# set tapering terms
if(is.character(taper.terms) & length(taper.terms)==1){
Expand Down Expand Up @@ -126,7 +137,11 @@ ergm.tapered <- function(formula, r=2, beta=NULL, tau=NULL, tapering.centers=NUL
reformula <- append_rhs.formula(trimmed_formula,taper_formula)
}
attr(taper.terms,"env") <- NULL
taper.stats <- summary(append_rhs.formula(nw ~.,taper.terms), response=response, ...)
if(is.null(target.stats)){
taper.stats <- summary(append_rhs.formula(nw ~.,taper.terms), response=response, ...)
}else{
taper.stats <- target.stats
}

# set tapering coefficient
tau <- switch(family,
Expand Down Expand Up @@ -230,20 +245,58 @@ ergm.tapered <- function(formula, r=2, beta=NULL, tau=NULL, tapering.centers=NUL
}
}

if(!is.valued(nw)){
if(!is.valued(nw) & control$estimate.tapered.bias){
# fit binary ergm
fit.MPLE.control <- control
fit.MPLE.control$init <- NULL
fit.MPLE.control$MPLE.save.xmat <- TRUE
fit.MPLE <- ergm(reformula, control=fit.MPLE.control, estimate="MPLE",
response=response, constraints=constraints, reference=reference, eval.loglik=eval.loglik, verbose=verbose, ...)
if(length(arg_list)>0){
fit.MPLE <- ergm(reformula, control=fit.MPLE.control, estimate="MPLE",
response=response, constraints=constraints, reference=reference, eval.loglik=eval.loglik, verbose=verbose, arg_list)
}else{
fit.MPLE <- ergm(reformula, control=fit.MPLE.control, estimate="MPLE",
response=response, constraints=constraints, reference=reference, eval.loglik=eval.loglik, verbose=verbose)
}
}
if(is.null(target.stats)){
if(!is.null(target.stats) & estimate != "MPLE"){
target.stats.aug <- c(target.stats,NA)
names(target.stats.aug) <- re.names
names(target.stats) <- re.names[1:length(target.stats)]
for( i in 1:20){
sanformula <- statnet.common::nonsimp_update.formula(newformula,nw ~ .)
env$.taper.center <- target.stats
env$.taper.coef <- tau
env$nw <- nw
environment(sanformula) <- env
if(verbose){
message(sprintf("Computing SAN network\n"))
}
nw <- san(sanformula,target.stats=target.stats,
control=control.san(SAN.maxit=10, SAN.exclude.statistics="Taper_Penalty", parallel=control$parallel))
}
env$.taper.center <- target.stats
env$.taper.coef <- tau
env$nw <- nw
environment(sanformula) <- env
#if(verbose){
message(sprintf("SAN network compared to target statistics:\n"))
print(cbind(target.stats, summary(sanformula)[-length(target.stats)-1]))
#}
newformula <- sanformula
#tostats <- ostats
#names(tostats) <- re.names
#control$SAN$SAN.exclude.statistics <- "Taper_Penalty"
#fit <- ergm(newformula, control=control, #target.stats=tostats, #offset.coef=tau,
# response=response, constraints=constraints, reference=reference, eval.loglik=eval.loglik, verbose=verbose, ...)
}
if(length(arg_list)>0){
fit <- ergm(newformula, control=control,
response=response, constraints=constraints, reference=reference, eval.loglik=eval.loglik, verbose=verbose, ...)
response=response, constraints=constraints, reference=reference, estimate=estimate,
eval.loglik=eval.loglik, verbose=verbose, arg_list)
}else{
fit <- ergm(newformula, control=control, target.stats=ostats, offset.coef=tau,
response=response, constraints=constraints, reference=reference, eval.loglik=eval.loglik, verbose=verbose, ...)
fit <- ergm(newformula, control=control,
response=response, constraints=constraints, reference=reference, estimate=estimate,
eval.loglik=eval.loglik, verbose=verbose)
}

fit$tapering.centers <- taper.stats
Expand All @@ -269,6 +322,7 @@ ergm.tapered <- function(formula, r=2, beta=NULL, tau=NULL, tapering.centers=NUL
}
fit$orig.formula <- formula

if(estimate == "MLE"){
if(fixed){
sample <- as.matrix(fit$sample)[,1:npar,drop=FALSE]
fit$hessian <- fit$hessian[1:npar,1:npar]
Expand All @@ -286,9 +340,12 @@ ergm.tapered <- function(formula, r=2, beta=NULL, tau=NULL, tapering.centers=NUL
fcoef[seq_along(fulltau)[!is.na(nm)]] <- fcoef[nm[!is.na(nm)]]
fit$tapering.coefficients <- fulltau
fit$taudelta.offset <- 2*fulltau*as.vector(apply(sapply(fit$sample,function(x){apply(x[,-ncol(x)],2,sd)}),1,mean))
if(!is.valued(nw)){
if(!is.valued(nw) & control$estimate.tapered.bias){
fit$taudelta.mean <- apply((2*fit.MPLE$glm.result$value$model[,1]-1)*sweep(fit.MPLE$xmat.full,2,fulltau,"*"),2,weighted.mean,weight=fit.MPLE$glm.result$value$prior.weights)
fit$taudelta.mad <- apply((2*fit.MPLE$glm.result$value$model[,1]-1)*sweep(abs(fit.MPLE$xmat.full),2,fulltau,"*"),2,weighted.mean,weight=fit.MPLE$value$glm.result$prior.weights)
}else{
fit$taudelta.mean <- rep(NA, length(fulltau))
fit$taudelta.mad <- rep(NA, length(fulltau))
}

# post process fit to alter Hessian etc
Expand All @@ -310,6 +367,7 @@ ergm.tapered <- function(formula, r=2, beta=NULL, tau=NULL, tapering.centers=NUL
fit$hessian <- -fit$hessian
}
}
}

class(fit) <- c("ergm.tapered",family,class(fit))

Expand Down
181 changes: 181 additions & 0 deletions R/ergm.tapered.formula.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#' Create an Tapered ERGM formula specifying an Tapered ERGM model as a standard ERGM
#' @param formula An ergm formula to fit
#' @param r The scaling factor to use for the heuristic of setting beta equal to r standard deviations of the observed statistics
#' @param beta The tapering parameters, expressed as in Fellows and Handcock (2017). If not NULL, these override the heuristics (r).
#' @param tau The tapering parameters, expressed as natural parameters. If not NULL, these override the beta and the heuristics (r).
#' @param tapering.centers The centers of the tapering terms. If null, these are taken to be the mean value parameters.
#' @param target.stats {vector of "observed network statistics,"
#' if these statistics are for some reason different than the
#' actual statistics of the network on the left-hand side of
#' \code{formula}.
#' Equivalently, this vector is the mean-value parameter values for the
#' model. If this is given, the algorithm finds the natural
#' parameter values corresponding to these mean-value parameters.
#' If \code{NULL}, the mean-value parameters used are the observed
#' statistics of the network in the formula.
#' }
#' @param family The type of tapering used. This should either be the \code{stereo} or \code{taper}, the
#' tapering model of Fellows and Handcock (2017).
#' @param taper.terms Specification of the tapering used. If the character variable "dependence" then all the dependent
#' terms are tapered. If the character variable "all" then all terms are tapered.
#' It can also be the RHS of a formula giving the terms to be tapered.
#' @param response {Name of the edge attribute whose value is to be
#' modeled in the valued ERGM framework. Defaults to \code{NULL} for
#' simple presence or absence, modeled via a binary ERGM.}
#' @param constraints {A formula specifying one or more constraints
#' on the support of the distribution of the networks being modeled,
#' using syntax similar to the \code{formula} argument, on the
#' right-hand side. See \link[=ergm-constraints]{ERGM constraints}.}
#' @param reference {A one-sided formula specifying
#' the reference measure (\eqn{h(y)}) to be used.
#' See help for \link[=ergm-references]{ERGM reference measures} implemented in the
#' \strong{\link[=ergm-package]{ergm}} package for details.}
#' @param verbose A `logical`: if this is
#' \code{TRUE}, the program will print out additional
#' information about the progress of estimation.
#' @param ... Additional arguments to \code{\link{ergm}}.
#' @returns
#' An formula containing a specification of the Tapered ERGM model. In addition to all of the ergm items,
#' this object contains tapering.centers, tapering.coef and orig.formula. tapering.centers are the centers for the tapering term.
#' tapering.coef are the tapering coefficients = 1/ beta^2. orig.formula is the formula passed into ergm.tapered.
#' @importFrom stats var as.formula
#' @references \itemize{
#' * Fellows, I. and M. S. Handcock (2017),
#' Removing Phase Transitions from Gibbs Measures. Volume 54 of
#' Proceedings of Machine Learning Research, Fort Lauderdale,
#' FL, USA, pp. 289–297. PMLR.
#' * Blackburn, B. and M. S. Handcock (2022),
#' Practical Network Modeling via Tapered Exponential-family Random Graph Models.
#' Journal of Computational and Graphical Statistics
#' \doi{10.1080/10618600.2022.2116444}.
#'
#' }
#' @examples
#' \dontrun{
#' data(sampson)
#' fit <- ergm.tapered.formula(samplike ~ edges + triangles())
#' summary(fit)
#' }
#' @export
ergm.tapered.formula <- function(formula, r=2, beta=NULL, tau=NULL, tapering.centers=NULL, target.stats=NULL,
family="taper", taper.terms="all",
response=NULL, constraints=~., reference=~Bernoulli,
verbose=FALSE, ...){

# Determine the dyadic independence terms
nw <- ergm.getnetwork(formula)
m<-ergm_model(formula, nw, response=response, ...)

if(is.null(tapering.centers)) tapering.centers <- target.stats

# set tapering terms
if(is.character(taper.terms) & length(taper.terms)==1){
if(taper.terms=="dependent"){
a <- sapply(m$terms, function(term){is.null(term$dependence) || term$dependence})
taper.terms <- list_rhs.formula(formula)
tmp <- taper.terms
taper.terms <- NULL
for(i in seq_along(tmp)){if(a[i]){taper.terms <- c(taper.terms,tmp[[i]])}}
taper_formula <- append_rhs.formula(~.,taper.terms, env=NULL)
trimmed_formula=suppressWarnings(filter_rhs.formula(formula, function(term,taper.terms){all(term != taper.terms)}, taper.terms))
reformula <- append_rhs.formula(trimmed_formula,taper_formula, env=NULL)
}else{if(taper.terms=="all"){
taper.terms <- list_rhs.formula(formula)
taper_formula <- append_rhs.formula(~.,taper.terms, env=NULL)
trimmed_formula=suppressWarnings(filter_rhs.formula(formula, function(term,taper.terms){all(term != taper.terms)}, taper.terms))
reformula <- formula
}else{
if(!inherits(taper.terms,"formula")){
stop('taper.terms must be "dependent", "all" or a formula of terms.')
}
taper.terms <- list_rhs.formula(taper.terms)
taper_formula <- append_rhs.formula(~.,taper.terms, env=NULL)
trimmed_formula=suppressWarnings(filter_rhs.formula(formula, function(term,taper.terms){all(term != taper.terms)}, taper.terms))
reformula <- append_rhs.formula(trimmed_formula,taper_formula, env=NULL)
}}
}else{
taper_formula <- taper.terms
taper.terms <- list_rhs.formula(taper.terms)
trimmed_formula=suppressWarnings(filter_rhs.formula(formula, function(term,taper.terms){all(term != taper.terms)}, taper.terms))
reformula <- append_rhs.formula(trimmed_formula,taper_formula, env=NULL)
}
attr(taper.terms,"env") <- NULL
if(is.null(target.stats)){
taper.stats <- summary(append_rhs.formula(nw ~.,taper.terms), response=response, ...)
}else{
taper.stats <- target.stats
}

# set tapering coefficient
tau <- switch(family,
"stereo"={
if(is.null(beta) & is.null(tau)){
1
}else{
if(is.null(tau)){
beta
}else{
tau
}
}},
{if(is.null(tau)){
if(is.null(beta)){
1 / (r^2 * pmax(1,abs(taper.stats)))
}else{
1 / beta^2
}}else{tau}}
)
if(family=="stereo"){
names(tau) <- "stereo"
}else{
names(tau) <- names(taper.stats)
}

taper_terms <- switch(paste0(family,ifelse(fixed,"_fixed","_notfixed")),
"stereo_fixed"=statnet.common::nonsimp_update.formula(taper_formula,.~Stereo(~.,coef=.taper.coef,m=.taper.center),
environment(), from.new=TRUE),
"stereo_notfixed"=statnet.common::nonsimp_update.formula(taper_formula,.~Stereo(~.,coef=.taper.coef,m=.taper.center),
environment(), from.new=TRUE),
"taper_fixed"=statnet.common::nonsimp_update.formula(taper_formula,.~Taper(~.,coef=.taper.coef,m=.taper.center),
environment(), from.new=TRUE),
"taper_notfixed"=statnet.common::nonsimp_update.formula(taper_formula,.~Kpenalty(~.,coef=.taper.coef,m=.taper.center),
environment(), from.new=TRUE),
statnet.common::nonsimp_update.formula(taper_formula,.~Taper(~.,coef=.taper.coef,m=.taper.center),
environment(), from.new=TRUE)
)

if(length(list_rhs.formula(formula))==length(taper.terms)){
newformula <- switch(paste0(family,ifelse(fixed,"_fixed","_notfixed")),
"stereo_fixed"=statnet.common::nonsimp_update.formula(formula,.~Stereo(~.,coef=.taper.coef,m=.taper.center),
environment(), from.new=TRUE),
"stereo_notfixed"=statnet.common::nonsimp_update.formula(formula,.~Stereo(~.,coef=.taper.coef,m=.taper.center),
environment(), from.new=TRUE),
"taper_fixed"=statnet.common::nonsimp_update.formula(formula,.~Taper(~.,coef=.taper.coef,m=.taper.center),
environment(), from.new=TRUE),
"taper_notfixed"=statnet.common::nonsimp_update.formula(formula,.~Kpenalty(~.,coef=.taper.coef,m=.taper.center),
environment(), from.new=TRUE),
statnet.common::nonsimp_update.formula(formula,.~Taper(~.,coef=.taper.coef,m=.taper.center),
environment(), from.new=TRUE)
)

}else{
newformula <- append_rhs.formula(trimmed_formula,taper_terms, env=NULL)
}
ostats <- summary(reformula, response=response, ...)
if(!is.null(tapering.centers)){
tmp <- match(names(ostats), names(tapering.centers))
if(any(is.na(tmp))){
stop(paste("tapering.centers needs to have a named center for each statistic in the model:",
names(ostats)))
}
ostats <- tapering.centers[tmp]
}
npar <- length(ostats)

env <- new.env(parent=environment(formula))
env$.taper.center <- taper.stats
env$.taper.coef <- tau
environment(newformula) <- env

return(newformula)
}
Loading

0 comments on commit b6b5524

Please sign in to comment.