Skip to content

Commit

Permalink
Simplified bs code. Added SetEnv call and bs example to help file
Browse files Browse the repository at this point in the history
  • Loading branch information
droodman committed Dec 29, 2023
1 parent 36df3a7 commit a1d24a6
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 54 deletions.
79 changes: 42 additions & 37 deletions reghdfejl.ado
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,11 @@ program define reghdfejl, eclass
macro shift
if `bs' {
local 0 `*'
syntax, [CLuster(string) Reps(integer 50) mse seed(string) NODOTS dots(integer 1) SIze(integer 0) PROCs(integer 1)]
syntax, [CLuster(string) Reps(integer 50) mse seed(string) SIze(integer 0) PROCs(integer 1)]
_assert `reps'>1, msg("reps() must be an integer greater than 1") rc(198)
_assert `dots'>0, msg("dots() must be an integer greater than 0") rc(198)
_assert `size'>=0, msg("size() must be an integer greater than 0") rc(198)
_assert `procs'>=0, msg("procs() must be an integer greater than 0") rc(198)
_assert `size'>=0, msg("size() must be a positive integer") rc(198)
_assert `procs'>=0, msg("procs() must be a positive integer") rc(198)
if `procs'==0 local procs 1
local bscluster `cluster'
if `"`seed'"'!="" set seed `seed'
}
Expand Down Expand Up @@ -302,44 +302,49 @@ program define reghdfejl, eclass
if "`wtvar'"!="" jl, qui: sumweights = mapreduce((w,s)->(s ? w : 0), +, df.`wtvar', m.esample; init = 0)

if `k' {
tempname b V
tempname b V

* bootstrap
if 0`bs' {
di _n "Bootstrap replications (" as res `reps' "): " _c
jl, qui: rng = StableRNG(`=runiformint(0, 9007199254740992)'); /// // chain Stata rng to Julia rng
coefbs = fill(zero(Float64), k); ///
Vbs = fill(zero(Float64), k, k)
local hasclust = "`bscluster'"!=""
if `hasclust' {
jl, qui: groups = [findall(a->a==i, df.`bscluster') for i in Set(df.`bscluster')]; ///
bssize = iszero(`size') ? length(groups) : `size'
forvalues m=1/`reps' {
jl, qui: _df = df[vcat(rand(rng, groups, bssize)...),:]; ///
_m = reg(_df, f `wtopt' `methodopt' `threadsopt', tol=`tolerance', maxiter=`iterations'); ///
coefbs .+= coef(_m); ///
Vbs .+= coef(_m) .* coef(_m)'
if "`nodots'"=="" & !mod(`m',`dots') di cond(mod(`m',10*`dots'),".","`m'") _c
}
}
else {
jl, qui: one2N = collect(1:sizedf[1]); ///
_df = similar(df); ///
bssize = iszero(`size') ? sizedf[1] : `size'
forvalues m=1/`reps' {
jl, qui: _df .= view(df,rand(rng, one2N, bssize),:); ///
_m = reg(_df, f `wtopt' `methodopt' `threadsopt', tol=`tolerance', maxiter=`iterations'); ///
coefbs .+= coef(_m); ///
Vbs .+= coef(_m) .* coef(_m)'
if "`nodots'"=="" & !mod(`m',`dots') di cond(mod(`m',10*`dots'),".","`m'") _c
}
tempname bswt

qui jl: nworkers()
if `procs' != `r(ans)' {
jl, qui: rmprocs(procs())
if `procs'>1 jl, qui: addprocs(`procs', exeflags="-t1"); /* single-threaded workers */ ///
@everywhere using `=cond(c(os)=="MacOSX", "Metal, AppleAccelerate", "CUDA, BLISBLAS")', StableRNGs, DataFrames, FixedEffectModels, SharedArrays
}
if "`nodots'"=="" di " done"
jl: _df = nothing; ///
Vbs .-= coefbs ./ `reps' .* coefbs'; ///
Vbs ./= `reps' - `="`mse'"==""'; ///
rand(rng, Int32) // chain Julia rng back to Stata to advance it replicably
set seed `r(ans)'

jl, qui: rngs = [StableRNG(`=runiformint(0, 1e6)' * i + 42) for i in 1:maximum(workers())] // different, deterministic seeds for each worker

if `hasclust' ///
jl, qui: s = Set(df.`bscluster'); ///
Nclust = length(s); ///
_id = SharedVector(getindex.(Ref(Dict(zip(s, 1:Nclust))), df.`bscluster')) /* ordinalize cluster id */
else
jl, qui: Nclust = size(df,1); ///
_id = Colon()

jl, qui: bssize = `=cond(0`size',"`size'","Nclust")'; ///
bsweights = Vector{Int}(undef, Nclust); ///
@everywhere function reghdfejlbs(bsweights, bssize, rngs, Nclust, df, _id, f) ///
fill!(bsweights, 0); ///
rng = rngs[myid()]; ///
@inbounds for i in 1:bssize /* bs draws */ ///
bsweights[rand(rng, 1:Nclust)] += 1 ///
end; ///
df.`bswt' = bsweights[_id]; ///
`=cond("`wtopt'"!="", "df.`bswt' .*= df.`wtvar';", "")' ///
b = coef(reg(df, f, weights=:`bswt' `methodopt' `threadsopt', tol=`tolerance', maxiter=`iterations')); ///
[b, b*b'] ///
end; ///
retval = @distributed (+) for m in 1:`reps' ///
reghdfejlbs(bsweights, bssize, rngs, Nclust, df, _id, f) ///
end; ///
Vbs = retval[2]; ///
Vbs .-= retval[1] ./ `reps' .* retval[1]'; ///
Vbs ./= `reps' - `="`mse'"==""'
}
}

Expand Down
43 changes: 30 additions & 13 deletions reghdfejl.sthlp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{smcl}
{* *! version 0.6.0 20dec2023}{...}
{* *! version 0.6.0 23dec2023}{...}

{title:Title}

Expand Down Expand Up @@ -71,7 +71,8 @@ cannot be of the form {it:i.y} though, only {it:#.y} (where # is a number){p_end
{synoptline}
{p 4 6 2}Exactly one of {opt gen:erate()} and {opt pre:fix()} must be specified.

{p 4 6 2}{cmd:vce(bootstrap,} {it:bsoptions}{cmd:)} accepts the following {help bootstrap:bootstrap options}: {opt r:obust}, {opt cl:uster()}, {opt seed()}, {opt reps()}, {opt mse}, {opt size}, {opt nodots}, {opt dots()}.
{p 4 6 2}{cmd:vce(bootstrap,} {it:bsoptions}{cmd:)} accepts these standard {help bootstrap:bootstrap options}: {opt r:obust}, {opt cl:uster(varname)}, {opt seed(#)}, {opt reps(#)}, {opt mse},
{opt size(#)}. In addition it accepts a {opt proc:s()} option to accelerate the bootstrap through multitasking.


{marker description}{...}
Expand All @@ -98,7 +99,7 @@ of another function in the Julia package.
{pstd}
If {cmd:reghdfejl} appears to be failing to install the needed packages,
you can try intervening manually: start Julia outside of Stata, hit the "]" key to enter the package manager, and type
{cmd:add <pkgname>} for each package. The needed packages are Vcov, FixedEffectModels, DataFrames, and either Metal (for
{cmd:add <pkgname>} for each package. The needed packages are Vcov, FixedEffectModels, DataFrames, and Metal (for
Macs) or CUDA (otherwise).

{pstd}
Expand Down Expand Up @@ -131,15 +132,21 @@ useful when you have plenty of RAM, when the number of non-absorbed regressors i
(for then the computational efficiency of Julia shines).

{pstd}
{cmd:reghdfejl} offers two novel features that can increase speed. The first is access to multithreading in Julia, even in
{cmd:reghdfejl} offers several novel features that can increase speed. The first is access to multithreading in Julia, even in
flavors of Stata that do not offer multiprocessing. The {opt threads(#)}
option pertains to this feature. But it can only {it:reduce} the number of CPU threads Julia uses. The default number--and the
maximum--is set by the {browse "https://docs.julialang.org/en/v1/manual/multi-threading/":system environment variable JULIA_NUM_THREADS}. It is
possible for the default to be too high as well as too low. If you set it high, then you can experiment using {opt threads(#)}. See
{help jl##threads:help jl} for more on determining and controlling the number of threads.

{pstd}
The other novel feature is access to GPU-based computation. The {cmd:gpu} specifies the use of NVIDIA or Apple Silicon
In a similar vein, {cmd:reghdfejl} offers accelerated bootstrapping for the purpose of computing standard errors, via the {cmd:bs}/{cmd:bootstrap}
suboption of the {opt vce()} option. The results should be the same, asymptotically, as if one prefixes a {cmd:reghdfejl} command line with
Stata's {cmd:bootstrap} command. But they should come much faster because copying of data between Stata and Julia is minimized, and multiple
copies of Julia are launched for parallelization.

{pstd}
A final new feature is access to GPU-based computation. The {cmd:gpu} specifies the use of NVIDIA or Apple Silicon
GPUs for computation. Typically this modestly increases speed.

{pstd}
Expand All @@ -163,7 +170,7 @@ if the destination variables already exist, unless {cmd:replace} is also specifi
{synopt:{it:varname}}categorical variable to be absorbed{p_end}
{synopt:{cmd:i.}{it:varname}}categorical variable to be absorbed (same as above; the {cmd:i.} prefix is always implicit){p_end}
{synopt:{cmd:i.}{it:var1}{cmd:#i.}{it:var2}}absorb the interactions of multiple categorical variables{p_end}
{synopt:{cmd:i.}{it:var1}{cmd:#}{cmd:c.}{it:var2}}absorb heterogeneous slopes, where {it:var2} has a different slope estimate depending on {it:var1}. Use carefully (see below!){p_end}
{synopt:{cmd:i.}{it:var1}{cmd:#}{cmd:c.}{it:var2}}absorb heterogeneous slopes, where {it:var2} has a different slope estimate depending on {it:var1}. Use carefully (see below)!{p_end}
{synopt:{it:var1}{cmd:##}{cmd:c.}{it:var2}}absorb heterogenous intercepts and slopes. Equivalent to "{cmd:i.}{it:var1} {cmd:i.}{it:var1}{cmd:#}{cmd:c.}{it:var2}", but {it:much} faster{p_end}
{synopt:{it:var1}{cmd:##c.(}{it:var2 var3}{cmd:)}}multiple heterogeneous slopes are allowed together. Alternative syntax: {it:var1}{cmd:##(c.}{it:var2} {cmd:c.}{it:var3}{cmd:)}{p_end}
{synopt:{it:v1}{cmd:#}{it:v2}{cmd:#}{it:v3}{cmd:##c.(}{it:v4 v5}{cmd:)}}factor operators can be combined{p_end}
Expand Down Expand Up @@ -215,10 +222,20 @@ are correlated within groups. Multi-way-clustering is allowed.

{pmore}
{cmd:vce(bootstrap,} {it:bsoptions}{cmd:)}} and {cmd:vce(bs,} {it:bsoptions}{cmd:)} are synonyms. They request estimation of standard errors using the non-parametric or "pairs"
bootstrap. {cmd: reghdfejl ..., ... vce(bs,} {it:bsoptions}{cmd:)} should return the same results as {cmd: bs,} {it:bsoptions}{cmd:: reghdfejl ..., ...}. More precisely, the
two should converge as the number of replications rises. But the first is faster because it avoids copying data from Stata to Julia on every replication. {it:bsoptions}
may include any of the following {help bootstrap:bootstrap options}: {opt r:obust}, {opt cl:uster()}, {opt seed()}, {opt reps()}, {opt mse}, {opt size}, {opt nodots},
{opt dots()}.
bootstrap. In expectation, {cmd: reghdfejl ..., ... vce(bs,} {it:bsoptions}{cmd:)} should return the same results as {cmd: bs,} {it:bsoptions}{cmd:: reghdfejl ..., ...}. But the
first is faster because it avoids copying data from Stata to Julia on every replication and can exploit mulitasking. {it:bsoptions}
may include any of the following suboptions: {opt r:obust}, {opt cl:uster(varname)}, {opt seed(#)}, {opt reps(#)}, {opt mse}, {opt size(#)}, and
{opt proc:s(#)}. All but the last are standard {help bootstrap:bootstrap options}. The last instructs {cmd:reghdfejl} to launch several
copies of Julia in order to run the bootstrap in parallel. The {opt proc:s(#)} suboption is semantically distinct from {cmd:reghdfejl}'s {opt threads()}
option. The latter triggers low-level multitasking: the Julia package FixedEffectModels.jl spreads certain loops across multiple threads. The former instead runs
multiple copies of Julia, each of which loads and runs FixedEffectModels.jl, on just one thread. This set-up is efficient for "embarrassingly parallel"
tasks such as bootstrapping. While the options are implemented in different ways, the principles governing the optimal number to choose are the
same. See {help jl##threads:help jl}.

{pmore}
Note that while setting the {opt seed(#)} suboption allows for exact reproducibility of results, even with the same seed, changing the {opt proc:s(#)}
suboption will (slightly) change results. The latter affects how the bootstrap is distributed across the Julia processes, each with its own
psuedorandom number stream.

{phang}
{cmdab:res:iduals[(}{help newvar}{cmd:})]} saves the regression residuals in a new variable. {opt res:iduals} without parenthesis saves them
Expand Down Expand Up @@ -305,6 +322,7 @@ the resulting variable will always be of type {it:double}.{p_end}
{phang}. {stata webuse nlswork}{p_end}
{phang}. {stata reghdfejl ln_wage age ttl_exp tenure not_smsa south, absorb(idcode year)}{p_end}
{phang}. {stata reghdfejl ln_wage age ttl_exp tenure not_smsa south, absorb(year occ_code) cluster(year occ_code)}{p_end}
{phang}. {stata reghdfejl ln_wage age ttl_exp tenure not_smsa south, absorb(year occ_code) vce(bs, cluster(occ_code) reps(1000) seed(42) procs(4))}{p_end}

{phang}. {stata partialhdfejl ln_wage age ttl_exp tenure not_smsa south, absorb(year occ_code) prefix(_) replace}{p_end}
{phang}. {stata reghdfejl _ln_wage _age _ttl_exp _tenure _not_smsa _south, cluster(year occ_code) nocons} // same point estimates as in previous regression{p_end}
Expand Down Expand Up @@ -482,9 +500,8 @@ Email: {browse "mailto:[email protected]":[email protected]}
{pstd}
More so than for most packages, in writing this one, the author stands on the shoulders of giants. {cmd:reghdfejl} is merely a wrapper
for {browse "https://www.matthieugomez.com/":Matthieu Gomez}'s {browse "https://github.com/FixedEffects/FixedEffectModels.jl":FixedEfectModels.jl},
which is itself a Julia implementation of {browse "http://scorreia.com/":Sergio Correia}'s path-breaking {help reghdfe}. {cmd:reghdfejl}'s code for
postestimation functionality is copied from {cmd:reghdfe}, as are parts of this help file. The Julia programming language is a free, open-source project
with many contributors.
which is itself an implementation---with important innovations---of {browse "http://scorreia.com/":Sergio Correia}'s path-breaking {help reghdfe}. {cmd:reghdfejl}'s code for
postestimation functionality is copied from {cmd:reghdfe}, as are parts of this help file. The Julia programming language is a free, open-source project.

{pstd}

Expand Down
14 changes: 10 additions & 4 deletions reghdfejl_load.ado
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

cap program drop reghdfejl_load
program define reghdfejl_load
local JLVERSION 0.7.2
local JLVERSION 0.8.0

if `"$reghdfejl_loaded"'=="" {
cap jl version
Expand All @@ -12,19 +12,25 @@ program define reghdfejl_load
exit 198
}
if "`r(version)'" != "`JLVERSION'" {
parse "`r(version)'", parse(".")
local v1 `1'
local v2 `3'
local v3 `5'
parse "`JLVERSION'", parse(".")
if `v1'<`1' | `v1'==`1' & `v2'<`3' | `v1'==`1' & `v2'==`3' & `v3'<`5' {
di as txt "The Stata package {cmd:julia} is not up to date. Attempting to update it with {stata ssc install julia, replace}." _n
ssc install julia, replace
}
local gpulib = cond(c(os)=="MacOSX", "Metal", "CUDA")
local blaslib = cond(c(os)=="MacOSX", "AppleAccelerate", "BLISBLAS")
jl SetEnv reghdfejl
jl AddPkg `blaslib'
jl AddPkg `gpulib'
jl AddPkg StableRNGs
jl AddPkg FixedEffectModels, minver(1.10.2)
jl AddPkg FixedEffectModels, minver(1.11.0)
jl AddPkg Vcov, minver(0.8.1)
jl, qui: using `blaslib', `gpulib', FixedEffectModels, Vcov, StableRNGs
jl, qui: using `blaslib', `gpulib', FixedEffectModels, Vcov, StableRNGs, Distributed, DataFrames
global reghdfejl_loaded 1
}
end

0 comments on commit a1d24a6

Please sign in to comment.