Adds sequential Pareto-lognormal estimation
john-p-ryan committed Feb 20, 2024
1 parent 14417ca commit 4768e5f
Showing 1 changed file with 37 additions and 2 deletions.
39 changes: 37 additions & 2 deletions iot/inverse_optimal_tax.py
@@ -136,7 +136,7 @@ def compute_mtr_dist(
)
binned_data = pd.DataFrame(
data[["mtr", income_measure, "z_bin", weight_var]]
.groupby(["z_bin"])
.groupby(["z_bin"], observed=False)
.apply(lambda x: wm(x[["mtr", income_measure]], x[weight_var]))
)
# make column 0 into two columns
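
In the hunk above, passing observed=False keeps every z_bin category in the grouped output (including empty bins) and silences the pandas FutureWarning about the default changing to observed=True. A tiny standalone illustration with made-up data, not the package's own weighted-mean helper:

import pandas as pd

# Made-up data: three observations binned into three categories, one of them empty.
df = pd.DataFrame({"z": [1.0, 2.0, 9.0]})
df["z_bin"] = pd.cut(df["z"], bins=[0, 5, 10, 15])  # the (10, 15] bin has no observations

print(df.groupby("z_bin", observed=False)["z"].count())  # 3 rows; the empty bin is kept with count 0
print(df.groupby("z_bin", observed=True)["z"].count())   # 2 rows; the empty bin is dropped
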
@@ -228,7 +228,7 @@ def compute_income_dist(
/ (z_line**2 * sigma**3 * np.sqrt(2 * np.pi))
)
)
elif dist_type == "log_normal_pareto":
elif dist_type == "log_normal_pareto_mle":
# lognormal before cutoff, pareto after
def lognormal_pareto_pdf(x, mu, sigma, b, cutoff):
# for continuity at cutoff
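
The body of lognormal_pareto_pdf is truncated by the hunk boundary above. Purely as orientation, here is a sketch of a spliced density that is consistent with the continuity constant c = lognorm.pdf(cutoff, ...) * cutoff / b and the renormalizing integral that appear later in the diff; it is an illustration, not the file's hidden code:

import numpy as np
import scipy.integrate
import scipy.stats as st

# Illustrative parameter values only; nothing here comes from the package.
mu, sigma, b, cutoff = 10.5, 0.8, 2.0, 150_000.0

# Pareto scale chosen so the two pieces meet at the cutoff:
# lognorm.pdf(cutoff) = c * b / cutoff  =>  c = lognorm.pdf(cutoff) * cutoff / b
c = st.lognorm.pdf(cutoff, sigma, scale=np.exp(mu)) * cutoff / b
# Unnormalized mass: lognormal mass below the cutoff plus the Pareto tail mass, which equals c.
integral = st.lognorm.cdf(cutoff, sigma, scale=np.exp(mu)) + c

def spliced_pdf(x):
    if x < cutoff:
        return st.lognorm.pdf(x, sigma, scale=np.exp(mu)) / integral
    return c * b * cutoff**b / x ** (b + 1) / integral

print(spliced_pdf(cutoff * (1 - 1e-9)), spliced_pdf(cutoff))  # equal: continuous at the cutoff
mass = (scipy.integrate.quad(spliced_pdf, 0, cutoff)[0]
        + scipy.integrate.quad(spliced_pdf, cutoff, np.inf)[0])
print(mass)  # ~1.0: a proper density after dividing by `integral`
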
@@ -276,6 +276,11 @@ def weighted_log_likelihood(params):
mu, sigma, b = scipy.optimize.minimize(weighted_log_likelihood, params_guess, method='Nelder-Mead').x
cutoff = pareto_cutoff

# assert that mu, sigma, b, and cutoff are all positive
assert mu > 0 and sigma > 0 and b > 0 and cutoff > 0
# assert that sigma, b, and cutoff are not unreasonably large
assert sigma < 25 and b < 25 and cutoff < 1_000_000

c = st.lognorm.pdf(cutoff, sigma, scale=np.exp(mu)) * cutoff / b
integral = st.lognorm.cdf(cutoff, sigma, scale=np.exp(mu)) + c
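
The new assertions reject degenerate Nelder-Mead solutions (non-positive or runaway parameter values) before the estimates are used to build the density, CDF, and density derivative. A self-contained sketch of the kind of weighted fit being checked, on synthetic data and with made-up starting values:

import numpy as np
import scipy.optimize
import scipy.stats as st

rng = np.random.default_rng(0)
z = np.exp(rng.normal(10.5, 0.8, size=5_000))  # synthetic incomes
w = np.ones_like(z)                            # uniform weights for simplicity
cutoff = 150_000.0

def neg_weighted_loglik(params):
    mu, sigma, b = params
    if sigma <= 0 or b <= 0 or sigma > 25 or b > 25:  # mirror the sanity bounds above
        return np.inf
    c = st.lognorm.pdf(cutoff, sigma, scale=np.exp(mu)) * cutoff / b
    integral = st.lognorm.cdf(cutoff, sigma, scale=np.exp(mu)) + c
    pdf = np.where(
        z < cutoff,
        st.lognorm.pdf(z, sigma, scale=np.exp(mu)),
        c * b * cutoff**b / z ** (b + 1),
    ) / integral
    return -np.sum(w * np.log(pdf))

mu, sigma, b = scipy.optimize.minimize(
    neg_weighted_loglik, x0=(10.0, 1.0, 2.0), method="Nelder-Mead"
).x
assert mu > 0 and 0 < sigma < 25 and 0 < b < 25  # same spirit as the asserts in the diff
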

@@ -289,6 +294,36 @@ def weighted_log_likelihood(params):
/ (z_line**2 * sigma**3 * np.sqrt(2 * np.pi))
),
(-c * b * (b+1) * cutoff**b / z_line**(b+2))) / integral
elif dist_type == "log_normal_pareto_sequential":
# requires a user-supplied pareto_cutoff
cutoff = pareto_cutoff
data_pareto = data[data[income_measure] > pareto_cutoff]
b = sum(data_pareto[weight_var]) / sum(data_pareto[weight_var] * np.log(data_pareto[income_measure] / pareto_cutoff))
# weighted lognormal parameters estimated from the full sample
mu = (
np.log(data[income_measure]) * data[weight_var]
).sum() / data[weight_var].sum()
sigma = np.sqrt((
(
((np.log(data[income_measure]) - mu) ** 2)
* data[weight_var]
).values
/ data[weight_var].sum()
).sum())

c = st.lognorm.pdf(cutoff, sigma, scale=np.exp(mu)) * cutoff / b
# for renormalization, derived analytically
integral = st.lognorm.cdf(cutoff, sigma, scale=np.exp(mu)) + c
f = np.where(z_line < cutoff, st.lognorm.pdf(z_line, sigma, scale=np.exp(mu)), c*b*cutoff**b / z_line**(b+1)) / integral
F = np.where(z_line <= cutoff, st.lognorm.cdf(z_line, sigma, scale=np.exp(mu)),
st.lognorm.cdf(cutoff, sigma, scale=np.exp(mu)) + c*(1 - (cutoff / z_line)**b)) / integral
f_prime = np.where(z_line <= cutoff, -1 *
np.exp(-((np.log(z_line) - mu) ** 2) / (2 * sigma**2))
* (
(np.log(z_line) + sigma**2 - mu)
/ (z_line**2 * sigma**3 * np.sqrt(2 * np.pi))
),
(-c * b * (b+1) * cutoff**b / z_line**(b+2))) / integral
elif dist_type == "kde":
# uses the original full data for kde estimation
f_function = st.gaussian_kde(
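
The new log_normal_pareto_sequential branch above sidesteps the joint optimization: it estimates the Pareto shape b with a weighted Hill / maximum-likelihood estimator on observations above pareto_cutoff, takes mu and sigma as the weighted mean and standard deviation of log income over the full sample, and then splices the two pieces with the same continuity constant c and renormalizing integral as the MLE branch. A standalone sketch of those two estimators on synthetic data (column names and values are made up):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
data = pd.DataFrame({
    "income": np.exp(rng.normal(10.5, 0.8, size=10_000)),  # synthetic incomes
    "weight": rng.uniform(0.5, 1.5, size=10_000),           # synthetic sample weights
})
pareto_cutoff = 150_000.0

# Weighted Hill / Pareto MLE of the tail shape, from observations above the cutoff.
tail = data[data["income"] > pareto_cutoff]
b = tail["weight"].sum() / (tail["weight"] * np.log(tail["income"] / pareto_cutoff)).sum()

# Weighted mean and standard deviation of log income over the full sample.
log_z = np.log(data["income"])
mu = (log_z * data["weight"]).sum() / data["weight"].sum()
sigma = np.sqrt((((log_z - mu) ** 2) * data["weight"]).sum() / data["weight"].sum())

print(b, mu, sigma)  # Pareto tail shape, lognormal location and scale

Because b and (mu, sigma) have closed forms, this branch avoids the Nelder-Mead step entirely; continuity at the cutoff is still imposed afterwards through c, just as in the MLE branch.
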
