Commit 2ea703d: Use a standard LOG_VARIANCE between methods

danielbachhuber committed Dec 13, 2024
1 parent 3d2e24c commit 2ea703d

Showing 2 changed files with 39 additions and 39 deletions.
@@ -58,18 +58,18 @@ def run_test(stats_version, calculate_probabilities, are_results_significant, ca

self.assertEqual(len(probabilities), 2)
if stats_version == 2:
-self.assertAlmostEqual(probabilities[0], 0.5, delta=0.1)
-self.assertAlmostEqual(probabilities[1], 0.5, delta=0.1)
+self.assertAlmostEqual(probabilities[0], 0.4, delta=0.1)
+self.assertAlmostEqual(probabilities[1], 0.6, delta=0.1)
self.assertEqual(significance, ExperimentSignificanceCode.LOW_WIN_PROBABILITY)
self.assertEqual(p_value, 1)

# Control: ~$100 mean with wide interval due to small sample
self.assertAlmostEqual(intervals["control"][0], 85, delta=5) # Lower bound
self.assertAlmostEqual(intervals["control"][1], 110, delta=5) # Upper bound
self.assertAlmostEqual(intervals["control"][0], 72, delta=5) # Lower bound
self.assertAlmostEqual(intervals["control"][1], 128, delta=5) # Upper bound

# Test: ~$105 mean with wide interval due to small sample
self.assertAlmostEqual(intervals["test"][0], 90, delta=5) # Lower bound
self.assertAlmostEqual(intervals["test"][1], 115, delta=5) # Upper bound
self.assertAlmostEqual(intervals["test"][0], 75, delta=5) # Lower bound
self.assertAlmostEqual(intervals["test"][1], 130, delta=5) # Upper bound
else:
# Original implementation behavior for small sample
self.assertAlmostEqual(probabilities[0], 0.5, delta=0.2)
@@ -111,12 +111,12 @@ def run_test(stats_version, calculate_probabilities, are_results_significant, ca
self.assertEqual(p_value, 0)

# Control: $100 mean with narrow interval due to large sample
self.assertAlmostEqual(intervals["control"][0], 100, delta=2) # Lower bound
self.assertAlmostEqual(intervals["control"][1], 100, delta=2) # Upper bound
self.assertAlmostEqual(intervals["control"][0], 97, delta=2) # Lower bound
self.assertAlmostEqual(intervals["control"][1], 103, delta=2) # Upper bound

# Test: $120 mean with narrow interval due to large sample
self.assertAlmostEqual(intervals["test"][0], 120, delta=2) # Lower bound
self.assertAlmostEqual(intervals["test"][1], 120, delta=2) # Upper bound
self.assertAlmostEqual(intervals["test"][0], 116, delta=2) # Lower bound
self.assertAlmostEqual(intervals["test"][1], 124, delta=2) # Upper bound
else:
# Original implementation behavior for large sample
self.assertAlmostEqual(probabilities[1], 0.75, delta=0.25)
@@ -160,12 +160,12 @@ def run_test(stats_version, calculate_probabilities, are_results_significant, ca
self.assertEqual(p_value, 0)

# Control: $100 mean
self.assertAlmostEqual(intervals["control"][0], 100, delta=2) # Lower bound
self.assertAlmostEqual(intervals["control"][1], 100, delta=2) # Upper bound
self.assertAlmostEqual(intervals["control"][0], 97, delta=2) # Lower bound
self.assertAlmostEqual(intervals["control"][1], 103, delta=2) # Upper bound

# Test: $150 mean, clearly higher than control
self.assertAlmostEqual(intervals["test"][0], 150, delta=3) # Lower bound
self.assertAlmostEqual(intervals["test"][1], 150, delta=3) # Upper bound
self.assertAlmostEqual(intervals["test"][0], 146, delta=3) # Lower bound
self.assertAlmostEqual(intervals["test"][1], 154, delta=3) # Upper bound
else:
# Original implementation behavior for strongly significant case
self.assertTrue(probabilities[1] > 0.5) # Test variant winning
@@ -219,20 +219,20 @@ def run_test(stats_version, calculate_probabilities, are_results_significant, ca

# All variants around $100 with overlapping intervals
# Control variant
self.assertAlmostEqual(intervals["control"][0], 95, delta=5) # Lower bound
self.assertAlmostEqual(intervals["control"][1], 105, delta=5) # Upper bound
self.assertAlmostEqual(intervals["control"][0], 90, delta=5) # Lower bound
self.assertAlmostEqual(intervals["control"][1], 110, delta=5) # Upper bound

# Test A variant
self.assertAlmostEqual(intervals["test_a"][0], 95, delta=5) # Lower bound
self.assertAlmostEqual(intervals["test_a"][1], 105, delta=5) # Upper bound
self.assertAlmostEqual(intervals["test_a"][0], 90, delta=5) # Lower bound
self.assertAlmostEqual(intervals["test_a"][1], 110, delta=5) # Upper bound

# Test B variant
self.assertAlmostEqual(intervals["test_b"][0], 95, delta=5) # Lower bound
self.assertAlmostEqual(intervals["test_b"][1], 105, delta=5) # Upper bound
self.assertAlmostEqual(intervals["test_b"][0], 90, delta=5) # Lower bound
self.assertAlmostEqual(intervals["test_b"][1], 110, delta=5) # Upper bound

# Test C variant
self.assertAlmostEqual(intervals["test_c"][0], 95, delta=5) # Lower bound
self.assertAlmostEqual(intervals["test_c"][1], 105, delta=5) # Upper bound
self.assertAlmostEqual(intervals["test_c"][0], 90, delta=5) # Lower bound
self.assertAlmostEqual(intervals["test_c"][1], 110, delta=5) # Upper bound
else:
# Original implementation behavior for multiple variants with no clear winner
self.assertTrue(all(0.1 < p < 0.9 for p in probabilities))
@@ -299,20 +299,20 @@ def run_test(stats_version, calculate_probabilities, are_results_significant, ca
self.assertEqual(p_value, 0)

# Control at $100
self.assertAlmostEqual(intervals["control"][0], 100, delta=2)
self.assertAlmostEqual(intervals["control"][1], 100, delta=2)
self.assertAlmostEqual(intervals["control"][0], 97, delta=1)
self.assertAlmostEqual(intervals["control"][1], 103, delta=1)

# Test A slightly higher at $105
self.assertAlmostEqual(intervals["test_a"][0], 105, delta=2)
self.assertAlmostEqual(intervals["test_a"][1], 105, delta=2)
self.assertAlmostEqual(intervals["test_a"][0], 102, delta=1)
self.assertAlmostEqual(intervals["test_a"][1], 108, delta=1)

# Test B clearly winning at $150
self.assertAlmostEqual(intervals["test_b"][0], 150, delta=3)
self.assertAlmostEqual(intervals["test_b"][1], 150, delta=3)
self.assertAlmostEqual(intervals["test_b"][0], 146, delta=1)
self.assertAlmostEqual(intervals["test_b"][1], 154, delta=1)

# Test C slightly higher at $110
self.assertAlmostEqual(intervals["test_c"][0], 110, delta=2)
self.assertAlmostEqual(intervals["test_c"][1], 110, delta=2)
self.assertAlmostEqual(intervals["test_c"][0], 106, delta=1)
self.assertAlmostEqual(intervals["test_c"][1], 114, delta=1)
else:
# Original implementation behavior for multiple variants with clear winner
self.assertTrue(probabilities[2] > 0.5) # test_b should be winning
@@ -353,11 +353,11 @@ def run_test(stats_version, calculate_probabilities, are_results_significant, ca
self.assertEqual(p_value, 1.0)

# Both variants should have wide intervals due to small sample size
self.assertAlmostEqual(intervals["control"][0], 80, delta=10)
self.assertAlmostEqual(intervals["control"][1], 110, delta=10)
self.assertAlmostEqual(intervals["control"][0], 62, delta=10)
self.assertAlmostEqual(intervals["control"][1], 138, delta=10)

self.assertAlmostEqual(intervals["test"][0], 95, delta=10)
self.assertAlmostEqual(intervals["test"][1], 125, delta=10)
self.assertAlmostEqual(intervals["test"][0], 75, delta=10)
self.assertAlmostEqual(intervals["test"][1], 160, delta=10)
else:
# Original implementation behavior for insufficient sample size
self.assertAlmostEqual(probabilities[0], 0.075, delta=0.025)
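The second file below changes the implementation these tests exercise. For context, the widened intervals asserted above follow directly from the posterior construction in that file once both methods share LOG_VARIANCE = 2. Here is a minimal sketch of the derivation, not the repository's code: KAPPA_0 = 1.0 and MU_0 = 0.0 are assumed (they are referenced but not defined in this diff), the exposure value is hypothetical, and the exp back-transform is inferred from the dollar-scale assertions.

import numpy as np
from scipy.stats import t

KAPPA_0, MU_0 = 1.0, 0.0    # assumed priors; not shown in this diff
ALPHA_0, BETA_0 = 1.0, 1.0  # prior shape/scale for variance
LOG_VARIANCE = 2
EPSILON = 1e-10

def credible_interval(mean_value, absolute_exposure, lower=0.025, upper=0.975):
    # Log-transform the mean, update the Normal-Inverse-Gamma posterior,
    # and read the interval bounds off the resulting t distribution.
    log_mean = np.log(mean_value + EPSILON)
    kappa_n = KAPPA_0 + absolute_exposure
    mu_n = (KAPPA_0 * MU_0 + absolute_exposure * log_mean) / kappa_n
    alpha_n = ALPHA_0 + absolute_exposure / 2
    beta_n = BETA_0 + 0.5 * absolute_exposure * LOG_VARIANCE
    posterior = t(df=2 * alpha_n, loc=mu_n, scale=np.sqrt(beta_n / (kappa_n * alpha_n)))
    return np.exp(posterior.ppf(lower)), np.exp(posterior.ppf(upper))

print(credible_interval(100, 1000))  # bounds tighten as exposure grows

Running the same update with the old log_variance = 0.25 gives the narrower bounds the removed assertions expected, which is why every interval in the test file shifts outward.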
@@ -11,6 +11,8 @@
ALPHA_0 = 1.0 # Prior shape for variance
BETA_0 = 1.0 # Prior scale for variance

+LOG_VARIANCE = 2
+
SAMPLE_SIZE = 10000
EPSILON = 1e-10 # Small epsilon value to handle zeros

@@ -53,13 +55,12 @@ def calculate_probabilities_v2_continuous(

# Calculate posterior parameters for control
log_control_mean = np.log(control_variant.count + EPSILON) # Using count field to store mean value
-log_variance = 2 # Assumed variance in log-space

# Update parameters for control
kappa_n_control = KAPPA_0 + control_variant.absolute_exposure
mu_n_control = (KAPPA_0 * MU_0 + control_variant.absolute_exposure * log_control_mean) / kappa_n_control
alpha_n_control = ALPHA_0 + control_variant.absolute_exposure / 2
-beta_n_control = BETA_0 + 0.5 * control_variant.absolute_exposure * log_variance
+beta_n_control = BETA_0 + 0.5 * control_variant.absolute_exposure * LOG_VARIANCE

# Draw samples from control posterior
control_posterior = t(
@@ -75,7 +76,7 @@
kappa_n_test = KAPPA_0 + test.absolute_exposure
mu_n_test = (KAPPA_0 * MU_0 + test.absolute_exposure * log_test_mean) / kappa_n_test
alpha_n_test = ALPHA_0 + test.absolute_exposure / 2
-beta_n_test = BETA_0 + 0.5 * test.absolute_exposure * log_variance
+beta_n_test = BETA_0 + 0.5 * test.absolute_exposure * LOG_VARIANCE

test_posterior = t(
df=2 * alpha_n_test, loc=mu_n_test, scale=np.sqrt(beta_n_test / (kappa_n_test * alpha_n_test))
@@ -166,13 +167,12 @@ def calculate_credible_intervals_v2_continuous(variants, lower_bound=0.025, uppe
try:
# Log-transform the mean value, adding epsilon to handle zeros
log_mean = np.log(variant.count + EPSILON) # Using count field to store mean value
-log_variance = 0.25

# Calculate posterior parameters using absolute_exposure
kappa_n = KAPPA_0 + variant.absolute_exposure
mu_n = (KAPPA_0 * MU_0 + variant.absolute_exposure * log_mean) / kappa_n
alpha_n = ALPHA_0 + variant.absolute_exposure / 2
-beta_n = BETA_0 + 0.5 * variant.absolute_exposure * log_variance
+beta_n = BETA_0 + 0.5 * variant.absolute_exposure * LOG_VARIANCE

# Create posterior distribution
posterior = t(df=2 * alpha_n, loc=mu_n, scale=np.sqrt(beta_n / (kappa_n * alpha_n)))
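As a rough usage sketch of the probability side, not the repository's API: with the shared LOG_VARIANCE, the chance that test beats control can be estimated by drawing SAMPLE_SIZE values from each variant's t-posterior and comparing them. The prior values, means, and exposures below are assumptions.

import numpy as np
from scipy.stats import t

KAPPA_0, MU_0 = 1.0, 0.0  # assumed priors; not shown in this diff
ALPHA_0, BETA_0, LOG_VARIANCE = 1.0, 1.0, 2
SAMPLE_SIZE, EPSILON = 10000, 1e-10

def posterior(mean_value, absolute_exposure):
    # Same Normal-Inverse-Gamma update as in the diff, returning the frozen t.
    log_mean = np.log(mean_value + EPSILON)
    kappa_n = KAPPA_0 + absolute_exposure
    mu_n = (KAPPA_0 * MU_0 + absolute_exposure * log_mean) / kappa_n
    alpha_n = ALPHA_0 + absolute_exposure / 2
    beta_n = BETA_0 + 0.5 * absolute_exposure * LOG_VARIANCE
    return t(df=2 * alpha_n, loc=mu_n, scale=np.sqrt(beta_n / (kappa_n * alpha_n)))

control_samples = posterior(100, 5000).rvs(SAMPLE_SIZE)  # hypothetical $100 mean
test_samples = posterior(120, 5000).rvs(SAMPLE_SIZE)     # hypothetical $120 mean
print((test_samples > control_samples).mean())  # ~1.0 at this separation and exposure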
