Commit

Use correct absolute_exposure values
danielbachhuber committed Dec 13, 2024
1 parent 7dfc17b commit 9f43ba7
Showing 1 changed file with 104 additions and 24 deletions.
128 changes: 104 additions & 24 deletions posthog/hogql_queries/experiments/test/test_trends_statistics_count.py
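The commit updates the `create_variant` test helper so relative and absolute exposure are passed separately, with each variant's relative `exposure` computed as its absolute exposure divided by the control's. A minimal sketch of that arithmetic, using a hypothetical stand-in dataclass (`VariantStats`) in place of the real `ExperimentVariantTrendsBaseStats` model, with the numbers taken from the last test in the diff:

from dataclasses import dataclass


# Stand-in for ExperimentVariantTrendsBaseStats; the real model lives in the
# PostHog codebase and may carry additional fields.
@dataclass
class VariantStats:
    key: str
    count: int
    exposure: float  # relative to control (control is always 1)
    absolute_exposure: int  # raw number of exposed users


def create_variant(key: str, count: int, exposure: float, absolute_exposure: int) -> VariantStats:
    return VariantStats(key=key, count=count, exposure=exposure, absolute_exposure=absolute_exposure)


control_absolute_exposure = 10000
test_absolute_exposure = 12000

control = create_variant("control", count=1000, exposure=1, absolute_exposure=control_absolute_exposure)
test = create_variant(
    "test",
    count=1200,
    # 12000 / 10000 == 1.2: the test variant saw 20% more traffic than control
    exposure=test_absolute_exposure / control_absolute_exposure,
    absolute_exposure=test_absolute_exposure,
)

assert test.exposure == 1.2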
@@ -13,8 +13,10 @@
 from posthog.test.base import APIBaseTest


-def create_variant(key: str, count: int, exposure: int) -> ExperimentVariantTrendsBaseStats:
-    return ExperimentVariantTrendsBaseStats(key=key, count=count, exposure=exposure, absolute_exposure=exposure)
+def create_variant(key: str, count: int, exposure: float, absolute_exposure: int) -> ExperimentVariantTrendsBaseStats:
+    return ExperimentVariantTrendsBaseStats(
+        key=key, count=count, exposure=exposure, absolute_exposure=absolute_exposure
+    )


 def create_variant_with_different_exposures(
@@ -50,8 +52,15 @@ def test_small_sample_two_variants_not_significant(self):
         """Test with small sample size, two variants, no clear winner"""

         def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals):
-            control = create_variant("control", count=10, exposure=100)
-            test = create_variant("test", count=11, exposure=100)
+            control_absolute_exposure = 100
+            control = create_variant("control", count=10, exposure=1, absolute_exposure=control_absolute_exposure)
+            test_absolute_exposure = 100
+            test = create_variant(
+                "test",
+                count=11,
+                exposure=test_absolute_exposure / control_absolute_exposure,
+                absolute_exposure=test_absolute_exposure,
+            )

             probabilities = calculate_probabilities(control, [test])
             significance, p_value = are_results_significant(control, [test], probabilities)
@@ -77,8 +86,15 @@ def test_large_sample_two_variants_significant(self):
         """Test with large sample size, two variants, clear winner"""

         def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals):
-            control = create_variant("control", count=1000, exposure=10000)
-            test = create_variant("test", count=1200, exposure=10000)
+            control_absolute_exposure = 10000
+            control = create_variant("control", count=1000, exposure=1, absolute_exposure=control_absolute_exposure)
+            test_absolute_exposure = 10000
+            test = create_variant(
+                "test",
+                count=1200,
+                exposure=test_absolute_exposure / control_absolute_exposure,
+                absolute_exposure=test_absolute_exposure,
+            )

             probabilities = calculate_probabilities(control, [test])
             significance, p_value = are_results_significant(control, [test], probabilities)
@@ -107,8 +123,15 @@ def test_large_sample_two_variants_strongly_significant(self):
         """Test with large sample size, two variants, very clear winner"""

         def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals):
-            control = create_variant("control", count=1000, exposure=10000)
-            test = create_variant("test", count=1500, exposure=10000)
+            control_absolute_exposure = 10000
+            control = create_variant("control", count=1000, exposure=1, absolute_exposure=control_absolute_exposure)
+            test_absolute_exposure = 10000
+            test = create_variant(
+                "test",
+                count=1500,
+                exposure=test_absolute_exposure / control_absolute_exposure,
+                absolute_exposure=test_absolute_exposure,
+            )

             probabilities = calculate_probabilities(control, [test])
             significance, p_value = are_results_significant(control, [test], probabilities)
@@ -137,10 +160,29 @@ def test_many_variants_not_significant(self):
         """Test with multiple variants, no clear winner"""

         def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals):
-            control = create_variant("control", count=100, exposure=1000)
-            test_a = create_variant("test_a", count=98, exposure=1000)
-            test_b = create_variant("test_b", count=102, exposure=1000)
-            test_c = create_variant("test_c", count=101, exposure=1000)
+            control_absolute_exposure = 1000
+            control = create_variant("control", count=100, exposure=1, absolute_exposure=control_absolute_exposure)
+            test_a_absolute_exposure = 1000
+            test_a = create_variant(
+                "test_a",
+                count=98,
+                exposure=test_a_absolute_exposure / control_absolute_exposure,
+                absolute_exposure=test_a_absolute_exposure,
+            )
+            test_b_absolute_exposure = 1000
+            test_b = create_variant(
+                "test_b",
+                count=102,
+                exposure=test_b_absolute_exposure / control_absolute_exposure,
+                absolute_exposure=test_b_absolute_exposure,
+            )
+            test_c_absolute_exposure = 1000
+            test_c = create_variant(
+                "test_c",
+                count=101,
+                exposure=test_c_absolute_exposure / control_absolute_exposure,
+                absolute_exposure=test_c_absolute_exposure,
+            )

             probabilities = calculate_probabilities(control, [test_a, test_b, test_c])
             significance, p_value = are_results_significant(control, [test_a, test_b, test_c], probabilities)
@@ -170,10 +212,29 @@ def test_many_variants_significant(self):
         """Test with multiple variants, one clear winner"""

         def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals):
-            control = create_variant("control", count=1000, exposure=10000)
-            test_a = create_variant("test_a", count=1050, exposure=10000)
-            test_b = create_variant("test_b", count=1500, exposure=10000)
-            test_c = create_variant("test_c", count=1100, exposure=10000)
+            control_absolute_exposure = 10000
+            control = create_variant("control", count=1000, exposure=1, absolute_exposure=control_absolute_exposure)
+            test_a_absolute_exposure = 10000
+            test_a = create_variant(
+                "test_a",
+                count=1050,
+                exposure=test_a_absolute_exposure / control_absolute_exposure,
+                absolute_exposure=test_a_absolute_exposure,
+            )
+            test_b_absolute_exposure = 10000
+            test_b = create_variant(
+                "test_b",
+                count=1500,
+                exposure=test_b_absolute_exposure / control_absolute_exposure,
+                absolute_exposure=test_b_absolute_exposure,
+            )
+            test_c_absolute_exposure = 10000
+            test_c = create_variant(
+                "test_c",
+                count=1100,
+                exposure=test_c_absolute_exposure / control_absolute_exposure,
+                absolute_exposure=test_c_absolute_exposure,
+            )

             probabilities = calculate_probabilities(control, [test_a, test_b, test_c])
             significance, p_value = are_results_significant(control, [test_a, test_b, test_c], probabilities)
@@ -211,8 +272,15 @@ def test_insufficient_sample_size(self):
         """Test with sample size below threshold"""

         def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals):
-            control = create_variant("control", count=5, exposure=50)
-            test = create_variant("test", count=8, exposure=50)
+            control_absolute_exposure = 50
+            control = create_variant("control", count=5, exposure=1, absolute_exposure=control_absolute_exposure)
+            test_absolute_exposure = 50
+            test = create_variant(
+                "test",
+                count=8,
+                exposure=test_absolute_exposure / control_absolute_exposure,
+                absolute_exposure=test_absolute_exposure,
+            )

             probabilities = calculate_probabilities(control, [test])
             significance, p_value = are_results_significant(control, [test], probabilities)
@@ -241,8 +309,15 @@ def test_edge_cases(self):
         """Test edge cases like zero counts"""

         def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals):
-            control = create_variant("control", count=0, exposure=1000)
-            test = create_variant("test", count=0, exposure=1000)
+            control_absolute_exposure = 1000
+            control = create_variant("control", count=0, exposure=1, absolute_exposure=control_absolute_exposure)
+            test_absolute_exposure = 1000
+            test = create_variant(
+                "test",
+                count=0,
+                exposure=test_absolute_exposure / control_absolute_exposure,
+                absolute_exposure=test_absolute_exposure,
+            )

             probabilities = calculate_probabilities(control, [test])
             significance, p_value = are_results_significant(control, [test], probabilities)
@@ -268,11 +343,16 @@ def test_different_relative_and_absolute_exposure(self):

         def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals):
             # Control has exposure=1 (relative) but absolute_exposure=10000
-            control = create_variant_with_different_exposures(
-                "control", count=1000, exposure=1, absolute_exposure=10000
-            )
+            control_absolute_exposure = 10000
+            control = create_variant("control", count=1000, exposure=1, absolute_exposure=control_absolute_exposure)
             # Test has exposure=1.2 (relative) but absolute_exposure=12000
-            test = create_variant_with_different_exposures("test", count=1200, exposure=1.2, absolute_exposure=12000)
+            test_absolute_exposure = 12000
+            test = create_variant(
+                "test",
+                count=1200,
+                exposure=test_absolute_exposure / control_absolute_exposure,
+                absolute_exposure=test_absolute_exposure,
+            )

             probabilities = calculate_probabilities(control, [test])
             significance, p_value = are_results_significant(control, [test], probabilities)
