Skip to content

Commit

Permalink
Merge pull request #7 from lgabs/upgrade-pandas
Browse files Browse the repository at this point in the history
Update pandas version
  • Loading branch information
lgabs authored Oct 11, 2023
2 parents 421fa90 + 321ba13 commit a4ca5d7
Show file tree
Hide file tree
Showing 12 changed files with 60 additions and 38 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Continuous integration workflow: installs the pinned dependencies from
# requirements.txt and runs the unittest suite under tests/.
name: Python application test

# Run on every push to master and on every pull request targeting master.
on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Keep the version quoted: an unquoted 3.10 is parsed by YAML as the
        # float 3.1 and would select the wrong interpreter.
        python-version: ['3.10']

    steps:
      # The v2 releases of the checkout / setup-python actions target a
      # Node.js runtime that GitHub has deprecated; use current majors.
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # Discovery is rooted at tests/ so only the test package is collected.
      - name: Run tests
        run: |
          python -m unittest discover tests
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ install-lib:
python setup.py bdist_wheel
pip3 install --force-reinstall dist/**.whl

tests:
test:
python -m unittest discover

lint:
Expand Down
3 changes: 0 additions & 3 deletions janus/stats/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ def calculate_probabilities(self, other_variant, metric_name: str):
def calculate_bootstrap(
self, metric_name: str, eval_function=get_bootstraped_mean, spark_session=None
):

if metric_name == "revenue":
data = list(self.variant_df.query("sales == 1")["revenue"])
elif metric_name == "conversion":
Expand Down Expand Up @@ -143,7 +142,6 @@ def __init__(
self.results = {}

def run_experiment(self, df_results_per_user: pd.DataFrame):

logging.info(f"INITIALIZING experiment '{self.name}' evaluation...")
variant_names = list(df_results_per_user.alternative.unique())
assert (
Expand Down Expand Up @@ -201,7 +199,6 @@ def evaluate_statistics(
variant1.calculate_probabilities(variant2, metric_name)

def consolidate_results(self, variantA: Variant, variantB: Variant):

for variant_interest, variant_other in zip(
(variantA, variantB), (variantB, variantA)
):
Expand Down
1 change: 0 additions & 1 deletion janus/utils/make_test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ def createTestDataFrame(
average_ticket_baseline: float,
average_ticket_alternative: float,
):

# testes antes de começar função:
# TODO: 1. nenhum parâmetro nulo
# TODO: 2. todos os tipos estão certos
Expand Down
22 changes: 10 additions & 12 deletions pages/1_Analyze_with_Summary_Information.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,7 @@
value=100,
)
control_total_value = st.number_input(
label="Total Conversion Value in Control",
value=200.0,
step=1.,
format="%.4f"
label="Total Conversion Value in Control", value=200.0, step=1.0, format="%.4f"
)

# Treatment
Expand All @@ -58,11 +55,13 @@
test_total_value = st.number_input(
label="Total Conversion Value in Treatment",
value=250.00,
step=1.,
format="%.4f"
step=1.0,
format="%.4f",
)

experiment_name = st.text_input(label='Experiment Name (Optional)', value='My Experiment')
experiment_name = st.text_input(
label="Experiment Name (Optional)", value="My Experiment"
)

submit_button = st.form_submit_button(label="Run Experiment")

Expand All @@ -83,7 +82,7 @@
df_per_user_simulated = create_per_user_dataframe_multivariate(
df, conversion_value_cols=conversion_value_cols
)

# st.write("df")
# st.dataframe(df)
# st.write("df_per_user_simulated")
Expand All @@ -95,16 +94,16 @@
# TODO: generalize this code for all pages and generalize
# lib's revenue col to monetary values
df_per_user_simulated = df_per_user_simulated.rename(
columns={"converted": "sales", "total_value": 'revenue'}
columns={"converted": "sales", "total_value": "revenue"}
) # hacking, sales are generic conversions in janus lib
experiment = Experiment(
name=experiment_name,
keymetrics=["conversion", "revenue", "arpu"],
baseline_variant_name='control',
baseline_variant_name="control",
)
experiment.run_experiment(df_results_per_user=df_per_user_simulated)
save_results_in_session_state(
experiment, control_label='control', treatment_label='treatment'
experiment, control_label="control", treatment_label="treatment"
)

# Show Results in dataframe form v0
Expand All @@ -120,4 +119,3 @@

st.write("### Treatment")
st.dataframe(data=pd.DataFrame.from_dict(st.session_state.treatment_stats))

9 changes: 8 additions & 1 deletion pages/2_Analyze_with_Summary_CSV.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,14 @@
options=[
c
for c in df.columns
if c not in [label_values, "sales", "exposure_period", "exposures", "conversions"]
if c
not in [
label_values,
"sales",
"exposure_period",
"exposures",
"conversions",
]
],
help="Select which column refers to the value that comes from conversions, e.g.: revenue or cost from conversions.",
)
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pandas==1.2.5
pandas==1.4.4
numpy==1.25.1
scipy==1.11.1
streamlit==1.13.0
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
author="Luan Fernandes",
license="",
install_requires=[
"pandas==1.2.5",
"pandas==1.4.4",
"numpy==1.25.1",
"scipy==1.11.1",
],
Expand Down
13 changes: 0 additions & 13 deletions tests/test_create_per_user_dataframe_multivariate.py

This file was deleted.

2 changes: 1 addition & 1 deletion tests/test_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def test_run_experiment_one_variant(self):
}
experiment = Experiment(**args)

df_results_per_user = pd.read_csv("tests/results_per_user.csv")
df_results_per_user = pd.read_csv("examples/results_per_user.csv")
with self.assertRaises(AssertionError):
experiment.run_experiment(df_results_per_user=df_results_per_user)

Expand Down
3 changes: 1 addition & 2 deletions tests/test_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ def test_init(self):
self.assertEqual(variant.name, "A")

def test_consolidate_results(self):

results_per_user = pd.read_csv("tests/results_per_user.csv")
results_per_user = pd.read_csv("examples/results_per_user.csv")
variant_results = {
"A": {
"users": 6,
Expand Down
7 changes: 5 additions & 2 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,13 @@ def save_results_in_session_state(experiment, control_label, treatment_label):
experiment.results[control_label]["statistics"]
)


def explain_metrics():
st.write("""
st.write(
"""
- **chance_to_beat**: chance that the variant is better than the other.
- **expected_loss**: a measure of the risk you're assuming if you stay with this variant. The lower the risk, the better (e.g.: 0.10 in conversion means that your risk of staying with the variant compared to the other is to lose 10 p.p.; 0.10 for arpu is a risk of losing $0.10 per user).
- **lift**: the observed relative difference compared to the other variant for each metric.
- **diff**: the observed absolute difference compared to the other variant for each metric.
""")
"""
)

0 comments on commit a4ca5d7

Please sign in to comment.