Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update pandas version #7

Merged
merged 6 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
name: Python application test  # runs the unittest suite on pushes and PRs targeting master

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.10']  # quoted so YAML does not read it as the float 3.1

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4  # v2 ran on the retired Node 12 runtime

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5  # v2 ran on the retired Node 12 runtime
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run tests
        run: |
          python -m unittest discover tests
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ install-lib:
python setup.py bdist_wheel
pip3 install --force-reinstall dist/**.whl

tests:
test:
python -m unittest discover

lint:
Expand Down
3 changes: 0 additions & 3 deletions janus/stats/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ def calculate_probabilities(self, other_variant, metric_name: str):
def calculate_bootstrap(
self, metric_name: str, eval_function=get_bootstraped_mean, spark_session=None
):

if metric_name == "revenue":
data = list(self.variant_df.query("sales == 1")["revenue"])
elif metric_name == "conversion":
Expand Down Expand Up @@ -143,7 +142,6 @@ def __init__(
self.results = {}

def run_experiment(self, df_results_per_user: pd.DataFrame):

logging.info(f"INITIALIZING experiment '{self.name}' evaluation...")
variant_names = list(df_results_per_user.alternative.unique())
assert (
Expand Down Expand Up @@ -201,7 +199,6 @@ def evaluate_statistics(
variant1.calculate_probabilities(variant2, metric_name)

def consolidate_results(self, variantA: Variant, variantB: Variant):

for variant_interest, variant_other in zip(
(variantA, variantB), (variantB, variantA)
):
Expand Down
1 change: 0 additions & 1 deletion janus/utils/make_test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ def createTestDataFrame(
average_ticket_baseline: float,
average_ticket_alternative: float,
):

# checks to run before the function body starts:
# TODO: 1. no parameter is null
# TODO: 2. all types are correct
Expand Down
22 changes: 10 additions & 12 deletions pages/1_Analyze_with_Summary_Information.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,7 @@
value=100,
)
control_total_value = st.number_input(
label="Total Conversion Value in Control",
value=200.0,
step=1.,
format="%.4f"
label="Total Conversion Value in Control", value=200.0, step=1.0, format="%.4f"
)

# Treatment
Expand All @@ -58,11 +55,13 @@
test_total_value = st.number_input(
label="Total Conversion Value in Treatment",
value=250.00,
step=1.,
format="%.4f"
step=1.0,
format="%.4f",
)

experiment_name = st.text_input(label='Experiment Name (Optional)', value='My Experiment')
experiment_name = st.text_input(
label="Experiment Name (Optional)", value="My Experiment"
)

submit_button = st.form_submit_button(label="Run Experiment")

Expand All @@ -83,7 +82,7 @@
df_per_user_simulated = create_per_user_dataframe_multivariate(
df, conversion_value_cols=conversion_value_cols
)

# st.write("df")
# st.dataframe(df)
# st.write("df_per_user_simulated")
Expand All @@ -95,16 +94,16 @@
# TODO: generalize this code for all pages and generalize
# lib's revenue col to monetary values
df_per_user_simulated = df_per_user_simulated.rename(
columns={"converted": "sales", "total_value": 'revenue'}
columns={"converted": "sales", "total_value": "revenue"}
) # hacking, sales are generic conversions in janus lib
experiment = Experiment(
name=experiment_name,
keymetrics=["conversion", "revenue", "arpu"],
baseline_variant_name='control',
baseline_variant_name="control",
)
experiment.run_experiment(df_results_per_user=df_per_user_simulated)
save_results_in_session_state(
experiment, control_label='control', treatment_label='treatment'
experiment, control_label="control", treatment_label="treatment"
)

# Show Results in dataframe form v0
Expand All @@ -120,4 +119,3 @@

st.write("### Treatment")
st.dataframe(data=pd.DataFrame.from_dict(st.session_state.treatment_stats))

9 changes: 8 additions & 1 deletion pages/2_Analyze_with_Summary_CSV.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,14 @@
options=[
c
for c in df.columns
if c not in [label_values, "sales", "exposure_period", "exposures", "conversions"]
if c
not in [
label_values,
"sales",
"exposure_period",
"exposures",
"conversions",
]
],
help="Select which column refers to the value that comes from conversions, e.g.: revenue or cost from conversons.",
)
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pandas==1.2.5
pandas==1.4.4
numpy==1.25.1
scipy==1.11.1
streamlit==1.13.0
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
author="Luan Fernandes",
license="",
install_requires=[
"pandas==1.2.5",
"pandas==1.4.4",
"numpy==1.25.1",
"scipy==1.11.1",
],
Expand Down
13 changes: 0 additions & 13 deletions tests/test_create_per_user_dataframe_multivariate.py

This file was deleted.

2 changes: 1 addition & 1 deletion tests/test_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def test_run_experiment_one_variant(self):
}
experiment = Experiment(**args)

df_results_per_user = pd.read_csv("tests/results_per_user.csv")
df_results_per_user = pd.read_csv("examples/results_per_user.csv")
with self.assertRaises(AssertionError):
experiment.run_experiment(df_results_per_user=df_results_per_user)

Expand Down
3 changes: 1 addition & 2 deletions tests/test_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ def test_init(self):
self.assertEqual(variant.name, "A")

def test_consolidate_results(self):

results_per_user = pd.read_csv("tests/results_per_user.csv")
results_per_user = pd.read_csv("examples/results_per_user.csv")
variant_results = {
"A": {
"users": 6,
Expand Down
7 changes: 5 additions & 2 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,13 @@ def save_results_in_session_state(experiment, control_label, treatment_label):
experiment.results[control_label]["statistics"]
)


def explain_metrics():
st.write("""
st.write(
"""
- **chance_to_beat**: chance that the variant is better than the other.
- **expected_loss**: a measure of the risk you're assuming if you stay with this variant. The lower the risk, the best (e.g: 0.10 in conversion means that your risk of staying with the variant compared to the other is to lose 10% p.p. 0.10 for arpu is a risk of loosing $0.10 per user.)
- **lift**: the observed relative difference compared to the other variant for each metric.
- **diff**: the observed absolute difference compared to the other variant for each metric.
""")
"""
)
Loading