Skip to content

Commit

Permalink
final submission 12/5
Browse files Browse the repository at this point in the history
  • Loading branch information
apiraccini committed May 12, 2024
1 parent 93e12cb commit f6a91af
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 1 deletion.
Binary file modified model.joblib
Binary file not shown.
19 changes: 19 additions & 0 deletions submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,19 @@ def clean_df(df, background_df=None):
# parents
'cf20m005', # num # year of birth father 2020
'cf20m009', # num # year of birth mother 2020

#gynaecologist
'ch20m219',

# questions about father and mother
'cf20m007',
'cf20m008',
'cf20m011',
'cf20m012',
'cf20m013',
'cf20m014',
'cf20m015',
'cf20m016'
]
golden_features_df = df[golden_features + ['nomem_encr']]

Expand Down Expand Up @@ -257,6 +270,8 @@ def process_background_df(background_df, train_df, wave_filter='201601'):
'sted': [f_map_urban_type], # urban character of place of residence
'belbezig': [f_map_occupation_type], # primary occupation
'brutohh_f': [f_actual, f_med, f_std], # gross household income in Euros
'nettohh_f': [f_actual, f_med, f_std], # net household income in Euros

})

out.columns = out.columns.map('_'.join).str.strip('_')
Expand All @@ -278,6 +293,9 @@ def process_background_df(background_df, train_df, wave_filter='201601'):
'brutohh_f_<lambda_0>': 'actual_household_gross_monthly_income_qt',
'brutohh_f_<lambda_1>': 'actual_household_gross_monthly_income_med_qt',
'brutohh_f_<lambda_2>': 'actual_household_gross_monthly_income_std_qt',
'nettohh_f_<lambda_0>': 'actual_household_net_monthly_income_qt',
'nettohh_f_<lambda_1>': 'actual_household_net_monthly_income_med_qt',
'nettohh_f_<lambda_2>': 'actual_household_net_monthly_income_std_qt',
})

return out
Expand All @@ -287,6 +305,7 @@ def personality_bigfive(train_df):
pattern = r'^cp.*0[2-6][0-9]$'

codebook_df = pd.read_csv('PreFer_codebook.csv', low_memory=False)
codebook_df.head()
codebook_df_personality = codebook_df['var_name'][(codebook_df['survey'] == "Personality") & (codebook_df['year'] == 2020)]
train_personality = train_df[codebook_df_personality]

Expand Down
2 changes: 1 addition & 1 deletion training.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def train_save_model(cleaned_df: pd.DataFrame, outcome_df: pd.DataFrame, evaluat
parent_proj_dir = proj_dir.parent
data_dir = parent_proj_dir / 'prefer_data'

evaluate = True
evaluate = False

# import data
print('Loading data...')
Expand Down

0 comments on commit f6a91af

Please sign in to comment.