From 62efd328d2233f4ee7f9abe2f075bbdf3e122f95 Mon Sep 17 00:00:00 2001 From: Michael Tenetko Date: Mon, 2 Dec 2024 22:40:47 +0300 Subject: [PATCH] (fix) made changes in add_q5011_2t so that it correctly works with different date formats --- add_q5011_2t.py | 83 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 10 deletions(-) diff --git a/add_q5011_2t.py b/add_q5011_2t.py index 525d098..f1fc873 100644 --- a/add_q5011_2t.py +++ b/add_q5011_2t.py @@ -2,14 +2,15 @@ import pandas as pd import psycopg2 import zipfile -import datetime +from datetime import datetime from glob import glob class Q5011_2TUpdater: - Q5011_2T_DATETIME_FORMAT = "%Y/%m/%d %H:%M:%S" # 2024/09/04 07:07:06 - ISO_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S" + IVDATE1_DATETIME_FORMAT = "%d.%m.%Y %H:%M:%S" # 02.05.2022 15:16:18 + ISO_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S" # 2022-05-02 15:04:09 + def __init__(self): self.config = self.get_config() @@ -18,9 +19,33 @@ def get_config(self): return json.load(input_file) def get_dataframe(self, file_name): - return pd.read_excel(file_name) + df = pd.read_excel(file_name) + df = df.astype({"Q5011_2T": "str"}) + + return df + + def is_month_incorrect(self, dataframe): + first_row = dataframe.iloc[0] + ivdate = first_row["IVDate1"] + ivdate = datetime.strptime(ivdate, self.IVDATE1_DATETIME_FORMAT) + + for _, row in dataframe.iterrows(): + recruiting_date = row["Q5011_2T"] + if pd.isna(recruiting_date) or recruiting_date == "nan": + continue + + recruiting_date_month = int(recruiting_date[5:7]) + + if ivdate.month != recruiting_date_month: + return True + + else: + return False + + return False + + def update_table(self, dataframe, month_is_incorrect): - def update_table(self, dataframe): with psycopg2.connect( host=self.config["db_host"], dbname=self.config["db_name"], @@ -29,8 +54,20 @@ def update_table(self, dataframe): ) as conn: with conn.cursor() as cur: for _, row in dataframe.iterrows(): - recruiting_date = datetime.strptime(row["Q5011_2T"], self.Q5011_2T_DATETIME_FORMAT) - recruiting_date = datetime.strftime(recruiting_date, self.ISO_DATETIME_FORMAT) + recruiting_date = row["Q5011_2T"] + # 2024/09/04 07:07:06 + # 2024-09-04 07:07:06 + + ivdate1 = row["IVDate1"] + ivdate1 = datetime.strptime(ivdate1, self.IVDATE1_DATETIME_FORMAT) + ivdate1 = datetime.strftime(ivdate1, self.ISO_DATETIME_FORMAT) + + if pd.isna(recruiting_date) or recruiting_date == "nan": + recruiting_date = ivdate1 + + elif month_is_incorrect: + recruiting_date = self.make_fixed_recruiting_date(recruiting_date) + query_parameters = { "id": row["ID"], "q5011_2t": recruiting_date, @@ -45,15 +82,41 @@ def update_table(self, dataframe): ) conn.commit() + def make_fixed_recruiting_date(self, recruiting_date): + month = recruiting_date[5:7] + new_month = self.get_new_month(month) + + rd_year = recruiting_date[:4] + rd_day_and_time = recruiting_date[8:] + + new_recruiting_date = f"{rd_year}-{new_month}-{rd_day_and_time}" + + return new_recruiting_date + + def get_new_month(self, month): + month = int(month) + 1 + month = f"{month:02d}" + + return month + def run(self): for file_name in glob("./xlsx/*.zip"): print(file_name[7:]) - with zipfile.ZipFile(file_name, 'r') as zip: + with zipfile.ZipFile(file_name, "r") as zip: with zip.open(zip.namelist()[0]) as excel_file: dataframe = self.get_dataframe(excel_file) - self.update_table(dataframe) + month_is_incorrect = self.is_month_incorrect(dataframe) + self.update_table(dataframe, month_is_incorrect) + + def run_excel(self): + for file_name in glob("./xlsx/*.xlsx"): + print(file_name[7:]) + dataframe = self.get_dataframe(file_name) + month_is_incorrect = self.is_month_incorrect(dataframe) + self.update_table(dataframe, month_is_incorrect) if __name__ == "__main__": u = Q5011_2TUpdater() - u.run() \ No newline at end of file + u.run_excel() + # u.run() \ No newline at end of file