From 42f4689fdf8799bc67624ffea14edb9e3dbde2f6 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 31 Oct 2024 06:58:58 +0000 Subject: [PATCH] update backup scheduler for Prresutls DB --- common_utils.py | 4 ++++ eventHandler.py | 33 +++++++++++++++++++++++++++++++-- html_fetching.py | 2 +- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/common_utils.py b/common_utils.py index 2aaf182..35ee0ca 100644 --- a/common_utils.py +++ b/common_utils.py @@ -156,6 +156,10 @@ def analyze_pagepile_processing(): print("No processing records found for Pagepile QIDs") return None +#Data extraction for time complexity analysis +def time_complexity_analysis(): + pass + if __name__ == "__main__": processed_date = "2024-09-06" #pagePile_results_extraction(processed_date) diff --git a/eventHandler.py b/eventHandler.py index e428fdc..b15859a 100644 --- a/eventHandler.py +++ b/eventHandler.py @@ -292,6 +292,31 @@ def update_prior_item_list(): print("prior_item_list.csv has been successfully updated.") +def backup_database(): + """ + Backup the reference_checked.db file to the specified HPC directory + with date prefix in format YYYYMMDD_reference_checked.db + """ + try: + # Source database path + source_db = 'reference_checked.db' + + # Create backup directory if it doesn't exist + backup_dir = '/hpc/scratch/prj/inf_wqp/prove_backup' + os.makedirs(backup_dir, exist_ok=True) + + # Generate backup filename with date prefix + date_prefix = datetime.datetime.now().strftime('%Y%m%d') + backup_filename = f"{date_prefix}_reference_checked.db" + backup_path = os.path.join(backup_dir, backup_filename) + + # Copy the database file + import shutil + shutil.copy2(source_db, backup_path) + print(f"Database backup created successfully at {backup_path}") + + except Exception as e: + print(f"Error during database backup: {e}") def main(batch_qids): reset_database = False # Developer mode to test, it initialize db for getting clean db @@ -303,15 +328,19 @@ def main(batch_qids): print(f"Database file {db_path} has been deleted.") initialize_database(db_path) + + # Schedule both tasks for Monday schedule.every().monday.do(update_prior_item_list) + schedule.every().monday.do(backup_database) + while True: try: prove_process(db_path, batch_qids, algo_version) schedule.run_pending() except Exception as e: print(f"An error occurred in the main loop: {e}") - time.sleep(30) - + time.sleep(30) + if __name__ == "__main__": batch_qids = 2 diff --git a/html_fetching.py b/html_fetching.py index d67fd42..b73723a 100644 --- a/html_fetching.py +++ b/html_fetching.py @@ -270,7 +270,7 @@ def reading_html_by_requests(self, url: str) -> None: def reading_html_by_chrome(self, driver, url: str) -> None: try: - response = requests.get(url, timeout=5) + response = requests.get(url, timeout=15) if response.status_code == 200: driver.get(url)