From e5be0e7f554a62b8821da7a47427f06b851d2a5f Mon Sep 17 00:00:00 2001
From: Ben Callen
Date: Wed, 19 Feb 2020 16:51:11 +1100
Subject: [PATCH] Amended program to include timeout mechanism - Relates to #69

---
 process_sc13dg_indexes.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/process_sc13dg_indexes.py b/process_sc13dg_indexes.py
index d79f1bb..3a8cbf6 100644
--- a/process_sc13dg_indexes.py
+++ b/process_sc13dg_indexes.py
@@ -6,6 +6,8 @@
 from multiprocess import Pool
 import datetime as dt
 
+function_timeout = 60
+
 engine = create_engine(conn_string)
 
 directory = os.getenv("EDGAR_DIR")
@@ -32,16 +34,16 @@
     end = (i + 1) * batch_size
 
 
-    success = pd.Series(p.map(lambda i: write_indexes_to_table(full_df.loc[i, 'file_name'], full_df.loc[i, 'document'], full_df.loc[i, 'form_type'], directory, engine) , range(start, end)))
+    success = pd.Series(p.map(lambda i: write_indexes_to_table(full_df.loc[i, 'file_name'], full_df.loc[i, 'document'],\
+                        full_df.loc[i, 'form_type'], directory, engine, function_timeout) , range(start, end)))
 
     time_now = dt.datetime.now()
     time_taken = time_now - start_time
     num_success = num_success + success.sum()
 
     if(i % 50 == 0 or i == num_batches - 1):
-        print(num_success + ' filings successfully process from ' + end)
-        print('Time taken: ' + time_taken)
+        print(str(num_success) + ' filings successfully process from ' + str(end))
+        print('Time taken: ' + str(time_taken))
 
 p.close()
 
-