Skip to content

Commit

Permalink
Amended program to include timeout mechanism
Browse files Browse the repository at this point in the history
- Relates to #69
  • Loading branch information
bdcallen committed Feb 19, 2020
1 parent 8d850bb commit e5be0e7
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions process_sc13dg_indexes.py
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
from multiprocess import Pool
import datetime as dt

function_timeout = 60

engine = create_engine(conn_string)

directory = os.getenv("EDGAR_DIR")
@@ -32,16 +34,16 @@

end = (i + 1) * batch_size

success = pd.Series(p.map(lambda i: write_indexes_to_table(full_df.loc[i, 'file_name'], full_df.loc[i, 'document'], full_df.loc[i, 'form_type'], directory, engine) , range(start, end)))
success = pd.Series(p.map(lambda i: write_indexes_to_table(full_df.loc[i, 'file_name'], full_df.loc[i, 'document'],\
full_df.loc[i, 'form_type'], directory, engine, function_timeout) , range(start, end)))
time_now = dt.datetime.now()
time_taken = time_now - start_time
num_success = num_success + success.sum()

if(i % 50 == 0 or i == num_batches - 1):

print(num_success + ' filings successfully process from ' + end)
print('Time taken: ' + time_taken)
print(str(num_success) + ' filings successfully process from ' + str(end))
print('Time taken: ' + str(time_taken))


p.close()

0 comments on commit e5be0e7

Please sign in to comment.