-
Notifications
You must be signed in to change notification settings - Fork 1
/
toSql.py
22 lines (21 loc) · 875 Bytes
/
toSql.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import threading
def tosql(df, *args, **kargs):
    """Write ``df`` to SQL through ``df.to_sql``, threading large frames.

    Frames with at most ``CHUNKSIZE`` rows are written with one
    ``df.to_sql(*args, **kargs)`` call.  Larger frames are written in
    three steps:

    1. The first ``INITIAL_CHUNK`` rows are written synchronously with the
       caller's arguments untouched, so the caller's ``if_exists`` policy
       (if any) is applied exactly once.
    2. Each following full ``CHUNKSIZE``-row slice is written on its own
       thread.
    3. Any remaining rows are written on the calling thread.

    Steps 2 and 3 always pass ``if_exists='append'``; by then step 1 has
    already issued the first write and the remaining chunks must add to it.
    Because chunks are written concurrently, the order in which the
    slices reach ``to_sql`` is not deterministic.

    Args:
        df: Object providing ``len()``, ``iloc[rows, cols]`` slicing and a
            ``to_sql`` method (presumably a pandas DataFrame -- confirm
            against callers).
        *args: Positional arguments forwarded verbatim to ``to_sql``.
        **kargs: Keyword arguments forwarded to ``to_sql``.  ``if_exists``
            is optional and should be given as a keyword, since it is
            overridden by keyword for every chunk after the first.

    Raises:
        Exception: The first exception raised by a threaded chunk write is
            re-raised here once every thread has been joined, instead of
            being lost with the thread.
    """
    CHUNKSIZE = 1000
    INITIAL_CHUNK = 100
    total = len(df)
    # Used only in progress messages; the first to_sql argument may have
    # been passed positionally or as the ``name`` keyword.
    table = args[0] if args else kargs.get('name')

    if total <= CHUNKSIZE:
        df.to_sql(*args, **kargs)
        print('added: smaller than or same as chunksize', total, table)
        return

    # Small synchronous first write, done before any concurrent append.
    df.iloc[:INITIAL_CHUNK, :].to_sql(*args, **kargs)

    # Every later chunk must append, whatever policy the caller requested.
    append_kargs = dict(kargs, if_exists='append')
    errors = []

    def write_rows(start, stop):
        # Bounds arrive as arguments so each thread keeps its own slice
        # (a closure over the loop variable would see later values).
        try:
            df.iloc[start:stop, :].to_sql(*args, **append_kargs)
        except Exception as exc:
            errors.append(exc)

    full_chunks = (total - INITIAL_CHUNK) // CHUNKSIZE
    tail_start = INITIAL_CHUNK + full_chunks * CHUNKSIZE
    workers = []
    try:
        for index in range(full_chunks):
            start = INITIAL_CHUNK + index * CHUNKSIZE
            worker = threading.Thread(
                target=write_rows, args=(start, start + CHUNKSIZE)
            )
            worker.start()
            workers.append(worker)
        print('total number of threads:', len(workers), 'for', table)
        # Leftover rows that do not fill a whole chunk go on this thread.
        if tail_start < total:
            df.iloc[tail_start:, :].to_sql(*args, **append_kargs)
    finally:
        # Always wait for the workers, even if the tail write failed.
        for worker in workers:
            worker.join()

    if errors:
        raise errors[0]
    print('added data:', total, 'to', table)