Commit 8eb6793
Python download script (openai#89)
Added a Python download script and modified requirements.txt to add the modules it needs. Tested on Windows 10 (Version 10.0.17134, Build 17134) and Ubuntu 18.04.1 LTS.
webproduktion01 authored and WuTheFWasThat committed Mar 4, 2019
1 parent 0465394 commit 8eb6793
Showing 2 changed files with 26 additions and 0 deletions.
24 changes: 24 additions & 0 deletions download_model.py
@@ -0,0 +1,24 @@
#!/usr/bin/env python
import os
import sys
import requests
from tqdm import tqdm

if len(sys.argv) != 2:
    print('You must enter the model name as a parameter, e.g.: download_model.py 117M')
    sys.exit(1)
model = sys.argv[1]
# Create the target directory if it does not exist already
if not os.path.exists('models/' + model):
    os.makedirs('models/' + model)
# Download each of the model files, streaming them to disk
for filename in ['checkpoint', 'encoder.json', 'hparams.json', 'model.ckpt.data-00000-of-00001', 'model.ckpt.index', 'model.ckpt.meta', 'vocab.bpe']:
    r = requests.get("https://storage.googleapis.com/gpt-2/models/" + model + "/" + filename, stream=True)
    # The 'wb' flag is required on Windows
    with open('models/' + model + '/' + filename, 'wb') as currentFile:
        fileSize = int(r.headers["content-length"])
        with tqdm(ncols=100, desc="Fetching " + filename, total=fileSize, unit_scale=True) as pbar:
            # chunk_size of 1000 bytes chosen because the Ethernet packet size is around 1500 bytes
            for chunk in r.iter_content(chunk_size=1000):
                currentFile.write(chunk)
                pbar.update(1000)
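
As an illustration (not part of the commit), the script is invoked from the repository root with the model name as its single argument, matching the usage hint in its own error message:

    python download_model.py 117M

This fetches the seven files listed above into models/117M/, showing a tqdm progress bar for each download.
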
2 changes: 2 additions & 0 deletions requirements.txt
@@ -1,2 +1,4 @@
fire>=0.1.3
regex==2017.4.5
requests==2.21.0
tqdm==4.31.1
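
As an illustration (not part of the commit), the two new dependencies are installed alongside the existing ones with the standard pip command before running the downloader:

    pip install -r requirements.txt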
