Skip to content

Commit

Permalink
Merge pull request #6 from minimaxir/v0.2
Browse files Browse the repository at this point in the history
V0.2
  • Loading branch information
minimaxir authored Apr 20, 2019
2 parents 152f3c1 + b3b3f89 commit ca5c4ac
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
28 changes: 28 additions & 0 deletions gpt_2_simple/gpt_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
import tensorflow as tf
import time
import csv

# if in Google Colaboratory
try:
Expand Down Expand Up @@ -424,3 +425,30 @@ def copy_file_from_gdrive(file_path):
is_mounted()

shutil.copyfile("/content/drive/My Drive/" + file_path, file_path)


def is_gpt2_downloaded(model_path=os.path.join("models", "117M")):
    """Report whether all files of the original model exist in `model_path`.

    Returns True only when every expected file is present as a regular
    file directly inside `model_path`; returns False as soon as one is
    missing.
    """

    required_files = (
        'checkpoint', 'encoder.json', 'hparams.json',
        'model.ckpt.data-00000-of-00001', 'model.ckpt.index',
        'model.ckpt.meta', 'vocab.bpe',
    )
    # all() short-circuits on the first missing file.
    return all(os.path.isfile(os.path.join(model_path, name))
               for name in required_files)


def encode_csv(csv_path, out_path='csv_encoded.txt', header=True,
               start_token="<|startoftext|>",
               end_token="<|endoftext|>"):
    """Encodes a single-column CSV to a format suitable for gpt-2-simple.
    Automatically adds the specified prefix and suffix tokens.

    Only the first column of each record is used. Each record is written
    to `out_path` as `start_token + text + end_token` followed by a newline.

    csv_path: path of the CSV file to read (decoded as UTF-8, undecodable
        bytes are ignored).
    out_path: path of the text file to write (overwritten if it exists).
    header: if True, the first CSV record is treated as a header and skipped.
    start_token: string written before each text.
    end_token: string written after each text.
    """

    # newline='' is what the csv module expects for its input file, so that
    # newlines embedded in quoted fields are parsed correctly.
    with open(csv_path, 'r', encoding='utf8', errors='ignore',
              newline='') as f:
        with open(out_path, 'w', encoding='utf8', errors='ignore') as w:
            reader = csv.reader(f)
            if header:
                # Skip one CSV *record*, not one physical line: a quoted
                # header may span several lines. The default avoids
                # StopIteration on an empty file.
                next(reader, None)
            for row in reader:
                # csv.reader yields [] for blank lines; there is no text
                # to encode, so skip instead of raising IndexError.
                if not row:
                    continue
                w.write(start_token + row[0] + end_token + "\n")
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
setup(
name='gpt_2_simple',
packages=['gpt_2_simple'], # this must be the same as the name above
version='0.1',
version='0.2',
description="Python package to easily retrain OpenAI's GPT-2 " \
"text-generating model on new texts.",
long_description=long_description,
Expand Down

0 comments on commit ca5c4ac

Please sign in to comment.