From f35fa1d920e9d2d0690f66d03aa3f76b3c59230e Mon Sep 17 00:00:00 2001
From: Jeff Wu
Date: Tue, 20 Aug 2019 08:50:19 -0700
Subject: [PATCH] push 774M model

---
 DEVELOPERS.md                          | 5 +++--
 Dockerfile.cpu                         | 5 +++--
 Dockerfile.gpu                         | 5 +++--
 README.md                              | 6 ++++--
 download_model.py                      | 2 +-
 src/generate_unconditional_samples.py  | 4 ++--
 src/interactive_conditional_samples.py | 8 ++++----
 7 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/DEVELOPERS.md b/DEVELOPERS.md
index 57fd519f0..0992c7026 100644
--- a/DEVELOPERS.md
+++ b/DEVELOPERS.md
@@ -27,8 +27,9 @@ pip3 install -r requirements.txt
 
 Download the model data
 ```
-python3 download_model.py 117M
-python3 download_model.py 345M
+python3 download_model.py 124M
+python3 download_model.py 355M
+python3 download_model.py 774M
 ```
 
 ## Docker Installation
diff --git a/Dockerfile.cpu b/Dockerfile.cpu
index a02d2b320..c923234a3 100644
--- a/Dockerfile.cpu
+++ b/Dockerfile.cpu
@@ -5,5 +5,6 @@ RUN mkdir /gpt-2
 WORKDIR /gpt-2
 ADD . /gpt-2
 RUN pip3 install -r requirements.txt
-RUN python3 download_model.py 117M
-RUN python3 download_model.py 345M
+RUN python3 download_model.py 124M
+RUN python3 download_model.py 355M
+RUN python3 download_model.py 774M
diff --git a/Dockerfile.gpu b/Dockerfile.gpu
index b3f87db14..e59880e5d 100644
--- a/Dockerfile.gpu
+++ b/Dockerfile.gpu
@@ -14,5 +14,6 @@ RUN mkdir /gpt-2
 WORKDIR /gpt-2
 ADD . /gpt-2
 RUN pip3 install -r requirements.txt
-RUN python3 download_model.py 117M
-RUN python3 download_model.py 345M
+RUN python3 download_model.py 124M
+RUN python3 download_model.py 355M
+RUN python3 download_model.py 774M
diff --git a/README.md b/README.md
index 46bb8f3e9..1b2d5e81a 100644
--- a/README.md
+++ b/README.md
@@ -4,9 +4,11 @@
 
 Code from the paper ["Language Models are Unsupervised Multitask Learners"](https://d4mucfpksywv.cloudfront.net/better-language-models/language-models.pdf).
 
-We have currently released small (117M parameter) and medium (345M parameter) versions of GPT-2. While we have not released the larger models, we have [released a dataset](https://github.com/openai/gpt-2-output-dataset) for researchers to study their behaviors.
+We have currently released small (124M parameter), medium (355M parameter), and large (774M parameter) versions of GPT-2*, with only the full model as of yet unreleased. We have also [released a dataset](https://github.com/openai/gpt-2-output-dataset) for researchers to study their behaviors.
 
-See more details in our [blog post](https://blog.openai.com/better-language-models/).
+You can read about GPT-2 and release decisions in our [original blog post](https://blog.openai.com/better-language-models/) and [6 month follow-up post](https://openai.com/blog/gpt-2-6-month-follow-up/).
+
+* *Note that our original parameter counts were wrong due to an error (in our previous blog posts and paper). Thus you may have seen small referred to as 117M and medium referred to as 345M.*
 
 ## Usage
diff --git a/download_model.py b/download_model.py
index 30ba84ade..56d4e7675 100644
--- a/download_model.py
+++ b/download_model.py
@@ -4,7 +4,7 @@
 from tqdm import tqdm
 
 if len(sys.argv) != 2:
-    print('You must enter the model name as a parameter, e.g.: download_model.py 117M')
+    print('You must enter the model name as a parameter, e.g.: download_model.py 124M')
     sys.exit(1)
 
 model = sys.argv[1]
diff --git a/src/generate_unconditional_samples.py b/src/generate_unconditional_samples.py
index f18a83891..cc3f3a32a 100755
--- a/src/generate_unconditional_samples.py
+++ b/src/generate_unconditional_samples.py
@@ -9,7 +9,7 @@
 import model, sample, encoder
 
 def sample_model(
-    model_name='117M',
+    model_name='124M',
     seed=None,
     nsamples=0,
     batch_size=1,
@@ -20,7 +20,7 @@ def sample_model(
 ):
     """
     Run the sample_model
-    :model_name=117M : String, which model to use
+    :model_name=124M : String, which model to use
     :seed=None : Integer seed for random number generators, fix seed to
      reproduce results
     :nsamples=0 : Number of samples to return, if 0, continues to
diff --git a/src/interactive_conditional_samples.py b/src/interactive_conditional_samples.py
index ae348d842..48b5cb3d3 100755
--- a/src/interactive_conditional_samples.py
+++ b/src/interactive_conditional_samples.py
@@ -9,18 +9,18 @@
 import model, sample, encoder
 
 def interact_model(
-    model_name='117M',
+    model_name='124M',
     seed=None,
     nsamples=1,
     batch_size=1,
     length=None,
     temperature=1,
     top_k=0,
-    models_dir='models', 
+    models_dir='models',
 ):
     """
     Interactively run the model
-    :model_name=117M : String, which model to use
+    :model_name=124M : String, which model to use
     :seed=None : Integer seed for random number generators, fix seed to
      reproduce results
     :nsamples=1 : Number of samples to return total
@@ -36,7 +36,7 @@ def interact_model(
      while 40 means 40 words are considered at each step. 0 (default) is a
      special setting meaning no restrictions. 40 generally is a good value.
     :models_dir : path to parent folder containing model subfolders
-     (i.e. contains the <model_name> folder) 
+     (i.e. contains the <model_name> folder)
    """
     models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
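For reference, a minimal usage sketch once this patch is applied, assuming the repository's Fire-based CLI, which exposes interact_model parameters such as model_name and top_k as command-line flags:

```
# Fetch the newly published 774M checkpoint into models/774M
python3 download_model.py 774M

# Sample interactively from the large model; 40 is the top_k value the
# docstring above describes as a generally good setting
python3 src/interactive_conditional_samples.py --model_name 774M --top_k 40
```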