From 43a6b6511e336d89032eadc31c1188619f4d0f57 Mon Sep 17 00:00:00 2001 From: Chirag Jain Date: Wed, 6 May 2020 13:14:52 +0530 Subject: [PATCH 1/2] fix(xlnet): Set training mode to False and set dropout to zero Taken directly from https://github.com/zihangdai/xlnet/pull/151/commits/d1d0ff4d0d2e536e897adabcdd04683e62dfa7a4 --- server/embedding_as_service/text/xlnet/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/embedding_as_service/text/xlnet/__init__.py b/server/embedding_as_service/text/xlnet/__init__.py index 163c7ab..eaca039 100644 --- a/server/embedding_as_service/text/xlnet/__init__.py +++ b/server/embedding_as_service/text/xlnet/__init__.py @@ -118,7 +118,8 @@ def _model_single_input(self, text: Union[str, List[str]], is_tokenized: bool def load_model(self, model: str, model_path: str, max_seq_length: int): model_path = os.path.join(model_path, next(os.walk(model_path))[1][0]) self.xlnet_config = xlnet.XLNetConfig(json_path=os.path.join(model_path, Embeddings.mode_config_path)) - self.run_config = xlnet.create_run_config(is_training=True, is_finetune=True, FLAGS=Flags) + self.run_config = xlnet.create_run_config(is_training=False, is_finetune=False, FLAGS=Flags) + self.run_config.dropout = self.run_config.dropatt = 0.0 self.load_tokenizer(model_path) self.max_seq_length = max_seq_length self.model_name = model From a73e1f62700214fbceafd3a9db5403d8c36f3b1f Mon Sep 17 00:00:00 2001 From: Chirag Jain Date: Fri, 17 Jul 2020 15:46:50 +0530 Subject: [PATCH 2/2] Add missing flags to xlnet config However the embeddings problem is still unsolved. 
Need to attach a debugger to investigate --- server/embedding_as_service/text/xlnet/__init__.py | 1 + server/embedding_as_service/text/xlnet/config.py | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/server/embedding_as_service/text/xlnet/__init__.py b/server/embedding_as_service/text/xlnet/__init__.py index eaca039..d2cf1c9 100644 --- a/server/embedding_as_service/text/xlnet/__init__.py +++ b/server/embedding_as_service/text/xlnet/__init__.py @@ -118,6 +118,7 @@ def _model_single_input(self, text: Union[str, List[str]], is_tokenized: bool def load_model(self, model: str, model_path: str, max_seq_length: int): model_path = os.path.join(model_path, next(os.walk(model_path))[1][0]) self.xlnet_config = xlnet.XLNetConfig(json_path=os.path.join(model_path, Embeddings.mode_config_path)) + self.xlnet_config.dropout = self.xlnet_config.dropatt = 0.0 self.run_config = xlnet.create_run_config(is_training=False, is_finetune=False, FLAGS=Flags) self.run_config.dropout = self.run_config.dropatt = 0.0 self.load_tokenizer(model_path) diff --git a/server/embedding_as_service/text/xlnet/config.py b/server/embedding_as_service/text/xlnet/config.py index c1780b2..03b32d6 100644 --- a/server/embedding_as_service/text/xlnet/config.py +++ b/server/embedding_as_service/text/xlnet/config.py @@ -1,5 +1,4 @@ class Flags: - # Model model_config_path: str = None # Model config path dropout: float = 0.1 # Dropout rate @@ -11,7 +10,7 @@ class Flags: # Parameter initialization init: str = "normal" - init_std: float = 0.2 # Initialization std when init is normal. + init_std: float = 0.2 # Initialization std when init is normal. init_range: float = 0.1 # Initialization std when init is uniform. # I/O paths @@ -44,13 +43,17 @@ class Flags: # Low layer: lr[l-1] = lr[l] * lr_layer_decay_rate. min_lr_ratio: float = 0.0 # min lr ratio for cos decay. - clip: float = 1.0 # Gradient clipping + clip: float = 1.0 # Gradient clipping max_save: int = 0 # Max number of checkpoints to save.
Use 0 to save all. save_steps: int = None # Save the model for every save_steps. If None, not to save any model. train_batch_size: int = 8 # Batch size for training weight_decay: float = 0.00 # Weight decay rate adam_epsilon: float = 1e-8 # Adam epsilon decay_method: str = "poly" # poly or cos + mem_len: int = 0 # Number of steps to cache + same_length: bool = False # Same length attention + reuse_len: int = 0 # How many tokens to be reused in the next batch. + bi_data: bool = False # Use bidirectional data streams, i.e., forward & backward. # evaluation do_eval: bool = False # whether to do eval