From 43a6b6511e336d89032eadc31c1188619f4d0f57 Mon Sep 17 00:00:00 2001 From: Chirag Jain Date: Wed, 6 May 2020 13:14:52 +0530 Subject: [PATCH 1/2] fix(xlnet): Set training mode to False and set dropout to zero Taken directly from https://github.com/zihangdai/xlnet/pull/151/commits/d1d0ff4d0d2e536e897adabcdd04683e62dfa7a4 --- server/embedding_as_service/text/xlnet/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/embedding_as_service/text/xlnet/__init__.py b/server/embedding_as_service/text/xlnet/__init__.py index 163c7ab..eaca039 100644 --- a/server/embedding_as_service/text/xlnet/__init__.py +++ b/server/embedding_as_service/text/xlnet/__init__.py @@ -118,7 +118,8 @@ def _model_single_input(self, text: Union[str, List[str]], is_tokenized: bool def load_model(self, model: str, model_path: str, max_seq_length: int): model_path = os.path.join(model_path, next(os.walk(model_path))[1][0]) self.xlnet_config = xlnet.XLNetConfig(json_path=os.path.join(model_path, Embeddings.mode_config_path)) - self.run_config = xlnet.create_run_config(is_training=True, is_finetune=True, FLAGS=Flags) + self.run_config = xlnet.create_run_config(is_training=False, is_finetune=False, FLAGS=Flags) + self.run_config.dropout = self.run_config.dropatt = 0.0 self.load_tokenizer(model_path) self.max_seq_length = max_seq_length self.model_name = model From a73e1f62700214fbceafd3a9db5403d8c36f3b1f Mon Sep 17 00:00:00 2001 From: Chirag Jain Date: Fri, 17 Jul 2020 15:46:50 +0530 Subject: [PATCH 2/2] Add missing flags to xlnet config However the embeddings problem is still unsolved. 
Need to attach a debugger to investigate --- server/embedding_as_service/text/xlnet/__init__.py | 1 + server/embedding_as_service/text/xlnet/config.py | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/server/embedding_as_service/text/xlnet/__init__.py b/server/embedding_as_service/text/xlnet/__init__.py index eaca039..d2cf1c9 100644 --- a/server/embedding_as_service/text/xlnet/__init__.py +++ b/server/embedding_as_service/text/xlnet/__init__.py @@ -118,6 +118,7 @@ def _model_single_input(self, text: Union[str, List[str]], is_tokenized: bool def load_model(self, model: str, model_path: str, max_seq_length: int): model_path = os.path.join(model_path, next(os.walk(model_path))[1][0]) self.xlnet_config = xlnet.XLNetConfig(json_path=os.path.join(model_path, Embeddings.mode_config_path)) + self.xlnet_config.dropout = self.xlnet_config.dropatt = 0.0 self.run_config = xlnet.create_run_config(is_training=False, is_finetune=False, FLAGS=Flags) self.run_config.dropout = self.run_config.dropatt = 0.0 self.load_tokenizer(model_path) diff --git a/server/embedding_as_service/text/xlnet/config.py b/server/embedding_as_service/text/xlnet/config.py index c1780b2..03b32d6 100644 --- a/server/embedding_as_service/text/xlnet/config.py +++ b/server/embedding_as_service/text/xlnet/config.py @@ -1,5 +1,4 @@ class Flags: - # Model model_config_path: str = None # Model config path dropout: float = 0.1 # Dropout rate @@ -11,7 +10,7 @@ class Flags: # Parameter initialization init: str = "normal" - init_std: float = 0.2 # Initialization std when init is normal. + init_std: float = 0.2 # Initialization std when init is normal. init_range: float = 0.1 # Initialization std when init is uniform. # I/O paths @@ -44,13 +43,17 @@ class Flags: # Low layer: lr[l-1] = lr[l] * lr_layer_decay_rate. min_lr_ratio: float = 0.0 # min lr ratio for cos decay. - clip: float = 1.0 # Gradient clipping + clip: float = 1.0 # Gradient clipping max_save: int = 0 # Max number of checkpoints to save.
Use 0 to save all. save_steps: int = None # Save the model for every save_steps. If None, not to save any model. train_batch_size: int = 8 # Batch size for training weight_decay: float = 0.00 # Weight decay rate adam_epsilon: float = 1e-8 # Adam epsilon decay_method: str = "poly" # poly or cos + mem_len: int = 0 # Number of steps to cache + same_length: bool = False # Same length attention + reuse_len: int = 0 # How many tokens to be reused in the next batch. + bi_data: bool = False # Use bidirectional data streams, i.e., forward & backward. # evaluation do_eval: bool = False # whether to do eval