Commit v0.2.4
- Add History callback
- Fix regularization error in PyTorch 1.7
- Improve compatibility with older versions
浅梦 authored Dec 5, 2020
2 parents bc881dc + 500c0a5 commit 6eec1ed
Showing 34 changed files with 132 additions and 96 deletions.
4 changes: 2 additions & 2 deletions .github/ISSUE_TEMPLATE/bug_report.md
@@ -19,8 +19,8 @@ Steps to reproduce the behavior:

**Operating environment(运行环境):**
- python version [e.g. 3.5, 3.6]
- torch version [e.g. 1.1.0, 1.2.0]
- deepctr-torch version [e.g. 0.1.0,]
- torch version [e.g. 1.6.0, 1.7.0]
- deepctr-torch version [e.g. 0.2.4,]

**Additional context**
Add any other context about the problem here.
4 changes: 2 additions & 2 deletions .github/ISSUE_TEMPLATE/question.md
@@ -16,5 +16,5 @@ Add any other context about the problem here.

**Operating environment(运行环境):**
- python version [e.g. 3.6]
- torch version [e.g. 1.2.0,]
- deepctr-torch version [e.g. 0.1.0,]
- torch version [e.g. 1.7.0,]
- deepctr-torch version [e.g. 0.2.4,]
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -18,7 +18,7 @@ jobs:
strategy:
matrix:
python-version: [3.6,3.7]
torch-version: [1.1.0,1.2.0,1.3.0,1.4.0,1.5.0,1.6.0]
torch-version: [1.1.0,1.2.0,1.3.0,1.4.0,1.5.0,1.6.0,1.7.0]

# exclude:
# - python-version: 3.5
11 changes: 10 additions & 1 deletion CONTRIBUTING.md
@@ -1,10 +1,19 @@
This project is under development and we need developers to participate in.

# Join us

If you

- familiar with and interested in CTR models
- familiar with pytorch(both pytorch and tensorflow better)
- have spare time to learn and develop
- familiar with git

please send a brief introduction of your background and experience to [email protected], welcome to join us!
please send a brief introduction of your background and experience to [email protected], welcome to join us!

# Creating a pull request
1. **Become a collaborator**: Send an email with introduction and your github account name to [email protected] and waiting for invitation to become a collaborator.
2. **Fork&Dev**: Fork your own branch(`dev_yourname`) in `DeepCTR-Torch` from the `master` branch for development.If the `master` is updated during the development process, remember to merge and update to `dev_yourname` regularly.
3. **Testing**: Test logical correctness and effect when finishing the code development of the `dev_yourname` branch.
4. **Pre-release** : After testing contact [email protected] for pre-release integration, usually your branch `dev_yourname` will be merged into `release` branch by squash merge.
5. **Release a new version**: After confirming that the change is no longer needed, `release` branch will be merged into `master` and a new python package will be released on pypi.
14 changes: 7 additions & 7 deletions README.md
@@ -92,25 +92,25 @@ Let's [**Get Started!**](https://deepctr-torch.readthedocs.io/en/latest/Quick-St
<a href="https://github.com/wutongzhang">Zhang Wutong</a>
<p>Core Dev<br> Beijing University <br> of Posts and <br> Telecommunications</p>​
</td>
<td>
​ <a href="https://github.com/zanshuxun"><img width="70" height="70" src="https://github.com/zanshuxun.png?s=40" alt="pic"></a><br>
​ <a href="https://github.com/zanshuxun">Zan Shuxun</a>
<p>Core Dev<br> Beijing University <br> of Posts and <br> Telecommunications</p>​
</td>
<td>
​ <a href="https://github.com/ZhangYuef"><img width="70" height="70" src="https://github.com/ZhangYuef.png?s=40" alt="pic"></a><br>
​ <a href="https://github.com/ZhangYuef">Zhang Yuefeng</a>
<p>Core Dev<br>
Peking University <br> <br> </p>​
</td>
</tr>
<tr align="center">
<td>
​ <a href="https://github.com/JyiHUO"><img width="70" height="70" src="https://github.com/JyiHUO.png?s=40" alt="pic"></a><br>
​ <a href="https://github.com/JyiHUO">Huo Junyi</a>
<p>Core Dev<br>
University of Southampton <br> <br> </p>​
</td>
</tr>
<tr align="center">
<td>
​ <a href="https://github.com/zanshuxun"><img width="70" height="70" src="https://github.com/zanshuxun.png?s=40" alt="pic"></a><br>
​ <a href="https://github.com/zanshuxun">Zan Shuxun</a>
<p>Dev<br> Beijing University <br> of Posts and <br> Telecommunications</p>​
</td>
<td>
​ <a href="https://github.com/Zengai"><img width="70" height="70" src="https://github.com/Zengai.png?s=40" alt="pic"></a><br>
​ <a href="https://github.com/Zengai">Zeng Kai</a> ​
2 changes: 1 addition & 1 deletion deepctr_torch/__init__.py
@@ -2,5 +2,5 @@
from . import models
from .utils import check_version

__version__ = '0.2.3'
__version__ = '0.2.4'
check_version(__version__)
3 changes: 2 additions & 1 deletion deepctr_torch/callbacks.py
@@ -1,9 +1,10 @@
import torch
from tensorflow.python.keras.callbacks import EarlyStopping
from tensorflow.python.keras.callbacks import ModelCheckpoint
from tensorflow.python.keras.callbacks import History

EarlyStopping = EarlyStopping

History = History

class ModelCheckpoint(ModelCheckpoint):
"""Save the model after every epoch.
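The re-exported `History` callback is what `fit()` now returns (see the `basemodel.py` diff below). A minimal usage sketch — `model`, `x`, and `y` are assumed to exist (e.g. a compiled DeepFM and its input arrays), and the metric key names depend on what was passed to `compile()`:

```python
# Sketch only, not library code: assumes a compiled DeepCTR-Torch `model`.
history = model.fit(x, y, batch_size=256, epochs=5,
                    validation_split=0.2, verbose=1)

# History.history maps each recorded quantity to one value per epoch,
# e.g. {'loss': [...], 'val_auc': [...]} (keys depend on compile()).
for name, per_epoch in history.history.items():
    print(name, per_epoch)
```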
2 changes: 1 addition & 1 deletion deepctr_torch/inputs.py
@@ -266,6 +266,6 @@ def get_dense_input(X, features, feature_columns):

def maxlen_lookup(X, sparse_input_dict, maxlen_column):
if maxlen_column is None or len(maxlen_column)==0:
raise ValueError('please add max length column for VarLenSparseFeat of DIEN input')
raise ValueError('please add max length column for VarLenSparseFeat of DIN/DIEN input')
lookup_idx = np.array(sparse_input_dict[maxlen_column[0]])
return X[:, lookup_idx[0]:lookup_idx[1]].long()
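For context, `maxlen_lookup` slices the columns belonging to one variable-length feature out of the flat input matrix; `sparse_input_dict` maps a feature name to its `(start, end)` column span. A self-contained toy illustration — the feature name and shapes are invented for the example:

```python
import numpy as np
import torch

X = torch.arange(20.0).reshape(2, 10)         # batch of 2, 10 flat input columns
sparse_input_dict = {'hist_item_id': (3, 8)}  # this feature spans columns 3..7

lookup_idx = np.array(sparse_input_dict['hist_item_id'])
seq = X[:, lookup_idx[0]:lookup_idx[1]].long()  # same slice as maxlen_lookup
print(seq.shape)  # torch.Size([2, 5])
```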
5 changes: 3 additions & 2 deletions deepctr_torch/layers/activation.py
@@ -25,10 +25,11 @@ def __init__(self, emb_size, dim=2, epsilon=1e-8, device='cpu'):
self.sigmoid = nn.Sigmoid()
self.dim = dim

# wrap alpha in nn.Parameter to make it trainable
if self.dim == 2:
self.alpha = torch.zeros((emb_size,)).to(device)
self.alpha = nn.Parameter(torch.zeros((emb_size,)).to(device))
else:
self.alpha = torch.zeros((emb_size, 1)).to(device)
self.alpha = nn.Parameter(torch.zeros((emb_size, 1)).to(device))

def forward(self, x):
assert x.dim() == self.dim
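The `activation.py` change above is a real bug fix: a plain tensor assigned to a module attribute is never registered with the module, so the optimizer never updated Dice's `alpha`. Wrapping it in `nn.Parameter` registers it. A self-contained sketch of the difference:

```python
import torch
import torch.nn as nn

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.plain = torch.zeros(3)                # invisible to the optimizer
        self.alpha = nn.Parameter(torch.zeros(3))  # registered and trainable

print([name for name, _ in Toy().named_parameters()])  # ['alpha'] only
```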
2 changes: 1 addition & 1 deletion deepctr_torch/layers/interaction.py
@@ -512,7 +512,7 @@ def forward(self, inputs):

# (2) E(x_l)
# project the input x_l to $\mathbb{R}^{r}$
v_x = torch.matmul(self.V_list[i][expert_id].T, x_l) # (bs, low_rank, 1)
v_x = torch.matmul(self.V_list[i][expert_id].t(), x_l) # (bs, low_rank, 1)

# nonlinear activation in low rank space
v_x = torch.tanh(v_x)
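The `.T` → `.t()` swap in `interaction.py` is presumably the "compatibility for old versions" item from the commit message: `Tensor.t()` is the long-standing method form of the 2-D transpose, while the `Tensor.T` property is not available in the oldest PyTorch release the CI matrix still tests (1.1.0) — an inference from the diff, not a statement from the commit. On 2-D tensors the two are equivalent:

```python
import torch

V = torch.randn(4, 2)           # stand-in for one expert's V matrix
assert torch.equal(V.t(), V.T)  # identical views on a 2-D tensor
```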
2 changes: 1 addition & 1 deletion deepctr_torch/models/afm.py
@@ -43,7 +43,7 @@ def __init__(self, linear_feature_columns, dnn_feature_columns, use_attention=Tr
if use_attention:
self.fm = AFMLayer(self.embedding_size, attention_factor, l2_reg_att, afm_dropout,
seed, device)
self.add_regularization_weight(self.fm.attention_W, l2_reg_att)
self.add_regularization_weight(self.fm.attention_W, l2=l2_reg_att)
else:
self.fm = FM()

2 changes: 1 addition & 1 deletion deepctr_torch/models/autoint.py
@@ -69,7 +69,7 @@ def __init__(self, linear_feature_columns, dnn_feature_columns, att_layer_num=3,
activation=dnn_activation, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout, use_bn=dnn_use_bn,
init_std=init_std, device=device)
self.add_regularization_weight(
filter(lambda x: 'weight' in x[0] and 'bn' not in x[0], self.dnn.named_parameters()), l2_reg_dnn)
filter(lambda x: 'weight' in x[0] and 'bn' not in x[0], self.dnn.named_parameters()), l2=l2_reg_dnn)
self.int_layers = nn.ModuleList(
[InteractingLayer(self.embedding_size if i == 0 else att_embedding_size * att_head_num,
att_embedding_size, att_head_num, att_res, device=device) for i in range(att_layer_num)])
60 changes: 40 additions & 20 deletions deepctr_torch/models/basemodel.py
@@ -26,8 +26,8 @@
from ..inputs import build_input_features, SparseFeat, DenseFeat, VarLenSparseFeat, get_varlen_pooling_list, \
create_embedding_matrix
from ..layers import PredictionLayer

from ..layers.utils import slice_arrays
from ..callbacks import History


class Linear(nn.Module):
@@ -55,8 +55,8 @@ def __init__(self, feature_columns, feature_index, init_std=0.0001, device='cpu'
nn.init.normal_(tensor.weight, mean=0, std=init_std)

if len(self.dense_feature_columns) > 0:
self.weight = nn.Parameter(torch.Tensor(sum(fc.dimension for fc in self.dense_feature_columns), 1)).to(
device)
self.weight = nn.Parameter(torch.Tensor(sum(fc.dimension for fc in self.dense_feature_columns), 1).to(
device))
torch.nn.init.normal_(self.weight, mean=0, std=init_std)

def forward(self, X):
@@ -117,14 +117,16 @@ def __init__(self, linear_feature_columns, dnn_feature_columns, l2_reg_linear=1e

self.regularization_weight = []

self.add_regularization_weight(
self.embedding_dict.parameters(), l2_reg_embedding)
self.add_regularization_weight(
self.linear_model.parameters(), l2_reg_linear)
self.add_regularization_weight(self.embedding_dict.parameters(), l2=l2_reg_embedding)
self.add_regularization_weight(self.linear_model.parameters(), l2=l2_reg_linear)

self.out = PredictionLayer(task, )
self.to(device)
self._is_graph_network = True # used for callbacks

# parameters of callbacks
self._is_graph_network = True # used for ModelCheckpoint
self.stop_training = False # used for EarlyStopping
self.history = History()

def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, initial_epoch=0, validation_split=0.,
validation_data=None, shuffle=True, callbacks=None):
@@ -142,6 +144,7 @@ def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, initial_epoc
:param shuffle: Boolean. Whether to shuffle the order of the batches at the beginning of each epoch.
:param callbacks: List of `deepctr_torch.callbacks.Callback` instances. List of callbacks to apply during training and validation (if ). See [callbacks](https://tensorflow.google.cn/api_docs/python/tf/keras/callbacks). Now available: `EarlyStopping` , `ModelCheckpoint`
:return: A `History` object. Its `History.history` attribute is a record of training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable).
"""
if isinstance(x, dict):
x = [x[feature] for feature in self.feature_index]
@@ -200,10 +203,14 @@ def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, initial_epoc
sample_num = len(train_tensor_data)
steps_per_epoch = (sample_num - 1) // batch_size + 1

# configure callbacks
callbacks = (callbacks or []) + [self.history] # add history callback
callbacks = CallbackList(callbacks)
callbacks.set_model(self)
callbacks.on_train_begin()
self.stop_training = False # used for early stopping
callbacks.set_model(self)
if not hasattr(callbacks, 'model'):
callbacks.__setattr__('model', self)
callbacks.model.stop_training = False

# Train
print("Train on {0} samples, validate on {1} samples, {2} steps per epoch".format(
@@ -231,7 +238,7 @@ def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, initial_epoc

loss_epoch += loss.item()
total_loss_epoch += total_loss.item()
total_loss.backward(retain_graph=True)
total_loss.backward()
optim.step()

if verbose > 0:
@@ -279,6 +286,8 @@ def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, initial_epoc

callbacks.on_train_end()

return self.history

def evaluate(self, x, y, batch_size=256):
"""
@@ -368,21 +377,32 @@ def compute_input_dim(self, feature_columns, include_sparse=True, include_dense=
input_dim += dense_input_dim
return input_dim

def add_regularization_weight(self, weight_list, weight_decay, p=2):
self.regularization_weight.append((list(weight_list), weight_decay, p))
def add_regularization_weight(self, weight_list, l1=0.0, l2=0.0):
# For a Parameter, put it in a list to keep Compatible with get_regularization_loss()
if isinstance(weight_list, torch.nn.parameter.Parameter):
weight_list = [weight_list]
# For generators, filters and ParameterLists, convert them to a list of tensors to avoid bugs.
# e.g., we can't pickle generator objects when we save the model.
else:
weight_list = list(weight_list)
self.regularization_weight.append((weight_list, l1, l2))

def get_regularization_loss(self, ):
total_reg_loss = torch.zeros((1,), device=self.device)
for weight_list, weight_decay, p in self.regularization_weight:
weight_reg_loss = torch.zeros((1,), device=self.device)
for weight_list, l1, l2 in self.regularization_weight:
for w in weight_list:
if isinstance(w, tuple):
l2_reg = torch.norm(w[1], p=p, )
parameter = w[1] # named_parameters
else:
l2_reg = torch.norm(w, p=p, )
weight_reg_loss = weight_reg_loss + l2_reg
reg_loss = weight_decay * weight_reg_loss
total_reg_loss += reg_loss
parameter = w
if l1 > 0:
total_reg_loss += torch.sum(l1 * torch.abs(parameter))
if l2 > 0:
try:
total_reg_loss += torch.sum(l2 * torch.square(parameter))
except AttributeError:
total_reg_loss += torch.sum(l2 * parameter * parameter)

return total_reg_loss

def add_auxiliary_loss(self, aux_loss, alpha):
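The largest change is the regularization rework: `add_regularization_weight` now takes explicit `l1`/`l2` coefficients instead of a single `weight_decay` with norm order `p`, materializes generators and filters into lists (so models can be pickled), and `get_regularization_loss` falls back to `parameter * parameter` where `torch.square` is missing in older PyTorch. A self-contained re-creation of the new penalty math, mirroring the diff rather than importing the library:

```python
import torch

regularization_weight = []

def add_regularization_weight(weight_list, l1=0.0, l2=0.0):
    if isinstance(weight_list, torch.nn.parameter.Parameter):
        weight_list = [weight_list]      # single Parameter -> list
    else:
        weight_list = list(weight_list)  # materialize generators/filters
    regularization_weight.append((weight_list, l1, l2))

def get_regularization_loss():
    total = torch.zeros((1,))
    for weight_list, l1, l2 in regularization_weight:
        for w in weight_list:
            p = w[1] if isinstance(w, tuple) else w  # named_parameters yields (name, param)
            if l1 > 0:
                total = total + torch.sum(l1 * torch.abs(p))
            if l2 > 0:
                total = total + torch.sum(l2 * p * p)  # p*p also works on pre-1.5 PyTorch
    return total

lin = torch.nn.Linear(4, 1)
add_regularization_weight(lin.parameters(), l2=1e-4)  # as the model constructors now call it
print(get_regularization_loss())                      # small positive scalar tensor
```

Also worth noting in this file: the one-line `Linear.__init__` fix moves `.to(device)` inside `nn.Parameter(...)`. When the device differs, `nn.Parameter(t).to(device)` returns a plain, non-leaf tensor copy, so the weight was not properly registered as a trainable parameter on GPU.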
4 changes: 2 additions & 2 deletions deepctr_torch/models/ccpm.py
@@ -60,8 +60,8 @@ def __init__(self, linear_feature_columns, dnn_feature_columns, conv_kernel_widt
init_std=init_std, device=device)
self.dnn_linear = nn.Linear(dnn_hidden_units[-1], 1, bias=False).to(device)
self.add_regularization_weight(
filter(lambda x: 'weight' in x[0] and 'bn' not in x[0], self.dnn.named_parameters()), l2_reg_dnn)
self.add_regularization_weight(self.dnn_linear.weight, l2_reg_dnn)
filter(lambda x: 'weight' in x[0] and 'bn' not in x[0], self.dnn.named_parameters()), l2=l2_reg_dnn)
self.add_regularization_weight(self.dnn_linear.weight, l2=l2_reg_dnn)

self.to(device)

6 changes: 3 additions & 3 deletions deepctr_torch/models/dcn.py
@@ -65,9 +65,9 @@ def __init__(self, linear_feature_columns, dnn_feature_columns, cross_num=2, cro
self.crossnet = CrossNet(in_features=self.compute_input_dim(dnn_feature_columns),
layer_num=cross_num, parameterization=cross_parameterization, device=device)
self.add_regularization_weight(
filter(lambda x: 'weight' in x[0] and 'bn' not in x[0], self.dnn.named_parameters()), l2_reg_dnn)
self.add_regularization_weight(self.dnn_linear.weight, l2_reg_linear)
self.add_regularization_weight(self.crossnet.kernels, l2_reg_cross)
filter(lambda x: 'weight' in x[0] and 'bn' not in x[0], self.dnn.named_parameters()), l2=l2_reg_dnn)
self.add_regularization_weight(self.dnn_linear.weight, l2=l2_reg_linear)
self.add_regularization_weight(self.crossnet.kernels, l2=l2_reg_cross)
self.to(device)

def forward(self, X):
10 changes: 5 additions & 5 deletions deepctr_torch/models/dcnmix.py
@@ -68,11 +68,11 @@ def __init__(self, linear_feature_columns,
low_rank=low_rank, num_experts=num_experts,
layer_num=cross_num, device=device)
self.add_regularization_weight(
filter(lambda x: 'weight' in x[0] and 'bn' not in x[0], self.dnn.named_parameters()), l2_reg_dnn)
self.add_regularization_weight(self.dnn_linear.weight, l2_reg_linear)
self.add_regularization_weight(self.crossnet.U_list, l2_reg_cross)
self.add_regularization_weight(self.crossnet.V_list, l2_reg_cross)
self.add_regularization_weight(self.crossnet.C_list, l2_reg_cross)
filter(lambda x: 'weight' in x[0] and 'bn' not in x[0], self.dnn.named_parameters()), l2=l2_reg_dnn)
self.add_regularization_weight(self.dnn_linear.weight, l2=l2_reg_linear)
self.add_regularization_weight(self.crossnet.U_list, l2=l2_reg_cross)
self.add_regularization_weight(self.crossnet.V_list, l2=l2_reg_cross)
self.add_regularization_weight(self.crossnet.C_list, l2=l2_reg_cross)
self.to(device)

def forward(self, X):
4 changes: 2 additions & 2 deletions deepctr_torch/models/deepfm.py
@@ -59,8 +59,8 @@ def __init__(self,
dnn_hidden_units[-1], 1, bias=False).to(device)

self.add_regularization_weight(
filter(lambda x: 'weight' in x[0] and 'bn' not in x[0], self.dnn.named_parameters()), l2_reg_dnn)
self.add_regularization_weight(self.dnn_linear.weight, l2_reg_dnn)
filter(lambda x: 'weight' in x[0] and 'bn' not in x[0], self.dnn.named_parameters()), l2=l2_reg_dnn)
self.add_regularization_weight(self.dnn_linear.weight, l2=l2_reg_dnn)
self.to(device)

def forward(self, X):
(Diffs for the remaining 17 changed files are not shown.)
