support multi-value input
浅梦 authored Sep 28, 2019
1 parent d4e671d commit c8a04f9
Showing 11 changed files with 255 additions and 57 deletions.
2 changes: 1 addition & 1 deletion deepctr_torch/__init__.py
@@ -2,5 +2,5 @@
 from . import models
 from .utils import check_version

-__version__ = '0.1.1'
+__version__ = '0.1.2'
 check_version(__version__)
86 changes: 50 additions & 36 deletions deepctr_torch/inputs.py
@@ -42,12 +42,14 @@ def __new__(cls, name, dimension, maxlen, combiner="mean", use_hash=False, dtype


 def get_fixlen_feature_names(feature_columns):
-    features = build_input_features(feature_columns, include_varlen=False, include_fixlen=True)
+    features = build_input_features(
+        feature_columns, include_varlen=False, include_fixlen=True)
     return list(features.keys())


 def get_varlen_feature_names(feature_columns):
-    features = build_input_features(feature_columns, include_varlen=True, include_fixlen=False)
+    features = build_input_features(
+        feature_columns, include_varlen=True, include_fixlen=False)
     return list(features.keys())
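The new get_varlen_feature_names mirrors its fixed-length counterpart, and together they tell the caller the column order the model expects. A minimal usage sketch, with made-up feature names and vocabulary sizes:

```python
from deepctr_torch.inputs import (SparseFeat, VarLenSparseFeat,
                                  get_fixlen_feature_names,
                                  get_varlen_feature_names)

# Hypothetical feature set: two fixed-length sparse fields plus one
# multi-value field padded to maxlen=5.
feature_columns = [SparseFeat('user_id', 1000),
                   SparseFeat('item_id', 500),
                   VarLenSparseFeat('genres', 20, 5, combiner='mean')]

# Names come back in the same order as the column slices of the stacked
# input matrix, so they can be used to line up numpy arrays for fit().
print(get_fixlen_feature_names(feature_columns))  # ['user_id', 'item_id']
print(get_varlen_feature_names(feature_columns))  # ['genres']
```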


@@ -61,44 +63,54 @@ def build_input_features(feature_columns, include_varlen=True, mask_zero=True, p

     start = 0

-    for feat in feature_columns:
-        feat_name = feat.name
-        if feat_name in features:
-            continue
-        if isinstance(feat, SparseFeat):
-            features[feat_name] = (start, start + 1)
-            start += 1
-        elif isinstance(feat, DenseFeat):
-            features[feat_name] = (start, start + feat.dimension)
-            start += feat.dimension

     if include_fixlen:
-        for fc in feature_columns:
-            if isinstance(fc, SparseFeat):
-                input_features[fc.name] = 1
-                # Input( shape=(1,), name=prefix+fc.name, dtype=fc.dtype)
-            elif isinstance(fc, DenseFeat):
-                input_features[fc.name] = 1
-                # Input(
-                # shape=(fc.dimension,), name=prefix + fc.name, dtype=fc.dtype)
+        for feat in feature_columns:
+            feat_name = feat.name
+            if feat_name in features:
+                continue
+            if isinstance(feat, SparseFeat):
+                features[feat_name] = (start, start + 1)
+                start += 1
+            elif isinstance(feat, DenseFeat):
+                features[feat_name] = (start, start + feat.dimension)
+                start += feat.dimension
     if include_varlen:
-        for fc in feature_columns:
-            if isinstance(fc, VarLenSparseFeat):
-                input_features[fc.name] = 1
-                # Input(shape=(fc.maxlen,), name=prefix + 'seq_' + fc.name,
-                # dtype=fc.dtype)
-        if not mask_zero:
-            for fc in feature_columns:
-                input_features[fc.name + "_seq_length"] = 1
-                # Input(shape=(
-                # 1,), name=prefix + 'seq_length_' + fc.name)
-                input_features[fc.name + "_seq_max_length"] = 1 # fc.maxlen
+        for feat in feature_columns:
+            feat_name = feat.name
+            if feat_name in features:
+                continue
+            if isinstance(feat, VarLenSparseFeat):
+                features[feat_name] = (start, start + feat.maxlen)
+                start += feat.maxlen
+
+    # if include_fixlen:
+    #     for fc in feature_columns:
+    #         if isinstance(fc, SparseFeat):
+    #             input_features[fc.name] = 1
+    #             # Input( shape=(1,), name=prefix+fc.name, dtype=fc.dtype)
+    #         elif isinstance(fc, DenseFeat):
+    #             input_features[fc.name] = 1
+    #             # Input(
+    #             # shape=(fc.dimension,), name=prefix + fc.name, dtype=fc.dtype)
+    # if include_varlen:
+    #     for fc in feature_columns:
+    #         if isinstance(fc, VarLenSparseFeat):
+    #             input_features[fc.name] = 1
+    #             # Input(shape=(fc.maxlen,), name=prefix + 'seq_' + fc.name,
+    #             # dtype=fc.dtype)
+    #     if not mask_zero:
+    #         for fc in feature_columns:
+    #             input_features[fc.name + "_seq_length"] = 1
+    #             # Input(shape=(
+    #             # 1,), name=prefix + 'seq_length_' + fc.name)
+    #             input_features[fc.name + "_seq_max_length"] = 1 # fc.maxlen

     return features
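After this change every feature, including a multi-value one, owns a contiguous slice of columns in the single stacked input matrix, with variable-length slices appended after all fixed-length ones. A small illustration of the resulting index map, using made-up feature widths:

```python
from collections import OrderedDict

# Mimicking the indexing above: a SparseFeat takes 1 column, a DenseFeat
# takes `dimension` columns, a VarLenSparseFeat takes `maxlen` columns of
# padded ids. Names and widths here are hypothetical.
features = OrderedDict()
start = 0
for name, width in [('user_id', 1), ('price', 2), ('genres', 5)]:
    features[name] = (start, start + width)
    start += width

print(features)
# OrderedDict([('user_id', (0, 1)), ('price', (1, 3)), ('genres', (3, 8))])
```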


 def get_dense_input(features, feature_columns):
-    dense_feature_columns = list(filter(lambda x: isinstance(x, DenseFeat), feature_columns)) if feature_columns else []
+    dense_feature_columns = list(filter(lambda x: isinstance(
+        x, DenseFeat), feature_columns)) if feature_columns else []
     dense_input_list = []
     for fc in dense_feature_columns:
         dense_input_list.append(features[fc.name])
@@ -107,12 +119,14 @@ def get_dense_input(features, feature_columns):

 def combined_dnn_input(sparse_embedding_list, dense_value_list):
     if len(sparse_embedding_list) > 0 and len(dense_value_list) > 0:
-        sparse_dnn_input = torch.flatten(torch.cat(sparse_embedding_list, dim=-1), start_dim=1)
-        dense_dnn_input = torch.flatten(torch.cat(dense_value_list, dim=-1), start_dim=1)
+        sparse_dnn_input = torch.flatten(
+            torch.cat(sparse_embedding_list, dim=-1), start_dim=1)
+        dense_dnn_input = torch.flatten(
+            torch.cat(dense_value_list, dim=-1), start_dim=1)
         return concat_fun([sparse_dnn_input, dense_dnn_input])
     elif len(sparse_embedding_list) > 0:
         return torch.flatten(torch.cat(sparse_embedding_list, dim=-1), start_dim=1)
     elif len(dense_value_list) > 0:
         return torch.flatten(torch.cat(dense_value_list, dim=-1), start_dim=1)
     else:
-        raise NotImplementedError
+        raise NotImplementedError
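combined_dnn_input is what finally fuses everything into the DNN's 2-D input. To make the tensor shapes concrete, a small self-contained sketch (concat_fun is assumed to be a thin wrapper around torch.cat on the last axis):

```python
import torch

# Hypothetical batch of 4: two sparse fields embedded to size 8 (each
# shaped (batch, 1, embedding_size)) and one 2-dimensional dense feature.
sparse_embedding_list = [torch.randn(4, 1, 8), torch.randn(4, 1, 8)]
dense_value_list = [torch.randn(4, 2)]

sparse_dnn_input = torch.flatten(
    torch.cat(sparse_embedding_list, dim=-1), start_dim=1)  # (4, 16)
dense_dnn_input = torch.flatten(
    torch.cat(dense_value_list, dim=-1), start_dim=1)       # (4, 2)

dnn_input = torch.cat([sparse_dnn_input, dense_dnn_input], dim=-1)
print(dnn_input.shape)  # torch.Size([4, 18])
```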
55 changes: 41 additions & 14 deletions deepctr_torch/models/basemodel.py
@@ -18,7 +18,7 @@
 from torch.utils.data import DataLoader
 from tqdm import tqdm

-from ..inputs import build_input_features, SparseFeat, DenseFeat
+from ..inputs import build_input_features, SparseFeat, DenseFeat, VarLenSparseFeat
 from ..layers import PredictionLayer
 from ..layers.utils import slice_arrays

@@ -45,11 +45,12 @@ def __init__(self, feature_columns, feature_index, init_std=0.0001, device='cpu'
         nn.init.normal_(tensor.weight, mean=0, std=init_std)

         if len(self.dense_feature_columns) > 0:
-            self.weight = nn.Parameter(torch.Tensor(len(self.dense_feature_columns), 1)).to(
+            self.weight = nn.Parameter(torch.Tensor(sum(fc.dimension for fc in self.dense_feature_columns), 1)).to(
                 device)
             torch.nn.init.normal_(self.weight, mean=0, std=init_std)

     def forward(self, X):
+
         sparse_embedding_list = [self.embedding_dict[feat.embedding_name](
             X[:, self.feature_index[feat.name][0]:self.feature_index[feat.name][1]].long()) for
             feat in self.sparse_feature_columns]
@@ -70,7 +71,7 @@ def forward(self, X):
             linear_logit = torch.cat(
                 dense_value_list, dim=-1).matmul(self.weight)
         else:
-            linear_logit = torch.zeros([X.shape[0],1])
+            linear_logit = torch.zeros([X.shape[0], 1])
         return linear_logit

     def create_embedding_matrix(self, feature_columns, embedding_size, init_std=0.0001, sparse=False):
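The resized linear weight above fixes the dense part of the linear logit when a DenseFeat has dimension greater than one: the concatenated dense input has sum(fc.dimension) columns, not len(dense_feature_columns). A quick shape check with made-up dimensions:

```python
import torch

# Two hypothetical dense features of dimension 2 and 3: the concatenated
# dense input has 5 columns, so the linear weight must be (5, 1).
batch = 4
dense_value_list = [torch.randn(batch, 2), torch.randn(batch, 3)]
weight = torch.zeros(sum(t.shape[1] for t in dense_value_list), 1)  # (5, 1)

linear_logit = torch.cat(dense_value_list, dim=-1).matmul(weight)
print(linear_logit.shape)  # torch.Size([4, 1])
```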
@@ -173,10 +174,13 @@ def fit(self, x=None,
         else:
             val_x = []
             val_y = []
+        for i in range(len(x)):
+            if len(x[i].shape) == 1:
+                x[i] = np.expand_dims(x[i], axis=1)

         train_tensor_data = Data.TensorDataset(
             torch.from_numpy(
-                np.hstack(list(map(lambda x: np.expand_dims(x, axis=1), x)))),
+                np.concatenate(x, axis=-1)),
             torch.from_numpy(y))
         if batch_size is None:
             batch_size = 256
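This is the data-side half of multi-value support: fixed-length inputs arrive as 1-D arrays and are expanded to (n, 1), while a padded multi-value input is already 2-D with maxlen columns, which the old np.hstack-of-expand_dims could not accommodate. A minimal sketch with made-up arrays:

```python
import numpy as np

n = 4
x = [np.array([1, 2, 3, 4]),                 # fixed-length id, shape (4,)
     np.random.randint(0, 20, size=(n, 5))]  # padded multi-value ids, shape (4, 5)

# 1-D arrays become single columns; 2-D arrays are kept as-is.
for i in range(len(x)):
    if len(x[i].shape) == 1:
        x[i] = np.expand_dims(x[i], axis=1)

model_input = np.concatenate(x, axis=-1)
print(model_input.shape)  # (4, 6): one row per sample, one column slice per feature
```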
@@ -268,8 +272,12 @@ def predict(self, x, batch_size=256):
         :return: Numpy array(s) of predictions.
         """
         model = self.eval()
-        x = np.hstack(list(map(lambda x: np.expand_dims(x, axis=1), x)))
-        tensor_data = Data.TensorDataset(torch.from_numpy(x))
+        for i in range(len(x)):
+            if len(x[i].shape) == 1:
+                x[i] = np.expand_dims(x[i], axis=1)
+
+        tensor_data = Data.TensorDataset(
+            torch.from_numpy(np.concatenate(x, axis=-1)))
         test_loader = DataLoader(
             dataset=tensor_data, shuffle=False, batch_size=batch_size)

@@ -284,48 +292,67 @@ def input_from_feature_columns(self, X, feature_columns, embedding_dict, support_dense=True):
         return np.concatenate(pred_ans)

     def input_from_feature_columns(self, X, feature_columns, embedding_dict, support_dense=True):
+
         sparse_feature_columns = list(
             filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if len(feature_columns) else []
         dense_feature_columns = list(
             filter(lambda x: isinstance(x, DenseFeat), feature_columns)) if len(feature_columns) else []

+        varlen_sparse_feature_columns = list(
+            filter(lambda x: isinstance(x, VarLenSparseFeat), feature_columns)) if feature_columns else []
+
         if not support_dense and len(dense_feature_columns) > 0:
             raise ValueError(
                 "DenseFeat is not supported in dnn_feature_columns")

         sparse_embedding_list = [embedding_dict[feat.embedding_name](
             X[:, self.feature_index[feat.name][0]:self.feature_index[feat.name][1]].long()) for
             feat in sparse_feature_columns]
+        varlen_sparse_embedding_list = [embedding_dict[feat.embedding_name](
+            X[:, self.feature_index[feat.name][0]:self.feature_index[feat.name][1]].long()) for
+            feat in varlen_sparse_feature_columns]
+        varlen_sparse_embedding_list = list(
+            map(lambda x: x.unsqueeze(dim=1), varlen_sparse_embedding_list))

         dense_value_list = [X[:, self.feature_index[feat.name][0]:self.feature_index[feat.name][1]] for feat in
                             dense_feature_columns]

-        return sparse_embedding_list, dense_value_list
+        return sparse_embedding_list + varlen_sparse_embedding_list, dense_value_list
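The unsqueeze step is what lets pooled multi-value fields flow through the same downstream code as ordinary sparse fields: nn.EmbeddingBag returns one pooled vector per sample, and restoring the field axis makes it shape-compatible with the (batch, 1, embedding_size) embeddings. A self-contained sketch with made-up sizes:

```python
import torch
import torch.nn as nn

bag = nn.EmbeddingBag(20, 8, mode='mean')   # vocab 20, embedding size 8
padded_ids = torch.randint(0, 20, (4, 5))   # batch of 4, maxlen 5

pooled = bag(padded_ids)         # (4, 8): each row of ids pooled to one vector
field = pooled.unsqueeze(dim=1)  # (4, 1, 8): aligned with per-field embeddings
print(pooled.shape, field.shape)
```

Note that with a 2-D input nn.EmbeddingBag pools every position in the row, so padding ids take part in the combiner as well; mask_zero is not consulted here.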

     def create_embedding_matrix(self, feature_columns, embedding_size, init_std=0.0001, sparse=False):
+
         sparse_feature_columns = list(
             filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if len(feature_columns) else []

+        varlen_sparse_feature_columns = list(
+            filter(lambda x: isinstance(x, VarLenSparseFeat), feature_columns)) if len(feature_columns) else []
+
         embedding_dict = nn.ModuleDict(
             {feat.embedding_name: nn.Embedding(feat.dimension, embedding_size, sparse=sparse) for feat in
              sparse_feature_columns}
         )

+        for feat in varlen_sparse_feature_columns:
+            embedding_dict[feat.embedding_name] = nn.EmbeddingBag(
+                feat.dimension, embedding_size, sparse=sparse, mode=feat.combiner)
+
         for tensor in embedding_dict.values():
             nn.init.normal_(tensor.weight, mean=0, std=init_std)

         return embedding_dict
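The resulting ModuleDict mixes the two lookup types: plain fields map to nn.Embedding tables, multi-value fields to nn.EmbeddingBag whose mode is the feature's combiner ('mean', 'sum' or 'max'). A reduced sketch with hypothetical names and sizes:

```python
import torch.nn as nn

embedding_size = 8
embedding_dict = nn.ModuleDict({
    'user_id': nn.Embedding(1000, embedding_size),               # fixed-length field
    'item_id': nn.Embedding(500, embedding_size),                # fixed-length field
    'genres': nn.EmbeddingBag(20, embedding_size, mode='mean'),  # multi-value field
})

# Both module types expose .weight, so one init loop covers them all.
for tensor in embedding_dict.values():
    nn.init.normal_(tensor.weight, mean=0, std=0.0001)
```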

     def compute_input_dim(self, feature_columns, embedding_size=1, include_sparse=True, include_dense=True, feature_group=False):
         sparse_feature_columns = list(
-            filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if len(feature_columns) else []
+            filter(lambda x: isinstance(x, (SparseFeat, VarLenSparseFeat)), feature_columns)) if len(feature_columns) else []
         dense_feature_columns = list(
             filter(lambda x: isinstance(x, DenseFeat), feature_columns)) if len(feature_columns) else []

-        dense_input_dim = sum(map(lambda x: x.dimension, dense_feature_columns))
+        dense_input_dim = sum(
+            map(lambda x: x.dimension, dense_feature_columns))
         if feature_group:
             sparse_input_dim = len(sparse_feature_columns)
         else:
-            sparse_input_dim = len(sparse_feature_columns)* embedding_size
+            sparse_input_dim = len(sparse_feature_columns) * embedding_size
         input_dim = 0
         if include_sparse:
             input_dim += sparse_input_dim
@@ -354,7 +381,6 @@ def compile(self, optimizer,
         :param metrics: List of metrics to be evaluated by the model during training and testing. Typically you will use `metrics=['accuracy']`.
         """

-
         self.optim = self._get_optim(optimizer)
         self.loss_func = self._get_loss_func(loss)
         self.metrics = self._get_metrics(metrics)
@@ -399,6 +425,7 @@ def _get_metrics(self, metrics):
                 metrics_[metric] = roc_auc_score
             if metric == "mse":
                 metrics_[metric] = mean_squared_error
-            if metric == "accuracy" or metric =="acc":
-                metrics_[metric] = lambda y_true,y_pred: accuracy_score(y_true,np.where(y_pred > 0.5, 1, 0))
-        return metrics_
+            if metric == "accuracy" or metric == "acc":
+                metrics_[metric] = lambda y_true, y_pred: accuracy_score(
+                    y_true, np.where(y_pred > 0.5, 1, 0))
+        return metrics_
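As the registration shows, 'accuracy'/'acc' thresholds the predicted probabilities at 0.5 before delegating to sklearn's accuracy_score. A toy check of that lambda:

```python
import numpy as np
from sklearn.metrics import accuracy_score

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0.3, 0.8, 0.6, 0.7])  # raw sigmoid outputs

# np.where turns probabilities into hard 0/1 labels at the 0.5 cutoff.
acc = accuracy_score(y_true, np.where(y_pred > 0.5, 1, 0))
print(acc)  # 0.75: three of four thresholded predictions match
```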
4 changes: 2 additions & 2 deletions deepctr_torch/models/fibinet.py
@@ -11,7 +11,7 @@
 import torch.nn.functional as F

 from .basemodel import BaseModel
-from ..inputs import combined_dnn_input, SparseFeat, DenseFeat
+from ..inputs import combined_dnn_input, SparseFeat, DenseFeat, VarLenSparseFeat
 from ..layers import SENETLayer,BilinearInteraction,DNN

@@ -61,7 +61,7 @@ def __init__(self, linear_feature_columns, dnn_feature_columns, embedding_size=8

     def compute_input_dim(self, feature_columns, embedding_size, include_sparse=True, include_dense=True):
         sparse_feature_columns = list(
-            filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if len(feature_columns) else []
+            filter(lambda x: isinstance(x, (SparseFeat,VarLenSparseFeat)), feature_columns)) if len(feature_columns) else []
         dense_feature_columns = list(
             filter(lambda x: isinstance(x, DenseFeat), feature_columns)) if len(feature_columns) else []
         field_size = len(sparse_feature_columns)
