[Don't Merge] cambricon support gpt2 train and inference #482

Open · wants to merge 31 commits into base: main

Commits (31):
5626a1c  cambricon support gpt2 inference (BBuf, Mar 23, 2023)
73c6732  Delete configs (BBuf, Mar 23, 2023)
48062f5  Delete version.py (BBuf, Mar 23, 2023)
89f63fc  Merge branch 'main' into cambricon_support_gpt2 (BBuf, Mar 23, 2023)
9e80018  refine train script (BBuf, Mar 28, 2023)
ec311a7  refine train script (BBuf, Mar 28, 2023)
22b8f86  refine (BBuf, Mar 28, 2023)
d47fe9c  refine (BBuf, Mar 28, 2023)
811584a  refine script (BBuf, Mar 29, 2023)
0e991aa  Merge branch 'main' into cambricon_support_gpt2 (BBuf, Mar 30, 2023)
44520ad  refine (BBuf, Mar 30, 2023)
9e57cad  Merge branch 'main' into cambricon_support_gpt2 (BBuf, Mar 31, 2023)
b7a8f3e  refine (BBuf, Mar 31, 2023)
338177b  refine (BBuf, Mar 31, 2023)
a3c8a5e  mock tokenzier (xiezipeng-ML, Apr 10, 2023)
846fba8  compatible with tokenizer of hugginface (xiezipeng-ML, Apr 10, 2023)
1d2ac19  replace mock_tokenizer to pipeline demo (xiezipeng-ML, Apr 10, 2023)
e69b70c  support data parallel inference (xiezipeng-ML, Apr 10, 2023)
f705e5d  Merge branch 'main' of https://github.com/Oneflow-Inc/libai into dev_… (xiezipeng-ML, Apr 11, 2023)
5cce818  reformat (xiezipeng-ML, Apr 11, 2023)
23b0422  refine (xiezipeng-ML, Apr 11, 2023)
347d6d1  refine (xiezipeng-ML, Apr 11, 2023)
6df6fbc  reformat (xiezipeng-ML, Apr 11, 2023)
b2ab174  fix conflict (BBuf, Apr 11, 2023)
2cfc20d  update flow.nn.functional.gelu (BBuf, Apr 17, 2023)
abc8b80  update flow.nn.functional.gelu (BBuf, Apr 17, 2023)
010b465  refine (BBuf, Apr 17, 2023)
8030159  refine (BBuf, Apr 17, 2023)
ac43197  Merge branch 'main' into cambricon_support_gpt2 (BBuf, Apr 17, 2023)
64263fe  update mlu gpt2 dataset url (#502) (hjchen2, May 4, 2023)
2b800ee  import oneflow_mlu (#505) (hjchen2, May 12, 2023)
72 changes: 36 additions & 36 deletions configs/common/data/gpt_dataset.py
@@ -20,40 +20,40 @@

 dataloader = OmegaConf.create()

-dataloader.train = LazyCall(build_nlp_train_val_test_loader)(
-    dataset=[
-        LazyCall(GPT2Dataset)(
-            name="gpt-2",
-            data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
-            indexed_dataset=LazyCall(get_indexed_dataset)(
-                data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
-                data_impl="mmap",
-                skip_warmup=False,
-            ),
-            max_seq_length=1024,
-            seed=1234,
-        ),
-    ],
-    train_val_test_num_samples=None,  # a hint for deferred assignment
-    splits=[[949.0, 50.0, 1.0]],
-    weights=[1.0],
-    num_workers=4,
-)
+# dataloader.train = LazyCall(build_nlp_train_val_test_loader)(
+#     dataset=[
+#         LazyCall(GPT2Dataset)(
+#             name="gpt-2",
+#             data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
+#             indexed_dataset=LazyCall(get_indexed_dataset)(
+#                 data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
+#                 data_impl="mmap",
+#                 skip_warmup=False,
+#             ),
+#             max_seq_length=1024,
+#             seed=1234,
+#         ),
+#     ],
+#     train_val_test_num_samples=None,  # a hint for deferred assignment
+#     splits=[[949.0, 50.0, 1.0]],
+#     weights=[1.0],
+#     num_workers=4,
+# )

-dataloader.test = [
-    LazyCall(build_nlp_test_loader)(
-        dataset=LazyCall(GPT2Dataset)(
-            name="gpt-2",
-            data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
-            indexed_dataset=LazyCall(get_indexed_dataset)(
-                data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
-                data_impl="mmap",
-                skip_warmup=False,
-            ),
-            max_seq_length=1024,
-            max_num_samples=10,
-            seed=1234,
-        ),
-        test_batch_size=4,
-    )
-]
+# dataloader.test = [
+#     LazyCall(build_nlp_test_loader)(
+#         dataset=LazyCall(GPT2Dataset)(
+#             name="gpt-2",
+#             data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
+#             indexed_dataset=LazyCall(get_indexed_dataset)(
+#                 data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
+#                 data_impl="mmap",
+#                 skip_warmup=False,
+#             ),
+#             max_seq_length=1024,
+#             max_num_samples=10,
+#             seed=1234,
+#         ),
+#         test_batch_size=4,
+#     )
+# ]
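Note: both the train and test loaders above end up commented out, so this shared config no longer wires any GPT-2 data by default; presumably the MagicPrompt project configs supply their own loaders. For readers unfamiliar with the LazyCall pattern these blocks use, here is a minimal sketch (assuming `instantiate` is exported from libai.config, mirroring detectron2's LazyConfig; the Toy class is hypothetical):

    from libai.config import LazyCall, instantiate

    class Toy:
        def __init__(self, name, seed):
            self.name, self.seed = name, seed

    # LazyCall only records the callable and its kwargs; nothing is built yet,
    # which is why whole config blocks can be swapped or commented out freely.
    node = LazyCall(Toy)(name="gpt-2", seed=1234)
    obj = instantiate(node)  # recursively constructs the object tree
    assert obj.name == "gpt-2" and obj.seed == 1234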
4 changes: 2 additions & 2 deletions configs/common/optim.py
@@ -7,8 +7,8 @@
     params=LazyCall(get_default_optimizer_params)(
         # params.model is meant to be set to the model object,
         # before instantiating the optimizer.
-        clip_grad_max_norm=1.0,
-        clip_grad_norm_type=2.0,
+        # clip_grad_max_norm=1.0,
+        # clip_grad_norm_type=2.0,
         weight_decay_norm=0.0,
         weight_decay_bias=0.0,
     ),
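Note: gradient clipping is switched off here, presumably because the clip-grad kernels are not yet available in oneflow_mlu (an assumption; the PR does not say). On a backend that supports it, the same keys can simply be restored (sketch; the config object being named `optim` in configs/common/optim.py is an assumption):

    # Re-enable clipping on a supported backend, using the keys shown above.
    optim.params.clip_grad_max_norm = 1.0
    optim.params.clip_grad_norm_type = 2.0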
1 change: 1 addition & 0 deletions libai/config/configs
2 changes: 1 addition & 1 deletion libai/inference/generator/generation_utils.py
@@ -524,7 +524,7 @@ def greedy_search(
             # if eos_token was found in one sentence, set sentence to finished
             if eos_token_id is not None:
                 unfinished_sequences = flow.mul(
-                    unfinished_sequences, (next_tokens != eos_token_id).long()
+                    unfinished_sequences, (next_tokens.to(flow.int32) != eos_token_id).long()
                 )

             if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
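Note: the only change is an explicit cast of `next_tokens` to int32 before the `!=` comparison, presumably to avoid an int64 elementwise kernel that the MLU backend lacks (an assumption). A self-contained sketch of the bookkeeping, runnable on CPU:

    import oneflow as flow

    eos_token_id = 50256
    next_tokens = flow.tensor([50256, 11, 50256], dtype=flow.int64)
    unfinished_sequences = flow.ones(3, dtype=flow.int64)

    # Cast before comparing so the kernel sees int32 operands; sequences that
    # just emitted EOS get their "unfinished" flag multiplied down to 0.
    unfinished_sequences = flow.mul(
        unfinished_sequences, (next_tokens.to(flow.int32) != eos_token_id).long()
    )
    print(unfinished_sequences)  # [0, 1, 0]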
1 change: 1 addition & 0 deletions libai/layers/activation.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import math
 from enum import Enum
 from typing import Optional

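Note: only `import math` is added here; the diff does not show where it is used, but the commit messages ("update flow.nn.functional.gelu") suggest a GELU rewritten from fused ops into plain arithmetic. A hedged sketch of what such a tanh-approximate GELU looks like (not necessarily the exact code in activation.py):

    import math
    import oneflow as flow

    def gelu_tanh(x):
        # Tanh approximation of GELU built from unfused elementwise ops,
        # which is friendlier to backends without fused bias-GELU kernels.
        return 0.5 * x * (1.0 + flow.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x * x * x)))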
3 changes: 2 additions & 1 deletion libai/layers/embedding.py
@@ -148,8 +148,10 @@ def __init__(
             )
         )
         # Initialize the word embedding
+
         if os.getenv("ONEFLOW_LINEAR_EMBEDDING_SKIP_INIT", "0") != "1":
             self.init_method(self.weight)
+
         # FIXME(Lxy): Fill padding_idx is not supported in nd_sbp right now.
         # self._fill_padding_idx_with_zero()

@@ -164,7 +166,6 @@ def forward(self, input_ids):
         input_embeds = flow._C.gather(weight, input_ids, axis=0)
         # Set the embeds sbp from [S(0), P] --> [S(0), B] to get complete embedding results.
         input_embeds = input_embeds.to_global(sbp=dist.get_hidden_sbp())
-
         return input_embeds

     def _fill_padding_idx_with_zero(self) -> None:
1 change: 0 additions & 1 deletion libai/models/gpt_model.py
@@ -203,7 +203,6 @@ def forward(self, input_ids):
         Returns:
             flow.Tensor: logits
         """
-
         input_ids = input_ids.to_global(placement=dist.get_layer_placement(0))
         input_embeds = self.embeddings(input_ids, 0)

2 changes: 1 addition & 1 deletion libai/tokenizer/tokenization_base.py
@@ -783,7 +783,7 @@ def convert_to_tensors(self, token_ids, return_tensors=None, is_global=False, **kwargs):
         elif is_global:
             sbp = kwargs.get("sbp", dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.broadcast]))
             placement = kwargs.get(
-                "placement", flow.placement("cuda", list(range(dist.get_world_size())))
+                "placement", flow.placement("mlu", list(range(dist.get_world_size())))
             )
             return_token_ids = flow.tensor(
                 token_ids, sbp=sbp, placement=placement, dtype=flow.long
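Note: the default placement for global token tensors moves from "cuda" to "mlu"; callers can still target CUDA by passing `placement` explicitly through kwargs. A minimal sketch of what this code path builds (the token ids are hypothetical; `import oneflow_mlu` registers the device, as in tools/train_net.py below):

    import oneflow as flow
    import oneflow_mlu
    from libai.utils import distributed as dist

    token_ids = [[15496, 11, 995]]  # hypothetical ids
    sbp = dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.broadcast])
    placement = flow.placement("mlu", list(range(dist.get_world_size())))
    tensor = flow.tensor(token_ids, sbp=sbp, placement=placement, dtype=flow.long)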
4 changes: 2 additions & 2 deletions libai/utils/distributed.py
@@ -71,7 +71,7 @@ def _init_distributed_env(self, cfg):
         self._world_size = num_gpus_per_node * num_nodes

         # Add set device type
-        self._device_type = try_get_key(cfg, "device_type", default="cuda")
+        self._device_type = try_get_key(cfg, "device_type", default="mlu")

     def _init_parallel_size(self, cfg):

@@ -438,7 +438,7 @@ def convert_to_distributed_default_setting(t):
 def ttol(tensor, pure_local=False, ranks=None):
     """Global tensor to local tensor."""
     if tensor.is_global:
-        placement = tensor.placement if not ranks else flow.placement("cuda", ranks)
+        placement = tensor.placement if not ranks else flow.placement("mlu", ranks)
         if pure_local:
             tensor = tensor.to_global(placement=placement).to_local()
         else:
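Note: this flips the library-wide default device from "cuda" to "mlu", which is presumably part of why the PR is titled [Don't Merge]: CUDA users would silently land on MLU placements. The default is only a fallback; a config can still pin the device explicitly, e.g. (key name per `try_get_key` above; hanging it under train.dist is an assumption about how LiBai passes cfg into `_init_distributed_env`):

    # In a LiBai config file:
    from configs.common.train import train

    train.dist.device_type = "cuda"  # opt back into GPUs despite the new default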
2 changes: 2 additions & 0 deletions libai/version.py
@@ -0,0 +1,2 @@
+__version__ = '0.2.0'
+git_version = 'd47fe9c7908c1e7a4604574b2bd58a7e06d20645'
8 changes: 5 additions & 3 deletions projects/MagicPrompt/configs/gpt2_inference.py
@@ -4,7 +4,9 @@
 from projects.MagicPrompt.gpt2 import GPTModel, GPTForPreTraining
 from configs.common.data.gpt_dataset import tokenization
 from configs.common.train import train
+from configs.common.models.graph import graph

+graph.enabled=True

 cfg.update(
     # Model
@@ -56,13 +58,13 @@
     sep_token_id=None,
     decoder_start_token_id=None,
     # train
-    pretrained_model_path="/data/home/magicprompt",
+    pretrained_model_path="./oneflow-model",
 )


 model = LazyCall(GPTModel)(cfg=cfg)
 pretrain_model = LazyCall(GPTForPreTraining)(cfg=cfg)
 tokenization.tokenizer = LazyCall(mock_tokenization.GPT2Tokenizer)(
-    vocab_file="/data/home/magicprompt/vocab.json",
-    merges_file="/data/home/magicprompt/merges.txt",
+    vocab_file="./oneflow-model/vocab.json",
+    merges_file="./oneflow-model/merges.txt",
 )
20 changes: 12 additions & 8 deletions projects/MagicPrompt/configs/gpt2_training.py
@@ -9,10 +9,12 @@
 from configs.common.train import train
 from configs.common.models.graph import graph

+graph.enabled=False
+graph.debug = 2

-vocab_file = "/data/home/magicprompt/vocab.json"
-merge_files = "/data/home/magicprompt/merges.txt"
-train_data_prefix = "/data/home/magicprompt/train/en_train_mmap_text_sentence"
+vocab_file = "./magicprompt/vocab.json"
+merge_files = "./magicprompt/merges.txt"
+train_data_prefix = "./magicprompt/train/en_train_mmap_text_sentence"

 tokenization.tokenizer.vocab_file = vocab_file
 tokenization.tokenizer.merges_file = merge_files
@@ -33,9 +35,9 @@
 model.cfg.vocab_size = 50257
 model.cfg.layernorm_epsilon = 1e-5
 model.cfg.use_scaled_init_for_output_weights = True
-model.cfg.bias_gelu_fusion = True
-model.cfg.bias_dropout_fusion = True
-model.cfg.scale_mask_softmax_fusion = True
+model.cfg.bias_gelu_fusion = False
+model.cfg.bias_dropout_fusion = False
+model.cfg.scale_mask_softmax_fusion = False
 model.cfg.apply_query_key_layer_scaling = True
 model.cfg.apply_residual_post_layernorm = False
 model.cfg.amp_enabled = True
@@ -56,14 +58,16 @@
     test_micro_batch_size=4,
     train_epoch=33,
     train_iter=10000,
-    log_period=50,
+    log_period=1,
     amp=dict(enabled=True),
     warmup_ratio=0,
     checkpointer=dict(period=8000, max_to_keep=20),
     dist=dict(
-        data_parallel_size=1,
+        data_parallel_size=4,
         tensor_parallel_size=1,
         pipeline_parallel_size=1,
+        # pipeline_num_layers = 12,
+        # custom_pipeline_stage_id = [0] * 6 + [1] * 6,
         # pipeline_num_layers=model.cfg.hidden_layers,
     ),
     scheduler=LazyCall(WarmupExponentialLR)(
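Note: the three kernel-fusion switches (bias_gelu_fusion, bias_dropout_fusion, scale_mask_softmax_fusion) are turned off, presumably because the fused kernels have no MLU implementations yet (an assumption), and the run is reconfigured for pure data parallelism across 4 devices; in LiBai, data_parallel_size * tensor_parallel_size * pipeline_parallel_size must equal the world size. With LiBai's usual launcher this would be started roughly as `bash tools/train.sh tools/train_net.py projects/MagicPrompt/configs/gpt2_training.py 4` (command shape assumed from LiBai's README). log_period=1 is per-iteration logging verbosity, consistent with the don't-merge status.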
2 changes: 1 addition & 1 deletion projects/MagicPrompt/pipeline.py
@@ -97,7 +97,7 @@ def postprocess(self, model_output_dict, **kwargs) -> dict:
         pipeline_parallel=1,
         # pipeline_stage_id=[0] * 6 + [1] * 6,
         # pipeline_num_layers=12,
-        model_path="/path/to/oneflow-model",
+        model_path="oneflow-model/model",
         mode="libai",
     )

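Note: model_path now points at a relative checkpoint directory instead of a placeholder. A hedged usage sketch of this entry point (the class name TextGenerationPipeline, the positional config path, and the data_parallel/tensor_parallel keywords are assumptions from the MagicPrompt project layout; only pipeline_parallel, model_path, and mode appear in this diff):

    from projects.MagicPrompt.pipeline import TextGenerationPipeline

    pipeline = TextGenerationPipeline(
        "projects/MagicPrompt/configs/gpt2_inference.py",
        data_parallel=1,
        tensor_parallel=1,
        pipeline_parallel=1,
        model_path="oneflow-model/model",
        mode="libai",
    )
    output = pipeline("a photo of")  # call signature assumed from LiBai's BasePipeline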
4 changes: 3 additions & 1 deletion tools/train_net.py
@@ -20,6 +20,7 @@

 import numpy as np
 import oneflow as flow
+import oneflow_mlu

 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))
 from libai.config import LazyConfig, default_argument_parser, try_get_key
@@ -36,7 +37,8 @@ def main(args):

     seed_for_rank = cfg.train.seed + flow.env.get_rank()
     flow.manual_seed(seed_for_rank)
-    flow.cuda.manual_seed(seed_for_rank)
+    # flow.cuda.manual_seed(seed_for_rank)
+    flow._oneflow_internal.default_generator("mlu").manual_seed(seed_for_rank)
     np.random.seed(seed_for_rank)
     random.seed(seed_for_rank)

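Note: `import oneflow_mlu` registers the MLU device with OneFlow as a plugin, and the CUDA-specific seeding call is replaced with the generic per-device default generator. A device-agnostic version of the seeding block might look like this (a sketch; `flow._oneflow_internal.default_generator` is an internal API and may change):

    import random

    import numpy as np
    import oneflow as flow

    def seed_everything(seed: int, device_type: str = "mlu") -> None:
        # Host-side RNGs.
        flow.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        # Device-side RNG, without hard-coding flow.cuda.
        flow._oneflow_internal.default_generator(device_type).manual_seed(seed)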