fix llama and test llama2 (#515)
* fix llama

* fix gpt bloom llama

* refine

* reformat
xiezipeng-ML authored Aug 9, 2023
1 parent 13f85aa commit 722b5e3
Showing 3 changed files with 34 additions and 23 deletions.
projects/mock_transformers/dist_infer_bloom.py (10 additions, 5 deletions)

@@ -90,8 +90,17 @@ def __init__(self, config):
 )
 dist.setup_dist_util(parallel_config)

+placement_sbp_dict = dict(
+    placement=flow.env.all_device_placement("cuda"),
+    sbp=flow.sbp.broadcast,
+)
+
 # initial and load model
-model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m", torch_dtype=flow.float16)
+with global_mode(True, **placement_sbp_dict):
+    model = AutoModelForCausalLM.from_pretrained(
+        "bigscience/bloom-560m", torch_dtype=flow.float16
+    )
+
 # set model to cuda
 dist.set_device_type("cuda")
 model._apply(dist.convert_to_distributed_default_setting)

@@ -108,10 +117,6 @@ def __init__(self, config):
 )

 # generate id
-placement_sbp_dict = dict(
-    placement=flow.env.all_device_placement("cuda"),
-    sbp=flow.sbp.broadcast,
-)
 with global_mode(True, **placement_sbp_dict):
     generated_ids = model.generate(input_ids, max_length=30)
 out_put_ids = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
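Note: the change applied to all three scripts is the same. The HuggingFace model is now built inside the same global_mode placement/SBP context that was previously only wrapped around generate, so its parameters come into existence as global (broadcast) tensors on CUDA before LiBai converts them to its default distributed setting. A minimal sketch of that pattern follows; it assumes global_mode comes from oneflow.utils.global_view, dist is libai.utils.dist, and the mock_transformers patching done earlier in these scripts is already in effect (those imports and that setup are assumptions, not shown in this diff).

# Sketch only; mirrors the diff above under the assumptions stated in the note.
import oneflow as flow
from oneflow.utils.global_view import global_mode  # assumed import path
from transformers import AutoModelForCausalLM, AutoTokenizer

from libai.utils import dist  # assumed import path

placement_sbp_dict = dict(
    placement=flow.env.all_device_placement("cuda"),
    sbp=flow.sbp.broadcast,
)

# Build the model under global mode so every parameter is a global tensor with
# broadcast SBP on all CUDA ranks, then convert it to LiBai's default
# (tensor-parallel) distribution.
with global_mode(True, **placement_sbp_dict):
    model = AutoModelForCausalLM.from_pretrained(
        "bigscience/bloom-560m", torch_dtype=flow.float16
    )

dist.set_device_type("cuda")
model._apply(dist.convert_to_distributed_default_setting)

# Generation reuses the same dict, which is why the commit hoists it above the load.
tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")
input_ids = flow.tensor(
    tokenizer("Hello, I'm am conscious and", return_tensors="np").input_ids,
    sbp=flow.sbp.broadcast,
    placement=flow.env.all_device_placement("cuda"),
)
with global_mode(True, **placement_sbp_dict):
    generated_ids = model.generate(input_ids, max_length=30)
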
projects/mock_transformers/dist_infer_gpt.py (8 additions, 5 deletions)

@@ -126,8 +126,15 @@ def __init__(self, intermediate_size, config):
 )
 dist.setup_dist_util(parallel_config)

+placement_sbp_dict = dict(
+    placement=flow.env.all_device_placement("cuda"),
+    sbp=flow.sbp.broadcast,
+)
+
 # initial and load model
-model = AutoModelForCausalLM.from_pretrained("gpt2", torch_dtype=flow.float16)
+with global_mode(True, **placement_sbp_dict):
+    model = AutoModelForCausalLM.from_pretrained("gpt2", torch_dtype=flow.float16)
+
 # set model to cuda
 dist.set_device_type("cuda")
 model._apply(dist.convert_to_distributed_default_setting)

@@ -144,10 +151,6 @@ def __init__(self, intermediate_size, config):
 )

 # generate id
-placement_sbp_dict = dict(
-    placement=flow.env.all_device_placement("cuda"),
-    sbp=flow.sbp.broadcast,
-)
 with global_mode(True, **placement_sbp_dict):
     generated_ids = model.generate(input_ids, max_length=30)
 out_put_ids = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
projects/mock_transformers/dist_infer_llama.py (16 additions, 13 deletions)

@@ -68,10 +68,10 @@ def __init__(self, config):


 class LiBaiLlamaMLP(temp_class):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        hidden_size = kwargs["hidden_size"]
-        intermediate_size = kwargs["intermediate_size"]
+    def __init__(self, config):
+        super().__init__(config)
+        hidden_size = config.hidden_size
+        intermediate_size = config.intermediate_size
         self.gate_proj = Linear(
             hidden_size, intermediate_size, bias=False, parallel="col", dtype=flow.float16
         )

@@ -90,23 +90,30 @@ def __init__(self, *args, **kwargs):
 parallel_config = DictConfig(
     dict(
         data_parallel_size=1,
-        tensor_parallel_size=4,
+        tensor_parallel_size=2,
         pipeline_parallel_size=1,  # set to 1, unsupport pipeline parallel now
         pipeline_num_layers=None,
         device_type="cpu",
     )
 )
 dist.setup_dist_util(parallel_config)

-# initial and load model
-model = AutoModelForCausalLM.from_pretrained(
-    "decapoda-research/llama-13b-hf", torch_dtype=flow.float16
+placement_sbp_dict = dict(
+    placement=flow.env.all_device_placement("cuda"),
+    sbp=flow.sbp.broadcast,
 )

+# initial and load model
+with global_mode(True, **placement_sbp_dict):
+    model = AutoModelForCausalLM.from_pretrained(
+        "meta-llama/Llama-2-7b", torch_dtype=flow.float16
+    )
+
 # set model to cuda
 dist.set_device_type("cuda")
 model._apply(dist.convert_to_distributed_default_setting)
 # initial tokenizer
-tokenizer = AutoTokenizer.from_pretrained("decapoda-research/llama-13b-hf", use_fast=False)
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b", use_fast=False)

 # get input_ids
 prompt = "Hello, I'm am conscious and"

@@ -118,10 +125,6 @@ def __init__(self, *args, **kwargs):
 )

 # generate id
-placement_sbp_dict = dict(
-    placement=flow.env.all_device_placement("cuda"),
-    sbp=flow.sbp.broadcast,
-)
 with global_mode(True, **placement_sbp_dict):
     generated_ids = model.generate(input_ids, max_length=30)
 out_put_ids = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
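Note: besides switching the checkpoint to meta-llama/Llama-2-7b and dropping tensor_parallel_size to 2, this diff changes the LiBaiLlamaMLP patch to take the whole model config, matching the config-based constructor of the transformers LlamaMLP it overrides. A hypothetical usage sketch under that assumption; the config values are illustrative and LiBaiLlamaMLP is the class patched in dist_infer_llama.py above.

# Illustrative only: the constructor contract before and after this commit.
# (Assumes a transformers release with LLaMA support and that the script's
# dist.setup_dist_util(...) has already run, since the patched MLP builds
# LiBai parallel Linear layers.)
from transformers import LlamaConfig  # assumed import

config = LlamaConfig(hidden_size=4096, intermediate_size=11008)

# After this commit: a single config object, read via attribute access.
mlp = LiBaiLlamaMLP(config)

# Before this commit the patch expected keyword arguments instead:
# mlp = LiBaiLlamaMLP(hidden_size=4096, intermediate_size=11008)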
