Addressed missed out comments in PR #1 (#3)

Merged: 4 commits, Jun 22, 2024

Changes from all commits:

pyproject.toml (1 change: 0 additions & 1 deletion)
@@ -58,7 +58,6 @@ include = [
     "instructlab.dolomite.hf_models.modeling_utils.normalization.layernorm",
     "instructlab.dolomite.hf_models.modeling_utils.normalization.rmsnorm",
     "instructlab.dolomite.hf_models.modeling_utils.position_embedding",
-    "instructlab.dolomite.gradient_checkpointing",
     "instructlab.dolomite.utils",
 ]

src/instructlab/dolomite/gradient_checkpointing/__init__.py (file deleted, 24 deletions)
src/instructlab/dolomite/gradient_checkpointing/block.py (file deleted, 47 deletions)

src/instructlab/dolomite/hf_models/__init__.py (3 changes: 2 additions & 1 deletion)
@@ -2,8 +2,9 @@
 # Extracted from https://github.com/ibm-granite/dolomite-engine
 # ----------------------------------------------------------------
 # Local
+from .config import GPTDolomiteConfig
 from .model_conversion import export_to_huggingface, import_from_huggingface
-from .models import GPTDolomiteConfig, GPTDolomiteForCausalLM, GPTDolomiteModel
+from .models import GPTDolomiteForCausalLM, GPTDolomiteModel
 from .register_hf import register_model_classes

 register_model_classes()
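
Since GPTDolomiteConfig now comes from `.config` rather than `.models`, a quick sanity check that the package-level surface is unchanged (a minimal sketch, assuming the package is installed and importable):

```python
# All three classes remain importable from the hf_models package root after the
# move; register_model_classes() runs as a side effect of this import.
from instructlab.dolomite.hf_models import (
    GPTDolomiteConfig,
    GPTDolomiteForCausalLM,
    GPTDolomiteModel,
)

# The renamed config carries the model_type added in this PR.
assert GPTDolomiteConfig.model_type == "gpt_dolomite"
```
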
src/instructlab/dolomite/hf_models/config.py (5 changes: 4 additions & 1 deletion)
@@ -8,7 +8,8 @@
 from .enums import AttentionHeadType, PositionEmbeddingType


-class CommonConfig(PretrainedConfig):
+class GPTDolomiteConfig(PretrainedConfig):
+    model_type = "gpt_dolomite"
     keys_to_ignore_at_inference = ["past_key_values"]
     attribute_map = {
         "hidden_size": "n_embd",
@@ -19,6 +20,8 @@ class CommonConfig(PretrainedConfig):

     # NOTE: initializer range is kept for backward compatiblity
     # but it is not used anymore
+    # : also rope_scaling is not used anymore but kept for
+    # same reason.

     def __init__(
         self,

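A short sketch of how the renamed config behaves; the `__init__` arguments are truncated in this diff, so constructing it with no arguments below assumes they all have defaults:

```python
from instructlab.dolomite.hf_models import GPTDolomiteConfig

config = GPTDolomiteConfig()  # assumes every __init__ argument has a default

print(config.model_type)  # "gpt_dolomite"

# attribute_map aliases the HF-style name to the GPT-2-style field, so
# hidden_size and n_embd always refer to the same value.
assert config.hidden_size == config.n_embd
```
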
src/instructlab/dolomite/hf_models/defaults.py (file deleted, 4 deletions)

@@ -8,8 +8,8 @@
 from transformers import AutoConfig, AutoTokenizer, GenerationConfig, GPTBigCodeConfig

 # Local
+from ..config import GPTDolomiteConfig
 from ..enums import AttentionHeadType, PositionEmbeddingType
-from ..models import GPTDolomiteConfig


 def import_from_huggingface_bigcode(

@@ -6,12 +6,12 @@

 # Local
 from ...utils import SafeTensorsWeightsManager, download_repo
+from ..config import GPTDolomiteConfig
 from ..enums import AttentionHeadType
 from ..modeling_utils import (
     interleave_query_key_value_tensor_for_attention,
     split_query_key_value_tensor_for_attention,
 )
-from ..models import GPTDolomiteConfig
 from ..models.gpt_dolomite import (
     interleave_up_gate_tensor_for_mlp,
     split_up_gate_tensor_for_mlp,

@@ -14,7 +14,5 @@
     repeat_key_value,
     split_query_key_value_tensor_for_attention,
 )
-from .embedding import Embedding
-from .linear import Linear
 from .normalization import RMSNorm, get_normalization_function
-from .position_embedding import Alibi, RoPE, YaRNScaledRoPE, apply_rotary_pos_emb
+from .position_embedding import Alibi, RoPE, apply_rotary_pos_emb

@@ -9,7 +9,7 @@
 import torch

 # Local
-from ...config import CommonConfig
+from ...config import GPTDolomiteConfig
 from ...enums import AttentionHeadType
 from .base import Attention
 from .flash import FlashAttention2
@@ -48,7 +48,7 @@


 def get_attention_module(
-    config: CommonConfig,
+    config: GPTDolomiteConfig,
     causal: bool,
     attention_implementation: str,
     use_padding_free_transformer: bool,

@@ -5,21 +5,21 @@
 from typing import Tuple

 # Third Party
+from torch.nn import Linear  # replaces ParameterizedLinear
 from transformers import DynamicCache
 import torch
 import torch.nn.functional as F

 # Local
-from ...config import CommonConfig
+from ...config import GPTDolomiteConfig
 from ...enums import AttentionHeadType, PositionEmbeddingType
-from ..linear import Linear
 from ..position_embedding import apply_rotary_pos_emb
 from .utils import repeat_key_value


 class Attention(torch.nn.Module):
     def __init__(
-        self, config: CommonConfig, causal: bool, layer_idx: int = None
+        self, config: GPTDolomiteConfig, causal: bool, layer_idx: int = None
     ) -> None:
         super().__init__()

This file was deleted.

src/instructlab/dolomite/hf_models/modeling_utils/linear.py (file deleted, 8 deletions)

@@ -5,8 +5,7 @@
 import torch

 # Local
-from .layernorm import get_layernorm
-from .rmsnorm import RMSNorm, get_rmsnorm
+from .norms import RMSNorm, get_layernorm, get_rmsnorm

 _NORMALIZATION_FUNCTIONS = {
     "layernorm": get_layernorm,
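
The lookup table above maps a normalization name to a factory function. The factories themselves come from the new norms module added later in this diff; a hedged sketch of calling them directly (the full module path below is an assumption inferred from the `from .norms import ...` line):

```python
import torch

# Assumed path: the __init__ above imports `from .norms import ...`, so the new
# module presumably sits inside the same normalization package.
from instructlab.dolomite.hf_models.modeling_utils.normalization.norms import (
    get_layernorm,
    get_rmsnorm,
)

ln = get_layernorm(normalized_shape=768, eps=1e-5)   # returns torch.nn.LayerNorm
rms = get_rmsnorm(normalized_shape=768, eps=1e-6)    # returns the RMSNorm defined below

x = torch.randn(2, 8, 768)
print(ln(x).shape, rms(x).shape)  # both preserve the input shape
```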

This file was deleted.

@@ -0,0 +1,81 @@
# ----------------------------------------------------------------
# Extracted from https://github.com/ibm-granite/dolomite-engine
# ----------------------------------------------------------------

# Standard
import numbers

# Third Party
import torch

# ---------------- LayerNorm ---------------

_LAYERNORM_MODULES = {
    "torch": torch.nn.LayerNorm,
}


def get_layernorm(
    normalized_shape: int,
    eps: float,
    normalization_implementation: str = "torch",
) -> torch.nn.LayerNorm:
    if normalization_implementation in _LAYERNORM_MODULES:
        return _LAYERNORM_MODULES[normalization_implementation](
            normalized_shape=normalized_shape, eps=eps
        )

    raise ValueError(
        f"unexpected `normalization_implementation` {normalization_implementation}"
    )


# --------------- RMS Norm ---------------
# ----------------------------------------------------------------
# Extracted from https://github.com/ibm-granite/dolomite-engine
# ----------------------------------------------------------------


class RMSNorm(torch.nn.Module):
    def __init__(self, normalized_shape: int, eps: float = 1e-6) -> None:
        super().__init__()

        self.weight = torch.nn.Parameter(torch.ones(normalized_shape))
        self.eps = eps

        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = (normalized_shape,)
        self.normalized_shape = normalized_shape

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        input_dtype = input.dtype

        input = input.to(torch.float32)
        variance = input.pow(2).mean(-1, keepdim=True)
        input = input * torch.rsqrt(variance + self.eps)

        return self.weight * input.to(input_dtype)

    def extra_repr(self) -> str:
        return f"{self.normalized_shape}, eps={self.eps}"

    def reset_parameters(self) -> None:
        torch.nn.init.ones_(self.weight)


_RMSNORM_MODULES = {"torch": RMSNorm}


def get_rmsnorm(
    normalized_shape: int,
    eps: float,
    normalization_implementation: str = "torch",
) -> torch.nn.LayerNorm:
    if normalization_implementation in _RMSNORM_MODULES:
        return _RMSNORM_MODULES[normalization_implementation](
            normalized_shape=normalized_shape, eps=eps
        )

    raise ValueError(
        f"unexpected `normalization_implementation` {normalization_implementation}"
    )
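
As a usage reference, a minimal sketch that exercises the new RMSNorm through the re-export shown in the modeling_utils `__init__.py` hunk above (it assumes the package is installed and importable as `instructlab.dolomite`):

```python
import torch

# RMSNorm is re-exported by modeling_utils/__init__.py (see the hunk above).
from instructlab.dolomite.hf_models.modeling_utils import RMSNorm

norm = RMSNorm(normalized_shape=768, eps=1e-6)
x = torch.randn(2, 16, 768)

# forward() upcasts to float32 for the mean-square computation, rescales by
# rsqrt(variance + eps), casts back to the input dtype, then applies the weight.
y = norm(x)
assert y.shape == x.shape

print(norm)  # extra_repr shows the normalized shape and eps
```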

This file was deleted.

This file was deleted.

@@ -3,4 +3,4 @@
 # ----------------------------------------------------------------
 # Local
 from .alibi import Alibi
-from .rope import RoPE, YaRNScaledRoPE, apply_rotary_pos_emb
+from .rope import RoPE, apply_rotary_pos_emb