Addressed missed-out comments in PR #1 (#3)

Merged · 4 commits · Jun 22, 2024
Changes from 3 commits
1 change: 0 additions & 1 deletion pyproject.toml
@@ -58,7 +58,6 @@ include = [
     "instructlab.dolomite.hf_models.modeling_utils.normalization.layernorm",
     "instructlab.dolomite.hf_models.modeling_utils.normalization.rmsnorm",
     "instructlab.dolomite.hf_models.modeling_utils.position_embedding",
-    "instructlab.dolomite.gradient_checkpointing",
     "instructlab.dolomite.utils",
 ]

24 changes: 0 additions & 24 deletions src/instructlab/dolomite/gradient_checkpointing/__init__.py

This file was deleted.

47 changes: 0 additions & 47 deletions src/instructlab/dolomite/gradient_checkpointing/block.py

This file was deleted.

127 changes: 0 additions & 127 deletions src/instructlab/dolomite/hf_models/config.py

This file was deleted.

4 changes: 0 additions & 4 deletions src/instructlab/dolomite/hf_models/defaults.py

This file was deleted.

(diff in another file)
@@ -14,7 +14,5 @@
     repeat_key_value,
     split_query_key_value_tensor_for_attention,
 )
-from .embedding import Embedding
-from .linear import Linear
 from .normalization import RMSNorm, get_normalization_function
-from .position_embedding import Alibi, RoPE, YaRNScaledRoPE, apply_rotary_pos_emb
+from .position_embedding import Alibi, RoPE, apply_rotary_pos_emb
(diff in another file)
@@ -9,8 +9,8 @@
 import torch

 # Local
-from ...config import CommonConfig
 from ...enums import AttentionHeadType
+from ...models.gpt_dolomite.config import GPTDolomiteConfig


Reviewer:
I think this might give a circular import error due to stuff from this file being imported in the model class.

Collaborator Author:

It was fine; I have tested training the refactored code.

Collaborator Author:

[image]

Collaborator Author:

But if you like, I have moved the config to a top-level config.py in 8afe81d.

 from .base import Attention
 from .flash import FlashAttention2
 from .padding_free import PaddingFreeAttention
@@ -48,7 +48,7 @@


 def get_attention_module(
-    config: CommonConfig,
+    config: GPTDolomiteConfig,
     causal: bool,
     attention_implementation: str,
     use_padding_free_transformer: bool,
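The hunk above changes only the config annotation on get_attention_module; together with the Attention, FlashAttention2, and PaddingFreeAttention imports shown earlier in the same diff, it implies a small dispatch helper. A minimal sketch of that pattern follows. The branch conditions, the layer_idx parameter, and the stand-in classes are assumptions for illustration, not the repository's exact code.

# Sketch only: the stand-in classes below mirror the three imported implementations.
from typing import Any, Optional

class Attention:
    def __init__(self, config: Any, causal: bool, layer_idx: Optional[int] = None) -> None:
        self.config, self.causal, self.layer_idx = config, causal, layer_idx

class FlashAttention2(Attention):
    pass

class PaddingFreeAttention(FlashAttention2):
    pass

def get_attention_module(
    config: Any,                      # GPTDolomiteConfig in the refactored code
    causal: bool,
    attention_implementation: str,
    use_padding_free_transformer: bool,
    layer_idx: Optional[int] = None,  # assumed extra parameter
) -> Attention:
    # Assumed selection order: padding-free first, then flash, then the eager base class.
    if use_padding_free_transformer:
        return PaddingFreeAttention(config, causal, layer_idx)
    if attention_implementation == "flash_attention_2":
        return FlashAttention2(config, causal, layer_idx)
    return Attention(config, causal, layer_idx)

Keeping the selection in one helper is why the CommonConfig to GPTDolomiteConfig swap only has to touch the annotation here and the constructors of the attention classes.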
(diff in another file)
@@ -5,21 +5,21 @@
 from typing import Tuple

 # Third Party
+from torch.nn import Linear  # replaces ParameterizedLinear
 from transformers import DynamicCache
 import torch
 import torch.nn.functional as F

 # Local
-from ...config import CommonConfig
 from ...enums import AttentionHeadType, PositionEmbeddingType
-from ..linear import Linear
+from ...models.gpt_dolomite.config import GPTDolomiteConfig


Reviewer:
same as above

Collaborator Author:

see comment above

 from ..position_embedding import apply_rotary_pos_emb
 from .utils import repeat_key_value


 class Attention(torch.nn.Module):
     def __init__(
-        self, config: CommonConfig, causal: bool, layer_idx: int = None
+        self, config: GPTDolomiteConfig, causal: bool, layer_idx: int = None
     ) -> None:
         super().__init__()

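Both review threads above ask whether importing GPTDolomiteConfig from the model package into these shared attention modules creates a circular import. Below is a self-contained sketch of why such a cycle can still resolve at runtime; the package and module names (demo_models, utils, gpt) are hypothetical and only mirror the shape of the dependency, not the real instructlab.dolomite layout.

import sys
import tempfile
import textwrap
from pathlib import Path

root = Path(tempfile.mkdtemp())
pkg = root / "demo_models"
(pkg / "gpt").mkdir(parents=True)
(pkg / "__init__.py").write_text("")

# Shared layer code imports the config from *inside* the model sub-package,
# analogous to modeling_utils importing models.gpt_dolomite.config.
(pkg / "utils.py").write_text(textwrap.dedent("""
    from demo_models.gpt.config import Config

    def make_attention(cfg):
        return f"attention(heads={cfg.heads})"
"""))

# The model sub-package imports the shared layer code, so a cycle exists on paper.
(pkg / "gpt" / "__init__.py").write_text(textwrap.dedent("""
    from demo_models.utils import make_attention
    from demo_models.gpt.config import Config
"""))

# The config itself imports nothing local, which is what keeps the cycle harmless.
(pkg / "gpt" / "config.py").write_text("class Config:\n    heads = 4\n")

sys.path.insert(0, str(root))
import demo_models.gpt as gpt  # gpt/__init__ -> utils -> gpt.config resolves fine

print(gpt.make_attention(gpt.Config()))  # attention(heads=4)

# If gpt/config.py ever imported from utils, the same chain would fail mid-initialization.
# Hoisting the config to a top-level module, as offered in 8afe81d, removes that risk.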

(another deleted file)
This file was deleted.

8 changes: 0 additions & 8 deletions src/instructlab/dolomite/hf_models/modeling_utils/linear.py

This file was deleted.

(diff in another file)
@@ -5,8 +5,7 @@
 import torch

 # Local
-from .layernorm import get_layernorm
-from .rmsnorm import RMSNorm, get_rmsnorm
+from .norms import RMSNorm, get_layernorm, get_rmsnorm

 _NORMALIZATION_FUNCTIONS = {
     "layernorm": get_layernorm,
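The hunk above consolidates the layernorm and rmsnorm helpers into a single norms module while keeping the name-to-factory registry. A minimal sketch of that registry pattern follows. The (normalized_shape, eps) factory signatures and the RMSNorm body are assumptions based on common practice; only the names _NORMALIZATION_FUNCTIONS, RMSNorm, get_layernorm, get_rmsnorm, and get_normalization_function come from the diffs.

import torch
import torch.nn as nn

class RMSNorm(nn.Module):
    def __init__(self, normalized_shape: int, eps: float = 1e-6) -> None:
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.eps = eps

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Normalize by the root mean square instead of mean and variance as LayerNorm does.
        variance = x.float().pow(2).mean(-1, keepdim=True)
        return self.weight * (x.float() * torch.rsqrt(variance + self.eps)).type_as(x)

def get_layernorm(normalized_shape: int, eps: float) -> nn.Module:
    return nn.LayerNorm(normalized_shape, eps=eps)

def get_rmsnorm(normalized_shape: int, eps: float) -> nn.Module:
    return RMSNorm(normalized_shape, eps=eps)

_NORMALIZATION_FUNCTIONS = {
    "layernorm": get_layernorm,
    "rmsnorm": get_rmsnorm,
}

def get_normalization_function(name: str, normalized_shape: int, eps: float = 1e-5) -> nn.Module:
    if name not in _NORMALIZATION_FUNCTIONS:
        raise ValueError(f"unknown normalization function: {name}")
    return _NORMALIZATION_FUNCTIONS[name](normalized_shape, eps)

# Example: get_normalization_function("rmsnorm", 64)(torch.randn(2, 64))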

(another deleted file)
This file was deleted.