chore: rm explicit tokenizer param
GangGreenTemperTatum committed Feb 5, 2025
1 parent 9a3f008 commit cf9e0f9
Showing 4 changed files with 159 additions and 48 deletions.
36 changes: 13 additions & 23 deletions dyana/loaders/megatron/Dockerfile
@@ -10,29 +10,19 @@ RUN apt-get update && \
build-essential \
&& rm -rf /var/lib/apt/lists/*

# Configure environment
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=/usr/local/cuda/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
ENV CUDA_LAUNCH_BLOCKING=1
ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:32
ENV CUDA_MODULE_LOADING=LAZY
ENV TORCH_USE_CUDA_DSA=0
ENV CUDA_DEVICE_MAX_CONNECTIONS=1
ENV NCCL_ASYNC_ERROR_HANDLING=1
ENV OMP_NUM_THREADS=1
ENV NVTE_FRAMEWORK=pytorch
ENV MAX_JOBS=4
ENV DEBIAN_FRONTEND=noninteractive
ENV TORCH_CUDNN_V8_API_ENABLED=1
ENV TORCH_ALLOW_TF32=1
ENV TORCH_SHOW_CPP_STACKTRACES=0
ENV PYTHONWARNINGS=ignore
ENV NVIDIA_VISIBLE_DEVICES="all"
ENV CUDA_DEVICE_ORDER=PCI_BUS_ID
ENV TORCH_USE_CUDA_DSA=1
ENV PYTORCH_JIT=0
ENV TORCH_INDUCTOR_DISABLE_CUDA_GRAPH=0
# Create required directories for multiprocessing
RUN mkdir -p /dev/shm && \
mkdir -p /tmp/pytorch_extensions && \
mkdir -p /run/shm && \
chmod -R 777 /dev/shm /tmp/pytorch_extensions /run/shm

# Create ALL required directories for IPC and shared memory
RUN mkdir -p /dev/shm && \
mkdir -p /run/shm && \
mkdir -p /tmp/pytorch_extensions && \
mkdir -p /tmp/.pytorch_jit_cache && \
mkdir -p /tmp/transformers && \
chmod -R 777 /dev/shm /run/shm /tmp/pytorch_extensions /tmp/.pytorch_jit_cache /tmp/transformers

# Only verify PyTorch version during build (not CUDA)
RUN python3 -c "import torch; print(f'PyTorch version: {torch.__version__}')"
159 changes: 142 additions & 17 deletions dyana/loaders/megatron/main.py
@@ -1,4 +1,4 @@
# ruff: noqa: I001, E402, F401, F821
# ruff: noqa: I001, F401, E402, B904, F821
# type: ignore
import os
import sys
@@ -13,33 +13,107 @@
warnings.filterwarnings("ignore", category=UserWarning)

# Import torch and configure CUDA
import torch # noqa: E402
import torch

torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
if torch.cuda.is_available():
torch.cuda.init() # type: ignore[no-untyped-call]
torch.cuda.init()
torch.cuda.set_device(0)


def find_tokenizer(model_path: Path) -> Path:
"""Find tokenizer file in model directory or alongside model file."""
patterns = [
# LLaMA specific patterns first
"llama*tokenizer*.model", # LLaMA specific naming
"tokenizer.model", # Standard LLaMA tokenizer
# Generic patterns as fallback
"*.model", # sentencepiece models
"tokenizer.*", # huggingface style
"*/tokenizer.*", # nested folder
"vocab.*", # vocabulary files
"merges.txt", # BPE merges
]

# Try both the model's directory and its parent directory
search_dirs = [model_path.parent]
if model_path.parent.parent.exists():
search_dirs.append(model_path.parent.parent)

print("\n=== Tokenizer Search ===", file=sys.stderr)

for directory in search_dirs:
print(f"Looking in: {directory}", file=sys.stderr)
print("Directory contents:", file=sys.stderr)
all_files = list(directory.glob("*"))
for f in sorted(all_files):
print(f" {f}", file=sys.stderr)
# If it looks like a LLaMA tokenizer file, try it first
if "tokenizer" in f.name.lower() and f.name.endswith(".model"):
print(f"Found likely LLaMA tokenizer: {f}", file=sys.stderr)
return f

# If no obvious tokenizer found, try the patterns
print("\nTrying patterns:", file=sys.stderr)
for pattern in patterns:
print(f" {pattern}...", file=sys.stderr, end=" ")
matches = list(directory.glob(pattern))
if matches:
print(f"Found: {matches[0]}", file=sys.stderr)
return matches[0]
print("No match", file=sys.stderr)

raise FileNotFoundError(
f"No tokenizer found in {[str(d) for d in search_dirs]} after trying patterns: {patterns}\n"
f"Available files in {model_path.parent}: {[f.name for f in model_path.parent.glob('*')]}"
)


if __name__ == "__main__":
# Set multiprocessing start method
import multiprocessing

multiprocessing.set_start_method("spawn", force=True)

captured_output = StringIO()
with contextlib.redirect_stdout(captured_output), contextlib.redirect_stderr(captured_output):
try:
print("=== Starting Megatron Loader ===", file=sys.stderr)
from dyana import Profiler

# Initialize CUDA
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["TORCH_USE_CUDA_DSA"] = "0"
os.environ["PYTORCH_JIT"] = "0" # Disable JIT at env level
os.environ["TORCH_USE_RTLD_GLOBAL"] = "1"
os.environ["TORCH_INDUCTOR_DISABLE_CUDA_GRAPH"] = "1" # Disable CUDA graphs

if not os.path.exists("/dev/shm"):
print("Warning: /dev/shm not found, creating...", file=sys.stderr)
os.makedirs("/dev/shm", exist_ok=True)

# PyTorch before other imports
print("=== Configuring PyTorch ===", file=sys.stderr)
# Disable JIT compilation using available methods
if hasattr(torch._C, "_jit_set_profiling_mode"):
torch._C._jit_set_profiling_mode(False)
print("✓ Disabled JIT profiling mode", file=sys.stderr)

profiler = Profiler(gpu=True)

if not torch.cuda.is_available():
raise RuntimeError("CUDA is not available but required")

# Force CUDA initialization
torch.cuda.init() # type: ignore[no-untyped-call]
torch.cuda.init()
torch.cuda.set_device(0)
# Allocate a small tensor to ensure CUDA is working
test_tensor = torch.zeros(1, device="cuda")
del test_tensor
torch.cuda.empty_cache()

# GPU info
device_name = torch.cuda.get_device_name()
device_count = torch.cuda.device_count()
cuda_version = torch.version.cuda
Expand All @@ -53,24 +127,71 @@
)
profiler.on_stage("cuda_initialized")

print("\n=== Importing Dependencies ===", file=sys.stderr)
try:
from transformers import LlamaTokenizer

print("✓ Imported LlamaTokenizer", file=sys.stderr)
from megatron.core import parallel_state

print("✓ Imported parallel_state", file=sys.stderr)
from megatron.core.transformer.transformer_config import TransformerConfig

print("✓ Imported TransformerConfig", file=sys.stderr)
except Exception as e:
print(f"Failed to import dependencies: {e}", file=sys.stderr)
profiler.track_error("imports", str(e))
raise

print("\n=== Parsing Arguments ===", file=sys.stderr)
parser = argparse.ArgumentParser()
parser.add_argument("--model", required=True)
parser.add_argument("--tokenizer", required=True)
parser.add_argument("--size", choices=["7B", "13B"], required=True)
parser.add_argument("--input", default="This is an example prompt.")
parser.add_argument("--tokenizer", help="Optional explicit tokenizer path")
args = parser.parse_args()

model_path = Path(args.model)
tokenizer_path = Path(args.tokenizer)
if not model_path.exists():
raise FileNotFoundError(f"Model not found at {model_path}")
if not tokenizer_path.exists():
raise FileNotFoundError(f"Tokenizer not found at {tokenizer_path}")
profiler.on_stage("args_verified")

from transformers import LlamaTokenizer
from megatron.core import parallel_state
from megatron.core.transformer.transformer_config import TransformerConfig
print("\n=== Checking Files ===", file=sys.stderr)
print(f"Model path: {model_path}", file=sys.stderr)
print("Directory contents:", file=sys.stderr)
for f in sorted(model_path.parent.glob("*")):
print(f" {f}", file=sys.stderr)

# Try explicit tokenizer path
if args.tokenizer:
tokenizer_path = Path(args.tokenizer)
if not tokenizer_path.exists():
raise FileNotFoundError(f"Tokenizer not found at {tokenizer_path}")
print(f"Using provided tokenizer: {tokenizer_path}", file=sys.stderr)
else:
# Otherwise search for tokenizer
tokenizer_path = find_tokenizer(model_path)
print(f"Found tokenizer: {tokenizer_path}", file=sys.stderr)

try:
print("\n=== Loading Tokenizer ===", file=sys.stderr)
print(f"Loading from: {tokenizer_path}", file=sys.stderr)

try:
tokenizer = LlamaTokenizer.from_pretrained(
str(tokenizer_path.parent),
local_files_only=True,
tokenizer_file=str(tokenizer_path.name),
)
print(f"Successfully loaded tokenizer (vocab_size={tokenizer.vocab_size})", file=sys.stderr)
except Exception as e:
print(f"Failed to load tokenizer from {tokenizer_path}: {e}", file=sys.stderr)
raise
print("=======================\n", file=sys.stderr)
profiler.on_stage("tokenizer_loaded")
except Exception as e:
print(f"Error loading tokenizer: {e}", file=sys.stderr)
profiler.track_error("tokenizer", str(e))
raise

# Initialize profiler first
initialized_parallel = False
@@ -95,20 +216,20 @@

try:
te.initialize()
print(f"Initialized Transformer Engine version: {te.__version__}") # noqa: F821
print(f"Initialized Transformer Engine version: {te.__version__}")
except Exception as e:
print(f"Warning: Transformer Engine initialization failed: {e}")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

try:
print(f"Transformer Engine version: {transformer_engine.__version__}") # noqa: F821
print(f"Transformer Engine version: {te.__version__}") # noqa: F821
print(f"CUDA devices: {torch.cuda.device_count()}")
print(f"CUDA version: {torch.version.cuda}")
profiler.track(
"env_info",
{
"te_version": transformer_engine.__version__, # noqa: F821
"te_version": te.__version__, # noqa: F821
"cuda_devices": torch.cuda.device_count(),
"cuda_version": torch.version.cuda,
},
@@ -146,7 +267,12 @@
profiler.on_stage("config_created")

try:
# Load tokenizer
print("\n=== Loading Tokenizer ===", file=sys.stderr)
print(f"Loading from: {tokenizer_path}", file=sys.stderr)
tokenizer = LlamaTokenizer.from_pretrained(str(tokenizer_path.parent), local_files_only=True)
print(f"Loaded tokenizer with vocab size: {tokenizer.vocab_size}", file=sys.stderr)
print("=======================\n", file=sys.stderr)
profiler.on_stage("tokenizer_loaded")

model = GPTModel( # noqa: F821
Expand All @@ -155,7 +281,7 @@
max_sequence_length=4096,
parallel_output=False,
share_embeddings_and_output_weights=True,
).cuda() # GPU
).cuda() # Explicit GPU
profiler.on_stage("model_created")

# Load DMC checkpoint directly to GPU
@@ -198,7 +324,6 @@
raise

finally:
# Clean up Megatron's parallel state only if it was initialized
try:
if initialized_parallel:
parallel_state.destroy_model_parallel()
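Taken together, the main.py changes make the --tokenizer flag optional: when it is omitted, find_tokenizer() resolves a tokenizer file sitting next to the model checkpoint. A minimal sketch of the resulting flow (the checkpoint path below is a placeholder; find_tokenizer and the LlamaTokenizer call are the ones shown in the diff above):

    from pathlib import Path

    model_path = Path("/path/to/model.pt")  # placeholder checkpoint path
    # No explicit --tokenizer given, so search alongside the checkpoint
    tokenizer_path = find_tokenizer(model_path)
    tokenizer = LlamaTokenizer.from_pretrained(
        str(tokenizer_path.parent),
        local_files_only=True,
        tokenizer_file=str(tokenizer_path.name),
    )
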
1 change: 1 addition & 0 deletions dyana/loaders/megatron/requirements.txt
@@ -13,6 +13,7 @@ hydra-core==1.3.2
hydra_colorlog==1.2.0
nltk
datasets
transformers>=4.38.0

# Utilities
psutil>=5.6.7
11 changes: 3 additions & 8 deletions dyana/loaders/megatron/settings.yml
@@ -5,12 +5,7 @@ build_args:

args:
- name: model
description: Path to Megatron model checkpoint
required: true
volume: true

- name: tokenizer
description: Path to Llama 2 tokenizer model
description: Path to model checkpoint (tokenizer should be in same directory)
required: true
volume: true

@@ -25,5 +20,5 @@ args:
required: false

examples:
- description: "Load a Megatron-DMC model with tokenizer:"
command: dyana trace --loader megatron --model /path/to/model --tokenizer /path/to/tokenizer.model --size 7B
- description: "Load a Megatron-DMC model:"
command: dyana trace --loader megatron --model /path/to/model.pt --size 7B
