chore: added missing type annotations
le1nux committed Jan 22, 2025
1 parent d3a1795 commit 4bc671b
Showing 1 changed file with 3 additions and 2 deletions.
@@ -2,6 +2,7 @@
 import pickle
 from itertools import repeat
 from pathlib import Path
+from typing import BinaryIO
 
 import numpy as np
 
@@ -66,12 +67,12 @@ def _update_data_length_in_initial_header(tokenized_dataset_file_path: Path, ind
             fout.write(data_section_length_in_bytes)
 
     @staticmethod
-    def _write_index_segment(file_descriptor, index_list: list[tuple[int, int]]) -> None:
+    def _write_index_segment(file_descriptor: BinaryIO, index_list: list[tuple[int, int]]) -> None:
         file_descriptor.write(pickle.dumps(index_list))
 
     @staticmethod
     def _write_data_segment(
-        file_descriptor, token_data: list[np.ndarray], token_size_in_bytes: int, write_batch_size: int
+        file_descriptor: BinaryIO, token_data: list[np.ndarray], token_size_in_bytes: int, write_batch_size: int
     ) -> list[tuple[int, int]]:
         def encoded_token_to_bytes(encoded_token: int, token_size_in_bytes: int) -> bytes:
             # Converts a token id to its byte representation.

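For context, here is a minimal usage sketch of what the new annotations describe. It is not part of the commit or the repository: the free-standing copy of _write_index_segment, the file name example.pbin, and the index values are assumptions for illustration only. The point is that a file opened in binary mode ("wb") is the kind of handle that matches typing.BinaryIO.

    # Sketch only, not repository code: exercises the annotated index writer
    # with a binary file handle.
    import pickle
    from pathlib import Path
    from typing import BinaryIO


    def _write_index_segment(file_descriptor: BinaryIO, index_list: list[tuple[int, int]]) -> None:
        # Same body as in the diff: pickle the (offset, length) index and write the raw bytes.
        file_descriptor.write(pickle.dumps(index_list))


    index_list = [(0, 16), (16, 8)]  # hypothetical (offset, length) pairs
    with Path("example.pbin").open("wb") as fd:  # "wb" yields a binary handle compatible with BinaryIO
        _write_index_segment(fd, index_list)

With the parameters annotated, a type checker can flag call sites that pass something incompatible, such as a text-mode file handle.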