Commit 9367171 ("review changes")
menamerai committed Feb 10, 2024 (1 parent: 86a1965)
Showing 2 changed files with 5 additions and 33 deletions.
delphi/dataset/token_map.py → delphi/eval/token_map.py

@@ -8,7 +8,6 @@
 def token_map(
     tokenized_dataset: Dataset,
     output_path: str | None = None,
-    file_name: str | None = None,
 ) -> dict[int, list[tuple[int, int]]]:
     """Return a mapping of tokens to their (prompt_idx, token_idx) locations in the tokenized_dataset.
@@ -22,7 +21,6 @@ def token_map(
     locations in the tokenized_dataset.
     """
     mapping = {}
-    tokenized_dataset = cast(Dataset, tokenized_dataset)
     for prompt_idx, prompt in enumerate(tokenized_dataset):
         prompt = cast(dict, prompt)
         for token_idx, token in enumerate(prompt["tokens"]):
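The hunk is truncated before the loop body. A minimal sketch of how token_map plausibly reads after this commit, with the elided accumulation step and return filled in (the setdefault-based body is an assumption, not shown in the diff):

    from typing import cast

    from datasets import Dataset


    def token_map(
        tokenized_dataset: Dataset,
        output_path: str | None = None,  # retained per the hunk above; unused in this sketch
    ) -> dict[int, list[tuple[int, int]]]:
        """Return a mapping of tokens to their (prompt_idx, token_idx) locations."""
        mapping: dict[int, list[tuple[int, int]]] = {}
        for prompt_idx, prompt in enumerate(tokenized_dataset):
            prompt = cast(dict, prompt)
            for token_idx, token in enumerate(prompt["tokens"]):
                # Assumed loop body: record each position where this token id occurs.
                mapping.setdefault(token, []).append((prompt_idx, token_idx))
        return mapping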
tests/dataset/test_dataset.py → tests/eval/test_token_map.py (5 additions, 31 deletions)

@@ -4,7 +4,7 @@
 import pytest
 from datasets import Dataset
 
-from delphi.dataset.token_map import token_map
+from delphi.eval.token_map import token_map
 
 
 def test_token_map():
@@ -29,41 +29,15 @@ def test_token_map():
         7: [(0, 8), (1, 8), (2, 8)],
     }
 
+    # fmt: off
     tokenized_dataset = Dataset.from_dict(
-        {
+        { # one really long prompt
             "tokens": [
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    0,
-                    6,
-                    7,
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    0,
-                    6,
-                    7,
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    0,
-                    6,
-                    7,
-                ],
+                [0, 1, 2, 3, 4, 5, 0, 6, 7, 0, 1, 2, 3, 4, 5, 0, 6, 7, 0, 1, 2, 3, 4, 5, 0, 6, 7]
             ]
         }
     )
+    # fmt: on
     mapping = token_map(tokenized_dataset)
     assert mapping == {
         0: [(0, 0), (0, 6), (0, 9), (0, 15), (0, 18), (0, 24)],
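The expected-mapping assert is cut off above. Working the remaining entries out from the 27-token prompt, [0, 1, 2, 3, 4, 5, 0, 6, 7] repeated three times (a derivation, not part of the captured diff), the assert plausibly continues:

        1: [(0, 1), (0, 10), (0, 19)],
        2: [(0, 2), (0, 11), (0, 20)],
        3: [(0, 3), (0, 12), (0, 21)],
        4: [(0, 4), (0, 13), (0, 22)],
        5: [(0, 5), (0, 14), (0, 23)],
        6: [(0, 7), (0, 16), (0, 25)],
        7: [(0, 8), (0, 17), (0, 26)],
    }

The # fmt: off / # fmt: on pair tells Black to leave that region untouched, so the formatter cannot re-explode the single-line token list back into one element per line.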
