From 4e790f654c0a07fea6dcf3d8d74c7bc9ce46ff02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Perceval=20Wajsb=C3=BCrt?=
Date: Sun, 1 Sep 2024 23:15:43 +0200
Subject: [PATCH] feat: hashable lengths

---
 changelog.md                | 24 +++++++++++++++---------
 foldedtensor/__init__.py    | 17 ++++++++++++++---
 tests/test_folded_tensor.py | 14 ++++++++++++++
 3 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/changelog.md b/changelog.md
index 1794637..4c74ffa 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,42 +1,48 @@
-# v0.3.4
+# Changelog
+
+## Unreleased
+
+- Support hashing the `folded_tensor.lengths` field (via a UserList), which is convenient for caching
+
+## v0.3.4
 
 - Fix a data_dims access issue
 - Marginally improve the speed of handling FoldedTensors in standard torch operations
 - Use default torch types (e.g. `torch.float32` or `torch.torch64`)
 
-# v0.3.3
+## v0.3.3
 
 - Handle empty inputs (e.g. `as_folded_tensor([[[], []], [[]]])`) by returning an empty tensor
 - Correctly bubble errors when converting inputs with varying deepness (e.g. `as_folded_tensor([1, [2, 3]])`)
 
-# v0.3.2
+## v0.3.2
 
 - Allow to use `as_folded_tensor` with no args, as a simple padding function
 
-# v0.3.1
+## v0.3.1
 
 - Enable sharing FoldedTensor instances in a multiprocessing + cuda context by autocloning the indexer before fork-pickling an instance
 - Distribute arm64 wheels for macOS
 
-# v0.3.0
+## v0.3.0
 
 - Allow dims after last foldable dim during list conversion (e.g. embeddings)
 
-# v0.2.2
+## v0.2.2
 
 - Github release :octocat:
 - Fix backpropagation when refolding
 
-# v0.2.1
+## v0.2.1
 
 - Improve performance by computing the new "padded to flattened" indexer only (and not the previous one) when refolding
 
-# v0.2.0
+## v0.2.0
 
 - Remove C++ torch dependency in favor of Numpy due to lack of torch ABI backward/forward compatibility, making the pre-built wheels unusable in most cases
 - Require dtype to be specified when creating a FoldedTensor from a nested list
 
-# v0.1.0
+## v0.1.0
 
 Inception ! :tada:
 
diff --git a/foldedtensor/__init__.py b/foldedtensor/__init__.py
index 135920a..e3eb63f 100644
--- a/foldedtensor/__init__.py
+++ b/foldedtensor/__init__.py
@@ -1,3 +1,5 @@
+import typing
+from collections import UserList
 from multiprocessing.reduction import ForkingPickler
 from typing import List, Optional, Sequence, Tuple, Union
 
@@ -46,6 +48,15 @@
 __version__ = "0.3.4"
 
 
+class FoldedTensorLengths(UserList):
+    def __hash__(self):
+        return id(self)
+
+
+if typing.TYPE_CHECKING:
+    FoldedTensorLengths = List[List[int]]  # noqa: F811
+
+
 # noinspection PyMethodOverriding
 class Refold(Function):
     @staticmethod
@@ -179,7 +190,7 @@ def as_folded_tensor(
         )
         result = FoldedTensor(
             data=data,
-            lengths=lengths,
+            lengths=FoldedTensorLengths(lengths),
             data_dims=data_dims,
             full_names=full_names,
             indexer=torch.from_numpy(np_indexer).to(data.device),
@@ -207,7 +218,7 @@ def as_folded_tensor(
         lengths = (list(lengths) + [[0]] * deepness)[:deepness]
         result = FoldedTensor(
             data=padded,
-            lengths=lengths,
+            lengths=FoldedTensorLengths(lengths),
             data_dims=data_dims,
             full_names=full_names,
             indexer=indexer,
@@ -269,7 +280,7 @@ class FoldedTensor(torch.Tensor):
     def __new__(
         cls,
         data: torch.Tensor,
-        lengths: List[List[int]],
+        lengths: FoldedTensorLengths,
         data_dims: Sequence[int],
         full_names: Sequence[str],
         indexer: torch.Tensor,
diff --git a/tests/test_folded_tensor.py b/tests/test_folded_tensor.py
index 0ce77ad..71614b4 100644
--- a/tests/test_folded_tensor.py
+++ b/tests/test_folded_tensor.py
@@ -417,3 +417,17 @@ def test_max():
     values, indices = ft.max(-1)
     assert (values == torch.tensor([2, 4])).all()
     assert (indices == torch.tensor([2, 1])).all()
+
+
+def test_hashable_lengths():
+    tensor = as_folded_tensor(
+        [
+            [0, 1, 2],
+            [3, 4],
+        ],
+        dtype=torch.long,
+    )
+    embedding = torch.nn.Embedding(10, 16)
+    assert tensor.lengths is embedding(tensor).lengths
+    assert hash(tensor.lengths) is not None
+    assert hash(tensor.lengths) == hash(embedding(tensor).lengths)
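
A note on the caching use case named in the changelog entry: torch operations propagate the *same* `FoldedTensorLengths` object from input to output (this is exactly what `test_hashable_lengths` asserts with `tensor.lengths is embedding(tensor).lengths`), so a computation that depends only on the nesting structure can now be memoized with `lengths` as the key. Below is a minimal sketch assuming the public `foldedtensor` API; the `padding_mask` helper, and its assumption that `lengths[-1]` lists the innermost sequence lengths, are illustrative and not part of this patch:

```python
from functools import lru_cache

import torch
from foldedtensor import as_folded_tensor


@lru_cache(maxsize=None)
def padding_mask(lengths):
    # Hypothetical helper: build a boolean mask marking real (non-padding)
    # positions. `lengths` is hashable as of this patch, so it can serve as
    # an lru_cache key; we assume lengths[-1] holds each innermost length.
    last = list(lengths[-1])
    return torch.arange(max(last))[None, :] < torch.tensor(last)[:, None]


ft = as_folded_tensor([[0, 1, 2], [3, 4]], dtype=torch.long)
embedding = torch.nn.Embedding(10, 16)

mask = padding_mask(ft.lengths)              # computed once
mask2 = padding_mask(embedding(ft).lengths)  # cache hit: same lengths object
assert mask is mask2
```

Design note: `__hash__` returns `id(self)`, so hashing is identity-based. Two equal-content `FoldedTensorLengths` built separately will occupy distinct cache slots; hits occur precisely when tensors share one lengths object, which is what chained tensor operations produce.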