From e8c3a0842b838f9173e7364167ea125b47dba0b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Perceval=20Wajsb=C3=BCrt?= Date: Wed, 14 Feb 2024 18:04:24 +0100 Subject: [PATCH] docs: add benchmarks --- .pre-commit-config.yaml | 2 +- README.md | 31 ++++- docs/benchmark.md | 159 ++++++++++++++++++++++++++ scripts/benchmark.py | 247 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 437 insertions(+), 2 deletions(-) create mode 100644 docs/benchmark.md create mode 100644 scripts/benchmark.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 45fbaae..95bc64c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,4 +31,4 @@ repos: hooks: - id: blacken-docs additional_dependencies: [black==20.8b1] - exclude: notebooks/ + exclude: ^(notebooks/|docs/benchmark) diff --git a/README.md b/README.md index 360aacf..73541c7 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,24 @@ pip install foldedtensor - C++ optimized code for fast data loading from Python lists and refolding - Flexibility in data representation, making it easy to switch between different layouts when needed -## Example +## Examples + +At its simplest, `foldedtensor` can be used to convert nested Python lists into a PyTorch tensor: + +```python +from foldedtensor import as_folded_tensor + +ft = as_folded_tensor( + [ + [0, 1, 2], + [3], + ], +) +# FoldedTensor([[0, 1, 2], +# [3, 0, 0]]) +``` + +You can also specify names and flattened/unflattened dimensions at the time of creation: ```python import torch @@ -54,7 +71,11 @@ ft = as_folded_tensor( print(ft) # FoldedTensor([[1, 2, 3], # [4, 3, 0]]) +``` + +Once created, you can change the shape of the tensor by refolding it: +```python # Refold on the lines and words dims (flatten the samples dim) print(ft.refold(("lines", "words"))) # FoldedTensor([[1, 0], @@ -67,7 +88,11 @@ print(ft.refold(("lines", "words"))) # Refold on the words dim only: flatten everything print(ft.refold(("words",))) # FoldedTensor([1, 2, 3, 4, 3]) +``` +The tensor can be further used with standard PyTorch operations: + +```python # Working with PyTorch operations embedder = torch.nn.Embedding(10, 16) embedding = embedder(ft.refold(("words",))) @@ -79,6 +104,10 @@ print(refolded_embedding.shape) # torch.Size([2, 5, 16]) # 2 samples, 5 words max, 16 dims ``` +## Benchmarks + +View the comparisons of `foldedtensor` against various alternatives here: [docs/benchmarks](https://github.com/aphp/foldedtensor/blob/main/docs/benchmark.md). + ## Comparison with alternatives Unlike other ragged or nested tensor implementations, a FoldedTensor does not enforce a specific structure on the nested data, and does not require padding all dimensions. This provides the user with greater flexibility when working with data that can be arranged in multiple ways depending on the data transformation. Moreover, the C++ optimization ensures high performance, making it ideal for handling deeply nested tensors efficiently. diff --git a/docs/benchmark.md b/docs/benchmark.md new file mode 100644 index 0000000..23e31ee --- /dev/null +++ b/docs/benchmark.md @@ -0,0 +1,159 @@ + +Benchmarks +---------- + +This file was generated from [`scripts/benchmark.py`](../scripts/benchmark.py). + +It compares the performance of `foldedtensor` with various alternatives for padding +and working with nested lists and tensors. + +Versions: +- `torch.__version__ == '2.0.1'` +- `foldedtensor.__version__ == '0.3.2'` + + +## Case 1 (pad variable lengths nested list) + +The following 3-levelled nested lists has lengths of 32, then between 50 and 100, and then between 25 and 30. +nested_list = make_nested_list(32, (50, 100), (25, 30), value=1) + +Comparisons: +%timeit python_padding(nested_list) +# 100 loops, best of 5: 13.32 ms per loop + + +%timeit foldedtensor.as_folded_tensor(nested_list) +# 100 loops, best of 5: 0.63 ms per loop + + + +## Case 2 (same lengths nested lists) + +```python +nested_list = make_nested_list(32, 100, 30, value=1) + +%timeit torch.tensor(nested_list) +# 100 loops, best of 5: 6.42 ms per loop + + +%timeit torch.LongTensor(nested_list) +# 100 loops, best of 5: 2.64 ms per loop + + +%timeit python_padding(nested_list) +# 100 loops, best of 5: 15.92 ms per loop + + +%timeit torch.nested.nested_tensor([torch.LongTensor(sub) for sub in nested_list]).to_padded_tensor(0) +# 100 loops, best of 5: 2.88 ms per loop + + +%timeit foldedtensor.as_folded_tensor(nested_list) +# 100 loops, best of 5: 0.93 ms per loop + + +``` + + +## Case 3 (simple list) + +```python +simple_list = make_nested_list(10000, value=1) + +%timeit torch.tensor(simple_list) +# 100 loops, best of 5: 0.63 ms per loop + + +%timeit torch.LongTensor(simple_list) +# 100 loops, best of 5: 0.26 ms per loop + + +%timeit python_padding(simple_list) +# 100 loops, best of 5: 0.27 ms per loop + + +%timeit foldedtensor.as_folded_tensor(simple_list) +# 100 loops, best of 5: 0.07 ms per loop + + +``` + + +## Case 4 (same lengths nested lists to flat tensor) + +```python +nested_list = make_nested_list(32, 100, 30, value=1) + +%timeit torch.tensor(nested_list).view(-1) +# 100 loops, best of 5: 6.42 ms per loop + + +%timeit torch.LongTensor(nested_list).view(-1) +# 100 loops, best of 5: 2.68 ms per loop + + +%timeit python_padding(nested_list).view(-1) +# 100 loops, best of 5: 15.92 ms per loop + + +%timeit foldedtensor.as_folded_tensor(nested_list).view(-1) +# 100 loops, best of 5: 0.96 ms per loop + + +%timeit foldedtensor.as_folded_tensor(nested_list, data_dims=(2,)) +# 100 loops, best of 5: 0.92 ms per loop + + +``` + +## Case 5 (variable lengths nested lists) to padded embeddings + +Nested lists with different lengths (second level lists have lengths between 50 and 150). We compare `foldedtensor` with `torch.nested`. +```python +nested_list = make_nested_list(32, (50, 150), 30, value=1) + +# Padding with 0 + +%timeit torch.nested.nested_tensor([torch.LongTensor(sub) for sub in nested_list]).to_padded_tensor(0) +# 100 loops, best of 5: 3.05 ms per loop + + +%timeit foldedtensor.as_folded_tensor(nested_list).as_tensor() +# 100 loops, best of 5: 0.95 ms per loop + + +# Padding with 1 + +%timeit torch.nested.nested_tensor([torch.FloatTensor(sub) for sub in nested_list]).to_padded_tensor(1) +# 100 loops, best of 5: 3.59 ms per loop + + +%timeit x = foldedtensor.as_folded_tensor(nested_list); x.masked_fill_(x.mask, 1) +# 100 loops, best of 5: 1.29 ms per loop + + +``` + + +## Case 6 (2d padding) + +```python +nested_list = make_nested_list(160, (50, 150), value=1) + +%timeit python_padding(nested_list) +# 100 loops, best of 5: 1.18 ms per loop + + +%timeit torch.nested.nested_tensor([torch.LongTensor(sub) for sub in nested_list]).to_padded_tensor(0) +# 100 loops, best of 5: 1.06 ms per loop + + +%timeit torch.nn.utils.rnn.pad_sequence([torch.LongTensor(sub) for sub in nested_list], batch_first=True, padding_value=0) +# 100 loops, best of 5: 0.76 ms per loop + + +%timeit foldedtensor.as_folded_tensor(nested_list) +# 100 loops, best of 5: 0.13 ms per loop + + +``` diff --git a/scripts/benchmark.py b/scripts/benchmark.py new file mode 100644 index 0000000..e661c6a --- /dev/null +++ b/scripts/benchmark.py @@ -0,0 +1,247 @@ +# ruff: noqa: F401, E501 +import contextlib +import random +import warnings +from timeit import Timer + +import torch +import torch.nested +import torch.nn.utils.rnn + +import foldedtensor # noqa: F401 + +warnings.filterwarnings("ignore") + +torch.set_default_device("cpu") + + +def pad_tensors(tensors): + """ + Takes a list of `N` M-dimensional tensors (M<4) and returns a padded tensor. + + The padded tensor is `M+1` dimensional with size `N, S1, S2, ..., SM` + where `Si` is the maximum value of dimension `i` amongst all tensors. + """ + rep = tensors[0] + padded_dim = [] + for dim in range(rep.dim()): + max_dim = max([tensor.size(dim) for tensor in tensors]) + padded_dim.append(max_dim) + padded_dim = [len(tensors)] + padded_dim + padded_tensor = torch.zeros(padded_dim) + padded_tensor = padded_tensor.type_as(rep) + for i, tensor in enumerate(tensors): + size = list(tensor.size()) + if len(size) == 1: + padded_tensor[i, : size[0]] = tensor + elif len(size) == 2: + padded_tensor[i, : size[0], : size[1]] = tensor + elif len(size) == 3: + padded_tensor[i, : size[0], : size[1], : size[2]] = tensor + else: + raise ValueError("Padding is supported for upto 3D tensors at max.") + return padded_tensor + + +def python_padding(ints): + """ + Converts a nested list of integers to a padded tensor. + """ + if isinstance(ints, torch.Tensor): + return ints + if isinstance(ints, list): + if isinstance(ints[0], int): + return torch.LongTensor(ints) + if isinstance(ints[0], torch.Tensor): + return pad_tensors(ints) + if isinstance(ints[0], list): + return python_padding([python_padding(inti) for inti in ints]) + + +def make_nested_list(arg, *rest, value): + size = random.randint(*arg) if isinstance(arg, tuple) else arg + if not rest: + return [value] * size + return [make_nested_list(*rest, value=value) for _ in range(size)] + + +def exec_and_print(code): + print(code) + print() + exec(code, globals(), globals()) + + +@contextlib.contextmanager +def block_code(): + print("```python") + yield + print("```") + print() + + +def timeit(stmt, number=100, repeat=5): + t = Timer(stmt, globals=globals()) + + if number == 0: + # determine number so that 0.2 <= total time < 2.0 + callback = None + + try: + number, _ = t.autorange(callback) + except: + t.print_exc() + return 1 + + try: + raw_timings = t.repeat(repeat, number) + except Exception: + t.print_exc() + return 1 + + def format_time(dt): + return f"{dt * 1000:.2f} ms" + + timings = [dt / number for dt in raw_timings] + + best = min(timings) + print("%timeit " + stmt) + print( + "# %d loop%s, best of %d: %s per loop" + % (number, "s" if number != 1 else "", repeat, format_time(best)) + ) + print("\n") + + +print( + f""" +Benchmarks +---------- + +This file was generated from [`scripts/benchmark.py`](../scripts/benchmark.py). + +It compares the performance of `foldedtensor` with various alternatives for padding +and working with nested lists and tensors. + +Versions: +- `torch.__version__ == {torch.__version__!r}` +- `foldedtensor.__version__ == {foldedtensor.__version__!r}` +""" +) + +if __name__ == "__main__": + # fmt: off + cases = [1, 2, 3, 4, 5, 6] + if 1 in cases: + print("\n## Case 1 (pad variable lengths nested list)\n") + + print("The following 3-levelled nested lists has lengths of 32, then " + "between 50 and 100, and then between 25 and 30.") + + exec_and_print("nested_list = make_nested_list(32, (50, 100), (25, 30), value=1)") + + print("Comparisons:") + timeit("python_padding(nested_list)") + timeit("foldedtensor.as_folded_tensor(nested_list)") + + if 2 in cases: + print("\n## Case 2 (same lengths nested lists)\n") + + with block_code(): + exec_and_print("nested_list = make_nested_list(32, 100, 30, value=1)") + timeit("torch.tensor(nested_list)") + timeit("torch.LongTensor(nested_list)") + timeit("python_padding(nested_list)") + timeit("torch.nested.nested_tensor([torch.LongTensor(sub) for sub in nested_list]).to_padded_tensor(0)") + timeit("foldedtensor.as_folded_tensor(nested_list)") + + if 3 in cases: + print("\n## Case 3 (simple list)\n") + + with block_code(): + exec_and_print("simple_list = make_nested_list(10000, value=1)") + timeit("torch.tensor(simple_list)") + timeit("torch.LongTensor(simple_list)") + timeit("python_padding(simple_list)") + timeit("foldedtensor.as_folded_tensor(simple_list)") + + if 4 in cases: + print("\n## Case 4 (same lengths nested lists to flat tensor)\n") + + with block_code(): + exec_and_print("nested_list = make_nested_list(32, 100, 30, value=1)") + timeit("torch.tensor(nested_list).view(-1)") + timeit("torch.LongTensor(nested_list).view(-1)") + timeit("python_padding(nested_list).view(-1)") + timeit("foldedtensor.as_folded_tensor(nested_list).view(-1)") + timeit("foldedtensor.as_folded_tensor(nested_list, data_dims=(2,))") + + if 5 in cases: + print("## Case 5 (variable lengths nested lists) to padded embeddings\n") + print("Nested lists with different lengths (second level lists have lengths " + "between 50 and 150). We compare `foldedtensor` with `torch.nested`.") + + with block_code(): + exec_and_print("nested_list = make_nested_list(32, (50, 150), 30, value=1)") + + print("# Padding with 0\n") + + timeit("torch.nested.nested_tensor([torch.LongTensor(sub) for sub in nested_list]).to_padded_tensor(0)") + timeit("foldedtensor.as_folded_tensor(nested_list).as_tensor()") + + print("# Padding with 1\n") + timeit("torch.nested.nested_tensor([torch.FloatTensor(sub) for sub in nested_list]).to_padded_tensor(1)") + timeit("x = foldedtensor.as_folded_tensor(nested_list); x.masked_fill_(x.mask, 1)") + + if 6 in cases: + print("\n## Case 6 (2d padding)\n") + + with block_code(): + exec_and_print("nested_list = make_nested_list(160, (50, 150), value=1)") + + timeit("python_padding(nested_list)") + timeit("torch.nested.nested_tensor([torch.LongTensor(sub) for sub in nested_list]).to_padded_tensor(0)") + timeit( + "torch.nn.utils.rnn.pad_sequence([torch.LongTensor(sub) for sub in nested_list], batch_first=True, padding_value=0)") + timeit("foldedtensor.as_folded_tensor(nested_list)") + + if 7 in cases: + + def sum_all_words_per_sample(ft): + lengths = ft.lengths + ids = torch.arange(lengths[0][0]) + for i in range(1, len(lengths)): + ids = torch.repeat_interleave( + ids, + lengths[i], + output_size=len(lengths[i + 1]) + if i < len(lengths) - 1 + else ft.size(len(ft.data_dims) - 1), + ) + + out = torch.zeros(lengths[0][0], ft.shape[-1]) + out.index_add_(source=ft.as_tensor(), dim=0, index=ids) + + return out + + + print("\n## Case 7 (flat sums)\n") + + with block_code(): + exec_and_print( + "embedder = torch.nn.Embedding(500, 128)\n" + "nested_list = make_nested_list(320, (150, 250), value=1)\n" + "ft = foldedtensor.as_folded_tensor(nested_list).refold(2)\n" + "nt = torch.nested.nested_tensor([torch.LongTensor(sub) for sub in nested_list])\n" + "ft = embedder(ft)\n" + "nt = embedder(nt)\n" + ) + + timeit("nt.sum(dim=1)") + timeit("sum_all_words_per_sample(ft)") + + # timeit("embedder(ft)") + # timeit("embedder(ft).refold(0, 1)") + # timeit("embedder(nt)") + # fmt: on + + print()