From 0cb7fcca204987ab87f6cae21ef5304a8e2a1603 Mon Sep 17 00:00:00 2001
From: Daniel McDonald
Date: Tue, 7 May 2024 08:57:05 -0700
Subject: [PATCH] Issue 957 (#963)

* TST: regression test for issue #957

* BUG: update_ids data type width was sensitive to strict setting, issue #957

* DOC: mention of issue #957
---
 ChangeLog.md             |  1 +
 biom/table.py            |  7 ++++++-
 biom/tests/test_table.py | 10 ++++++++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/ChangeLog.md b/ChangeLog.md
index 6c8e1d4d..5c86c2b6 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -12,6 +12,7 @@ Bug Fixes:
 
 * Fixed an edge case on in `align_tree` when a feature was empty, see issue [#948](https://github.com/biocore/biom-format/issues/948)
 * In `subsample(..., with_replacement=True)`, it was possible to trigger a numerical stability on sum, see issue [#952](https://github.com/biocore/biom-format/issues/952)
+* `update_ids(..., strict=False)` could yield truncated IDs, see issue [#957](https://github.com/biocore/biom-format/issues/957)
 
 Performance improvements:
 
diff --git a/biom/table.py b/biom/table.py
index 0b6b32a0..7e4d2d00 100644
--- a/biom/table.py
+++ b/biom/table.py
@@ -1422,7 +1422,12 @@ def update_ids(self, id_map, axis='sample', strict=True, inplace=True):
         >>> print(updated_table.ids(axis='sample'))
         ['s1.1' 's2.2' 's3.3']
         """
-        str_dtype = 'U%d' % max([len(v) for v in id_map.values()])
+        max_str_len = max([len(v) for v in id_map.values()])
+        if not strict:
+            ids = self.ids(axis=axis)
+            max_str_len = max(max_str_len, max([len(i) for i in ids]))
+
+        str_dtype = 'U%d' % max_str_len
         updated_ids = zeros(self.ids(axis=axis).size, dtype=str_dtype)
         for idx, old_id in enumerate(self.ids(axis=axis)):
             if strict and old_id not in id_map:
diff --git a/biom/tests/test_table.py b/biom/tests/test_table.py
index 5998638f..52c53fc0 100644
--- a/biom/tests/test_table.py
+++ b/biom/tests/test_table.py
@@ -2412,6 +2412,16 @@ def test_transpose(self):
                          self.st_rich.data('2', 'observation'))
         self.assertEqual(obs.transpose(), self.st_rich)
 
+    def test_update_ids_strict_dtype_bug_issue_957(self):
+        t = Table(np.arange(6).reshape(2, 3),
+                  ['O1', 'O2'],
+                  ['ab', 'cdef', 'ghijkl'])
+        exp = Table(np.arange(6).reshape(2, 3),
+                    ['O1', 'O2'],
+                    ['AB', 'cdef', 'ghijkl'])
+        obs = t.update_ids({'ab': 'AB'}, strict=False, inplace=False)
+        self.assertEqual(obs, exp)
+
     def test_update_ids_inplace_bug_892(self):
         t = example_table.copy()
         exp = t.ids().copy()