From 424b05b840a047a7801016cf91cb19a545ea5402 Mon Sep 17 00:00:00 2001 From: Edward Li <2023edwardll@gmail.com> Date: Wed, 11 Sep 2024 06:41:30 +0800 Subject: [PATCH] remove some warnings (#404) --- pyproject.toml | 4 ++++ src/datachain/lib/clip.py | 2 +- src/datachain/lib/image.py | 2 +- src/datachain/lib/text.py | 2 +- tests/func/test_pytorch.py | 13 ++++++++++--- 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3f1d44aa0..222ddb0b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -141,6 +141,10 @@ filterwarnings = [ "error::pytest_mock.PytestMockWarning", "error::pytest.PytestCollectionWarning", "error::sqlalchemy.exc.SADeprecationWarning", + "ignore::DeprecationWarning:timm.*", + "ignore::DeprecationWarning:botocore.auth", + "ignore::DeprecationWarning:datasets.utils._dill", + "ignore::DeprecationWarning:librosa.core.intervals", "ignore:Field name .* shadows an attribute in parent:UserWarning" # datachain.lib.feature ] diff --git a/src/datachain/lib/clip.py b/src/datachain/lib/clip.py index 430cad6eb..9d5163739 100644 --- a/src/datachain/lib/clip.py +++ b/src/datachain/lib/clip.py @@ -18,7 +18,7 @@ def _get_encoder(model: Any, type: Literal["image", "text"]) -> Callable: hasattr(model, method_name) and inspect.ismethod(getattr(model, method_name)) ): method = getattr(model, method_name) - return lambda x: method(torch.tensor(x)) + return lambda x: method(torch.as_tensor(x).clone().detach()) # Check for model from clip or open_clip library method_name = f"encode_{type}" diff --git a/src/datachain/lib/image.py b/src/datachain/lib/image.py index e69908f0d..317d26e17 100644 --- a/src/datachain/lib/image.py +++ b/src/datachain/lib/image.py @@ -34,7 +34,7 @@ def convert_image( from transformers.image_processing_utils import BaseImageProcessor if isinstance(transform, BaseImageProcessor): - img = torch.tensor(img.pixel_values[0]) # type: ignore[assignment,attr-defined] + img = torch.as_tensor(img.pixel_values[0]).clone().detach() # type: ignore[assignment,attr-defined] except ImportError: pass if device: diff --git a/src/datachain/lib/text.py b/src/datachain/lib/text.py index 6f079084d..a547f170a 100644 --- a/src/datachain/lib/text.py +++ b/src/datachain/lib/text.py @@ -33,7 +33,7 @@ def convert_text( res = tokenizer(text) tokens = res.input_ids if isinstance(tokenizer, PreTrainedTokenizerBase) else res - tokens = torch.tensor(tokens) + tokens = torch.as_tensor(tokens).clone().detach() if device: tokens = tokens.to(device) diff --git a/tests/func/test_pytorch.py b/tests/func/test_pytorch.py index 29aec6688..68c6910c9 100644 --- a/tests/func/test_pytorch.py +++ b/tests/func/test_pytorch.py @@ -1,5 +1,6 @@ import open_clip import pytest +import torch from datasets import load_dataset from torch import Size, Tensor from torchvision.datasets import FakeData @@ -33,7 +34,9 @@ def fake_dataset(catalog, fake_image_dir): def test_pytorch_dataset(fake_dataset): - transform = v2.Compose([v2.ToTensor(), v2.Resize((64, 64))]) + transform = v2.Compose( + [v2.ToImage(), v2.ToDtype(torch.float32, scale=True), v2.Resize((64, 64))] + ) tokenizer = open_clip.get_tokenizer("ViT-B-32") pt_dataset = PytorchDataset( name=fake_dataset.name, @@ -49,7 +52,9 @@ def test_pytorch_dataset(fake_dataset): def test_pytorch_dataset_sample(fake_dataset): - transform = v2.Compose([v2.ToTensor(), v2.Resize((64, 64))]) + transform = v2.Compose( + [v2.ToImage(), v2.ToDtype(torch.float32, scale=True), v2.Resize((64, 64))] + ) pt_dataset = PytorchDataset( name=fake_dataset.name, version=fake_dataset.version, @@ -62,7 +67,9 @@ def test_pytorch_dataset_sample(fake_dataset): def test_to_pytorch(fake_dataset): from torch.utils.data import IterableDataset - transform = v2.Compose([v2.ToTensor(), v2.Resize((64, 64))]) + transform = v2.Compose( + [v2.ToImage(), v2.ToDtype(torch.float32, scale=True), v2.Resize((64, 64))] + ) tokenizer = open_clip.get_tokenizer("ViT-B-32") pt_dataset = fake_dataset.to_pytorch(transform=transform, tokenizer=tokenizer) assert isinstance(pt_dataset, IterableDataset)