Skip to content

Commit

Permalink
update doc
Browse files Browse the repository at this point in the history
  • Loading branch information
EdwardLi-coder committed Aug 19, 2024
1 parent 293995e commit 2f9ab5f
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
13 changes: 9 additions & 4 deletions src/datachain/lib/dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,8 +839,9 @@ def select_except(self, *args: str) -> "Self":
def mutate(self, **kwargs) -> "Self":
"""Create new signals based on existing signals.
This method cannot modify existing columns. If you need to modify an existing column,
use a different name for the new column and then use `select()` to choose which columns to keep.
This method cannot modify existing columns. If you need to modify an
existing column, use a different name for the new column and then use
`select()` to choose which columns to keep.
This method is vectorized and more efficient compared to map(), and it does not
extract or download any data from the internal database. However, it can only
Expand All @@ -864,9 +865,13 @@ def mutate(self, **kwargs) -> "Self":
```
"""
existing_columns = set(self.signals_schema.values.keys())
for col_name in kwargs.keys():
for col_name in kwargs:
if col_name in existing_columns:
raise DataChainColumnError(col_name, "Cannot modify existing column with mutate(). Use a different name for the new column.")
raise DataChainColumnError(
col_name,
"Cannot modify existing column with mutate(). "
"Use a different name for the new column.",
)
for col_name, expr in kwargs.items():
if not isinstance(expr, Column) and isinstance(expr.type, NullType):
raise DataChainColumnError(
Expand Down
16 changes: 15 additions & 1 deletion tests/func/test_datachain.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
import pytest
import pytz
from PIL import Image
from sqlalchemy import Column

from datachain.data_storage.sqlite import SQLiteWarehouse
from datachain.dataset import DatasetStats
from datachain.lib.dc import DataChain
from datachain.lib.dc import DataChain, DataChainColumnError
from datachain.lib.file import File, ImageFile
from tests.utils import images_equal

Expand Down Expand Up @@ -314,3 +315,16 @@ def test_from_storage_check_rows(tmp_dir, test_session):
location=None,
vtype="",
)


def test_mutate_existing_column(catalog):
    """Check that mutate() rejects a keyword naming an existing column.

    The chain is built with a single ``ids`` column; mutating ``ids`` must
    raise ``DataChainColumnError`` carrying the exact message asserted below.
    """
    # NOTE(review): `catalog` is requested but never referenced in the body;
    # presumably the fixture is needed for its setup side effects — confirm.
    expected_message = (
        "Error for column ids: Cannot modify existing column with mutate()."
        " Use a different name for the new column."
    )
    chain = DataChain.from_values(ids=[1, 2, 3])

    with pytest.raises(DataChainColumnError) as raised:
        chain.mutate(ids=Column("ids") + 1)

    assert str(raised.value) == expected_message

0 comments on commit 2f9ab5f

Please sign in to comment.