diff --git a/src/datachain/catalog/catalog.py b/src/datachain/catalog/catalog.py index fa19c13b7..00f0fc790 100644 --- a/src/datachain/catalog/catalog.py +++ b/src/datachain/catalog/catalog.py @@ -546,8 +546,6 @@ def find_column_to_str( # noqa: PLR0911 ) if column == "name": return posixpath.basename(row[field_lookup["path"]]) or "" - if column == "owner": - return row[field_lookup["owner_name"]] or "" if column == "path": is_dir = row[field_lookup["dir_type"]] == DirType.DIR path = row[field_lookup["path"]] @@ -737,8 +735,6 @@ def enlist_source( Column("is_latest", Boolean), Column("last_modified", DateTime(timezone=True)), Column("size", Int64), - Column("owner_name", String), - Column("owner_id", String), Column("location", JSON), Column("source", String), ] @@ -2323,8 +2319,6 @@ def find( field_set.add("path") elif column == "name": field_set.add("path") - elif column == "owner": - field_set.add("owner_name") elif column == "path": field_set.add("dir_type") field_set.add("path") diff --git a/src/datachain/cli.py b/src/datachain/cli.py index 2fe7766ed..4034f08e7 100644 --- a/src/datachain/cli.py +++ b/src/datachain/cli.py @@ -23,7 +23,7 @@ TTL_HUMAN = "4h" TTL_INT = 4 * 60 * 60 -FIND_COLUMNS = ["du", "name", "owner", "path", "size", "type"] +FIND_COLUMNS = ["du", "name", "path", "size", "type"] def human_time_type(value_str: str, can_be_none: bool = False) -> Optional[int]: @@ -612,9 +612,8 @@ def _node_data_to_ls_values(row, long_format=False): value = name + ending if long_format: last_modified = row[2] - owner_name = row[3] timestamp = last_modified if not is_dir else None - return long_line_str(value, timestamp, owner_name) + return long_line_str(value, timestamp) return value @@ -632,7 +631,7 @@ def _ls_urls_flat( if client_cls.is_root_url(source): buckets = client_cls.ls_buckets(**catalog.client_config) if long: - values = (long_line_str(b.name, b.created, "") for b in buckets) + values = (long_line_str(b.name, b.created) for b in buckets) else: values = (b.name for b in buckets) yield source, values @@ -640,7 +639,7 @@ def _ls_urls_flat( found = False fields = ["name", "dir_type"] if long: - fields.extend(["last_modified", "owner_name"]) + fields.append("last_modified") for data_source, results in catalog.ls([source], fields=fields, **kwargs): values = (_node_data_to_ls_values(r, long) for r in results) found = True @@ -656,7 +655,7 @@ def ls_indexed_storages(catalog: "Catalog", long: bool = False) -> Iterator[str] if long: for uri in storage_uris: # TODO: add Storage.created so it can be used here - yield long_line_str(uri, None, "") + yield long_line_str(uri, None) else: yield from storage_uris @@ -727,7 +726,6 @@ def ls_remote( entry = long_line_str( row["name"] + ("/" if row["dir_type"] else ""), row["last_modified"], - row["owner_name"], ) print(format_ls_entry(entry)) else: diff --git a/src/datachain/client/s3.py b/src/datachain/client/s3.py index dba90c7e5..e859c5431 100644 --- a/src/datachain/client/s3.py +++ b/src/datachain/client/s3.py @@ -119,8 +119,6 @@ def _entry_from_boto(self, v, bucket, versions=False): is_latest=v.get("IsLatest", True), last_modified=v.get("LastModified", ""), size=v["Size"], - owner_name=v.get("Owner", {}).get("DisplayName", ""), - owner_id=v.get("Owner", {}).get("ID", ""), ) async def _fetch_dir( @@ -165,8 +163,6 @@ def convert_info(self, v: dict[str, Any], path: str) -> Entry: is_latest=v.get("IsLatest", True), last_modified=v.get("LastModified", ""), size=v["size"], - owner_name=v.get("Owner", {}).get("DisplayName", ""), - owner_id=v.get("Owner", {}).get("ID", ""), ) def info_to_file(self, v: dict[str, Any], path: str) -> File: diff --git a/src/datachain/data_storage/warehouse.py b/src/datachain/data_storage/warehouse.py index 83b283913..db67e9c2a 100644 --- a/src/datachain/data_storage/warehouse.py +++ b/src/datachain/data_storage/warehouse.py @@ -638,8 +638,6 @@ def with_default(column): with_default(dr.c.is_latest), dr.c.last_modified, with_default(dr.c.size), - with_default(dr.c.owner_name), - with_default(dr.c.owner_id), with_default(dr.c.sys__rand), dr.c.location, de.c.source, diff --git a/src/datachain/node.py b/src/datachain/node.py index 8f1c67ea4..c42094865 100644 --- a/src/datachain/node.py +++ b/src/datachain/node.py @@ -54,8 +54,6 @@ class Node: is_latest: bool = True last_modified: Optional[datetime] = None size: int = 0 - owner_name: str = "" - owner_id: str = "" location: Optional[str] = None source: StorageURI = StorageURI("") dir_type: int = DirType.FILE @@ -148,8 +146,6 @@ class Entry: is_latest: bool = True last_modified: Optional[datetime] = None size: int = 0 - owner_name: str = "" - owner_id: str = "" location: Optional[str] = None @classmethod @@ -198,9 +194,9 @@ def full_path(self) -> str: TIME_FMT = "%Y-%m-%d %H:%M" -def long_line_str(name: str, timestamp: Optional[datetime], owner: str) -> str: +def long_line_str(name: str, timestamp: Optional[datetime]) -> str: if timestamp is None: time = "-" else: time = timestamp.strftime(TIME_FMT) - return f"{owner: <19} {time: <19} {name}" + return f"{time: <19} {name}" diff --git a/src/datachain/query/builtins.py b/src/datachain/query/builtins.py index 9b23e8c0a..fb9f5ffb2 100644 --- a/src/datachain/query/builtins.py +++ b/src/datachain/query/builtins.py @@ -22,8 +22,6 @@ def load_tar(raw): C.source, C.path, C.size, - C.owner_name, - C.owner_id, C.is_latest, C.last_modified, C.version, @@ -36,8 +34,6 @@ def index_tar( source, parent_path, size, - owner_name, - owner_id, is_latest, last_modified, version, @@ -49,8 +45,6 @@ def index_tar( source=source, path=parent_path, size=size, - owner_name=owner_name, - owner_id=owner_id, is_latest=bool(is_latest), last_modified=last_modified, version=version, diff --git a/src/datachain/query/schema.py b/src/datachain/query/schema.py index 270e71c54..64399ce63 100644 --- a/src/datachain/query/schema.py +++ b/src/datachain/query/schema.py @@ -222,8 +222,6 @@ class DatasetRow: "path": String, "size": Int64, "location": JSON, - "owner_name": String, - "owner_id": String, "is_latest": Boolean, "last_modified": DateTime, "version": String, @@ -236,8 +234,6 @@ def create( source: str = "", size: int = 0, location: Optional[dict[str, Any]] = None, - owner_name: str = "", - owner_id: str = "", is_latest: bool = True, last_modified: Optional[datetime] = None, version: str = "", @@ -248,8 +244,6 @@ def create( int, Optional[str], int, - str, - str, bool, datetime, str, @@ -266,8 +260,6 @@ def create( path, size, location, - owner_name, - owner_id, is_latest, last_modified, version, diff --git a/src/datachain/utils.py b/src/datachain/utils.py index b5d331f66..b8efec683 100644 --- a/src/datachain/utils.py +++ b/src/datachain/utils.py @@ -340,8 +340,6 @@ def show_df( "etag", "is_latest", "last_modified", - "owner_id", - "owner_name", "size", "version", ], diff --git a/tests/conftest.py b/tests/conftest.py index e63960cce..459ad9fae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -579,8 +579,6 @@ def dataset_rows(): "is_latest": True, "name": f"dql_1m_meta_text_emd.parquet_3_{i}_0.snappy.parquet", "etag": f"72b35c8e9b8eed1636c91eb94241c2f8-{i}", - "owner_id": "owner", - "owner_name": "aws-iterative-sandbox", "last_modified": "2024-02-23T10:42:31.842944+00:00", "size": 49807360, "sys__rand": 12123123123, diff --git a/tests/data.py b/tests/data.py index ec8f21b13..30d58e13f 100644 --- a/tests/data.py +++ b/tests/data.py @@ -13,8 +13,6 @@ is_latest=True, last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc), size=13, - owner_name="webfile", - owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a", ), Entry.from_file( path="cats/cat1", @@ -23,8 +21,6 @@ is_latest=True, last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc), size=4, - owner_name="webfile", - owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a", ), Entry.from_file( path="cats/cat2", @@ -33,8 +29,6 @@ is_latest=True, last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc), size=4, - owner_name="webfile", - owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a", ), Entry.from_file( path="dogs/dog1", @@ -43,8 +37,6 @@ is_latest=True, last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc), size=4, - owner_name="webfile", - owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a", ), Entry.from_file( path="dogs/dog2", @@ -53,8 +45,6 @@ is_latest=True, last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc), size=3, - owner_name="webfile", - owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a", ), Entry.from_file( path="dogs/dog3", @@ -63,8 +53,6 @@ is_latest=True, last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc), size=4, - owner_name="webfile", - owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a", ), Entry.from_file( path="dogs/others/dog4", @@ -73,8 +61,6 @@ is_latest=True, last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc), size=4, - owner_name="webfile", - owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a", ), ] @@ -90,8 +76,6 @@ is_latest=True, last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc), size=4, - owner_name="webfile", - owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a", ), Entry.from_file( path="dogs/others", @@ -100,8 +84,6 @@ is_latest=True, last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc), size=4, - owner_name="webfile", - owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a", ), Entry.from_file( path="dogs/", @@ -110,7 +92,5 @@ is_latest=True, last_modified=datetime(2023, 2, 27, 18, 28, 54, tzinfo=utc), size=4, - owner_name="webfile", - owner_id="75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a", ), ] diff --git a/tests/func/test_catalog.py b/tests/func/test_catalog.py index 4581add72..5629f090c 100644 --- a/tests/func/test_catalog.py +++ b/tests/func/test_catalog.py @@ -146,8 +146,6 @@ def test_find_names_columns(cloud_test_catalog, cloud_type): src_uri = cloud_test_catalog.src_uri catalog = cloud_test_catalog.catalog - owner = "webfile" if cloud_type == "s3" else "" - src_uri_path = src_uri if cloud_type == "file": src_uri_path = LocalFileSystem._strip_protocol(src_uri) @@ -156,14 +154,14 @@ def test_find_names_columns(cloud_test_catalog, cloud_type): catalog.find( [src_uri], names=["*cat*"], - columns=["du", "name", "owner", "path", "size", "type"], + columns=["du", "name", "path", "size", "type"], ) ) == { "\t".join(columns) for columns in [ - ["8", "cats", "", f"{src_uri_path}/cats/", "0", "d"], - ["4", "cat1", owner, f"{src_uri_path}/cats/cat1", "4", "f"], - ["4", "cat2", owner, f"{src_uri_path}/cats/cat2", "4", "f"], + ["8", "cats", f"{src_uri_path}/cats/", "0", "d"], + ["4", "cat1", f"{src_uri_path}/cats/cat1", "4", "f"], + ["4", "cat2", f"{src_uri_path}/cats/cat2", "4", "f"], ] } diff --git a/tests/func/test_dataset_query.py b/tests/func/test_dataset_query.py index 8e520eee0..ec67c1ace 100644 --- a/tests/func/test_dataset_query.py +++ b/tests/func/test_dataset_query.py @@ -506,7 +506,7 @@ def test_mutate(cloud_test_catalog, save): else: result = q.db_results(row_factory=lambda c, v: dict(zip(c, v))) assert len(result) == 4 - assert len(result[0]) == 17 + assert len(result[0]) == 15 cols = {"size10x", "size1000x", "s2", "s3", "s4"} new_data = [[v for k, v in r.items() if k in cols] for r in result] assert new_data == [ diff --git a/tests/func/test_ls.py b/tests/func/test_ls.py index 175c39404..cef6977cd 100644 --- a/tests/func/test_ls.py +++ b/tests/func/test_ls.py @@ -275,7 +275,6 @@ def test_ls_remote_sources(cloud_type, capsys, monkeypatch): assert captured.out == ls_remote_sources_output.format(src=src) -owner_id = "a13a3ff923430363b098ce9c769e450724e74e646332b08ca6b3ac4f96dae083" REMOTE_DATA: dict[str, list[dict[str, Any]]] = { "": [ { @@ -287,8 +286,6 @@ def test_ls_remote_sources(cloud_type, capsys, monkeypatch): "is_latest": True, "last_modified": datetime(2023, 1, 17, 21, 39, 0, 88564), "size": 0, - "owner_name": "", - "owner_id": "", "path_str": "{src}/cats", "path": [], }, @@ -301,8 +298,6 @@ def test_ls_remote_sources(cloud_type, capsys, monkeypatch): "is_latest": True, "last_modified": datetime(2023, 1, 17, 21, 39, 0, 88567), "size": 0, - "owner_name": "", - "owner_id": "", "path_str": "{src}/dogs", "path": [], }, @@ -315,8 +310,6 @@ def test_ls_remote_sources(cloud_type, capsys, monkeypatch): "is_latest": True, "last_modified": datetime(2022, 2, 10, 3, 39, 9), "size": 350496, - "owner_name": "", - "owner_id": owner_id, "path_str": "{src}/description", "path": [], }, @@ -331,8 +324,6 @@ def test_ls_remote_sources(cloud_type, capsys, monkeypatch): "is_latest": True, "last_modified": datetime(2022, 6, 28, 22, 39, 1), "size": 32975, - "owner_name": "", - "owner_id": owner_id, "path_str": "{src}/dogs/others/dog4", "path": [], }, @@ -347,8 +338,6 @@ def test_ls_remote_sources(cloud_type, capsys, monkeypatch): "is_latest": True, "last_modified": datetime(2022, 6, 28, 22, 39, 1), "size": 101, - "owner_name": "", - "owner_id": owner_id, "path_str": "{src}/dogs/dog1", "path": [], }, @@ -361,8 +350,6 @@ def test_ls_remote_sources(cloud_type, capsys, monkeypatch): "is_latest": True, "last_modified": datetime(2022, 6, 28, 22, 39, 1), "size": 29759, - "owner_name": "", - "owner_id": owner_id, "path_str": "{src}/dogs/dog2", "path": [], }, @@ -375,8 +362,6 @@ def test_ls_remote_sources(cloud_type, capsys, monkeypatch): "is_latest": True, "last_modified": datetime(2022, 6, 28, 22, 39, 1), "size": 102, - "owner_name": "", - "owner_id": owner_id, "path_str": "{src}/dogs/dog3", "path": [], }, diff --git a/tests/func/test_pull.py b/tests/func/test_pull.py index 0ca456ce8..7c78bd3f9 100644 --- a/tests/func/test_pull.py +++ b/tests/func/test_pull.py @@ -64,8 +64,6 @@ def schema(): "is_latest": {"type": "Boolean"}, "last_modified": {"type": "DateTime"}, "size": {"type": "Int64"}, - "owner_name": {"type": "String"}, - "owner_id": {"type": "String"}, "sys__rand": {"type": "Int64"}, "location": {"type": "String"}, "source": {"type": "String"}, diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 4ce6d9a95..4072d8846 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -28,8 +28,6 @@ def test_dataset_table_compilation(): Column("is_latest", Boolean), Column("last_modified", DateTime(timezone=True)), Column("size", Int64, nullable=False, index=True), - Column("owner_name", String), - Column("owner_id", String), Column("location", JSON), Column("source", String, nullable=False), Column("score", Float, nullable=False), @@ -50,8 +48,6 @@ def test_dataset_table_compilation(): "\tis_latest BOOLEAN, \n" "\tlast_modified DATETIME, \n" "\tsize INTEGER NOT NULL, \n" - "\towner_name VARCHAR, \n" - "\towner_id VARCHAR, \n" "\tlocation JSON, \n" "\tsource VARCHAR NOT NULL, \n" "\tscore FLOAT NOT NULL, \n" diff --git a/tests/unit/test_udf.py b/tests/unit/test_udf.py index aeaee7ee0..c26c67132 100644 --- a/tests/unit/test_udf.py +++ b/tests/unit/test_udf.py @@ -70,8 +70,6 @@ def sum(self, size): version="", is_latest=True, size=size, - owner_name="", - owner_id="", source="", random=1234, location=None,