From 6f14cc4ae30763ae5b083dc870894358e6342710 Mon Sep 17 00:00:00 2001 From: Einar Date: Mon, 18 Nov 2024 07:34:00 +0100 Subject: [PATCH] Actually fix the file update issue (#393) * Actually fix the file update issue * Bump version --------- Co-authored-by: cognite-bulldozer[bot] <51074376+cognite-bulldozer[bot]@users.noreply.github.com> --- CHANGELOG.md | 6 +++++ cognite/extractorutils/__init__.py | 2 +- cognite/extractorutils/uploader/files.py | 24 +++++++++++++------ pyproject.toml | 2 +- .../test_file_integration.py | 24 +++++++++++++++++++ 5 files changed, 49 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54e4670c..a5eff816 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,12 @@ Changes are grouped as follows - `Fixed` for any bug fixes. - `Security` in case of vulnerabilities. +## 7.5.4 + +### Fixed + + * Fix issue caused by attempting to update file mimeType on AWS clusters. + ## 7.5.3 ### Fixed diff --git a/cognite/extractorutils/__init__.py b/cognite/extractorutils/__init__.py index cd17fb0a..9048cf0f 100644 --- a/cognite/extractorutils/__init__.py +++ b/cognite/extractorutils/__init__.py @@ -16,5 +16,5 @@ Cognite extractor utils is a Python package that simplifies the development of new extractors. """ -__version__ = "7.5.3" +__version__ = "7.5.4" from .base import Extractor diff --git a/cognite/extractorutils/uploader/files.py b/cognite/extractorutils/uploader/files.py index 7fb25d3a..bfdda308 100644 --- a/cognite/extractorutils/uploader/files.py +++ b/cognite/extractorutils/uploader/files.py @@ -25,7 +25,7 @@ from requests.utils import super_len from cognite.client import CogniteClient -from cognite.client.data_classes import FileMetadata +from cognite.client.data_classes import FileMetadata, FileMetadataUpdate from cognite.client.data_classes.data_modeling import NodeId from cognite.client.data_classes.data_modeling.extractor_extensions.v1 import CogniteExtractorFileApply from cognite.client.utils._identifier import IdentifierSequence @@ -261,13 +261,23 @@ def _upload_empty( file_meta_response, url = self.cdf_client.files.create( file_metadata=file_meta, overwrite=self.overwrite_existing ) - # trigger update after creation (upsert =P) - basic_attributes = set(["externalId", "name"]) - attr = set(file_meta.dump().keys()) - diff = attr - basic_attributes - if len(diff) >= 1 and "externalId" in attr: - file_meta_response = self.cdf_client.files.update(file_meta) + # The files API for whatever reason doesn't update directory or source when you overwrite, + # so we need to update those later. + any_unchaged = ( + file_meta_response.directory != file_meta.directory or file_meta_response.source != file_meta.source + ) + if any_unchaged: + update = FileMetadataUpdate(external_id=file_meta.external_id) + any = False + if file_meta.source: + any = True + update.source.set(file_meta.source) + if file_meta.directory: + any = True + update.directory.set(file_meta.directory) + if any: + self.cdf_client.files.update(update) return file_meta_response, url diff --git a/pyproject.toml b/pyproject.toml index a2de2140..c6cf6f4c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cognite-extractor-utils" -version = "7.5.3" +version = "7.5.4" description = "Utilities for easier development of extractors for CDF" authors = ["Mathias Lohne "] license = "Apache-2.0" diff --git a/tests/tests_integration/test_file_integration.py b/tests/tests_integration/test_file_integration.py index cce82065..92fc68d1 100644 --- a/tests/tests_integration/test_file_integration.py +++ b/tests/tests_integration/test_file_integration.py @@ -277,3 +277,27 @@ def read_file() -> BufferedReadWithLength: assert len(bigfile) == 10_000_000 assert len(bigfile2) == 10_000_000 + + +def test_update_files(set_upload_test: Tuple[CogniteClient, ParamTest]) -> None: + client, test_parameter = set_upload_test + queue = BytesUploadQueue(cdf_client=client, overwrite_existing=True, max_queue_size=1) + + queue.add_to_upload_queue( + content=b"bytes content", + file_meta=FileMetadata(external_id=test_parameter.external_ids[0], name=test_parameter.external_ids[0]), + ) + queue.add_to_upload_queue( + content=b"bytes content", + file_meta=FileMetadata( + external_id=test_parameter.external_ids[0], + name=test_parameter.external_ids[0], + source="some-source", + directory="/some/directory", + ), + ) + + queue.upload() + file = client.files.retrieve(external_id=test_parameter.external_ids[0]) + assert file.source == "some-source" + assert file.directory == "/some/directory"