From eda441b098ff211e565afff7c3ef91980bc99890 Mon Sep 17 00:00:00 2001 From: Matt Seddon Date: Mon, 9 Sep 2024 11:29:24 +1000 Subject: [PATCH] avoid mutate by using expressions in merges --- computer_vision/fashion_product_images/1-quick-start.ipynb | 2 +- .../fashion_product_images/scripts/1-quick-start.py | 4 +--- formats/json-metadata-tutorial.ipynb | 6 +++--- multimodal/clip_fine_tuning.ipynb | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/computer_vision/fashion_product_images/1-quick-start.ipynb b/computer_vision/fashion_product_images/1-quick-start.ipynb index 748b3a0..77e06f7 100644 --- a/computer_vision/fashion_product_images/1-quick-start.ipynb +++ b/computer_vision/fashion_product_images/1-quick-start.ipynb @@ -863,7 +863,7 @@ } ], "source": [ - "dc_annotated = dc.mutate(filename=path.name(C(\"file.path\"))).merge(dc_meta, on=\"filename\")\n", + "dc_annotated = dc.merge(dc_meta, on=path.name(dc.c(\"file.path\")), right_on=\"filename\")\n", "\n", "dc_annotated.show(3)" ] diff --git a/computer_vision/fashion_product_images/scripts/1-quick-start.py b/computer_vision/fashion_product_images/scripts/1-quick-start.py index 31687ee..9190e9e 100644 --- a/computer_vision/fashion_product_images/scripts/1-quick-start.py +++ b/computer_vision/fashion_product_images/scripts/1-quick-start.py @@ -27,9 +27,7 @@ dc_meta.show(3) print("\n# Merge the original image and metadata datachains") -dc_annotated = dc.mutate(filename=path.name(C("file.path"))).merge( - dc_meta, on="filename" -) +dc_annotated = dc.merge(dc_meta, on=path.name(dc.c("file.path")), right_on="filename") print("\n# Save dataset") dc_annotated.save("fashion-product-images") diff --git a/formats/json-metadata-tutorial.ipynb b/formats/json-metadata-tutorial.ipynb index 5b7902f..3e4db32 100644 --- a/formats/json-metadata-tutorial.ipynb +++ b/formats/json-metadata-tutorial.ipynb @@ -1783,7 +1783,7 @@ } ], "source": [ - "images_meta = images.mutate(file_name=path.name(Column(\"file.path\"))).merge(meta, on=\"file_name\", right_on=\"image.file_name\", inner=True)\n", + "images_meta = images.merge(meta, on=path.name(images.c(\"file.path\")), right_on=\"image.file_name\", inner=True)\n", "images_meta.print_schema()" ] }, @@ -2176,10 +2176,10 @@ "cats = instances.filter(Column(\"instance.category_id\") == coco_dict[\"cat\"])\n", "\n", "# drop all columns in \"cats\" except id and the filename we will be merging on, rename columns to avoid collision at merge\n", - "cat_ids = cats.mutate(cat_id=Column(\"instance.id\")).mutate(cat_fpath=Column(\"file.path\")).select(\"cat_id\", \"cat_fpath\")\n", + "cat_ids = cats.mutate(cat_id=Column(\"instance.id\")).select(\"cat_id\", \"file.path\")\n", "\n", "# inner = True, drop all records without a merging match:\n", - "cats_and_dogs = dogs.merge(cat_ids, on=\"file.path\", right_on=\"cat_fpath\", inner=True)" + "cats_and_dogs = dogs.merge(cat_ids, on=\"file.path\", inner=True)" ] }, { diff --git a/multimodal/clip_fine_tuning.ipynb b/multimodal/clip_fine_tuning.ipynb index 7d241d7..58c8604 100644 --- a/multimodal/clip_fine_tuning.ipynb +++ b/multimodal/clip_fine_tuning.ipynb @@ -786,7 +786,7 @@ "metadata": {}, "outputs": [], "source": [ - "dc = img_dc.mutate(filename=path.name(C(\"file.path\"))).merge(meta_dc, on=\"filename\")" + "dc = img_dc.merge(meta_dc, on=path.name(img_dc.c(\"file.path\")), right_on=\"filename\")" ] }, {