From a00ce95eddced667000dcf70a1c2965966f527dc Mon Sep 17 00:00:00 2001 From: Jon Duckworth Date: Fri, 18 Mar 2022 14:08:06 -0400 Subject: [PATCH 1/5] Additional changes --- tutorials/radiant-mlhub-publish-dataset.ipynb | 490 ++++++++++++++++++ 1 file changed, 490 insertions(+) create mode 100644 tutorials/radiant-mlhub-publish-dataset.ipynb diff --git a/tutorials/radiant-mlhub-publish-dataset.ipynb b/tutorials/radiant-mlhub-publish-dataset.ipynb new file mode 100644 index 00000000..c3661dd1 --- /dev/null +++ b/tutorials/radiant-mlhub-publish-dataset.ipynb @@ -0,0 +1,490 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bf73380d-383d-499e-baad-ead1f5948bf5", + "metadata": {}, + "source": [ + "## Publish an ML Training Dataset on Radiant MLHub\n", + "\n", + "Radiant MLHub Logo\n" + ] + }, + { + "cell_type": "markdown", + "id": "4c593de1-8451-4bfd-b004-a0fc6de1aa27", + "metadata": {}, + "source": [ + "In this tutorial, we will walk through the process of creating STAC Collections for the labels and source imagery in an example machine learning (ML) training dataset. We will then describe the process for submitting this dataset to [Radiant MLHub](https://mlhub.earth/) for publication.\n", + "\n", + "For this example, we will use the sample training dataset from the [SpaceNet 7: Multi-Temporal Urban Development Challenge](https://spacenet.ai/sn7-challenge/)." + ] + }, + { + "cell_type": "markdown", + "id": "720968ee-1e83-4a34-b2b1-182def1c1586", + "metadata": {}, + "source": [ + "### Setup" + ] + }, + { + "cell_type": "markdown", + "id": "41bdab98-4567-4763-a735-6da5cf0cd68a", + "metadata": {}, + "source": [ + "Let's start by importing the libraries we will use through the rest of the tutorial." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ce566ab8-e215-4367-9fe6-4da05286d6b5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: rio-stac==0.3.2 in /srv/conda/envs/notebook/lib/python3.8/site-packages (0.3.2)\n", + "Requirement already satisfied: pystac<2.0.0,>=1.0.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rio-stac==0.3.2) (1.2.0)\n", + "Requirement already satisfied: rasterio in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rio-stac==0.3.2) (1.2.10)\n", + "Requirement already satisfied: python-dateutil>=2.7.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from pystac<2.0.0,>=1.0.0->rio-stac==0.3.2) (2.8.2)\n", + "Requirement already satisfied: six>=1.5 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from python-dateutil>=2.7.0->pystac<2.0.0,>=1.0.0->rio-stac==0.3.2) (1.16.0)\n", + "Requirement already satisfied: snuggs>=1.4.1 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.4.7)\n", + "Requirement already satisfied: cligj>=0.5 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (0.7.2)\n", + "Requirement already satisfied: setuptools in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (59.8.0)\n", + "Requirement already satisfied: attrs in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (21.4.0)\n", + "Requirement already satisfied: affine in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (2.3.0)\n", + "Requirement already satisfied: click-plugins in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.1.1)\n", + "Requirement already satisfied: certifi in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (2021.10.8)\n", + "Requirement already 
satisfied: numpy in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.20.0)\n", + "Requirement already satisfied: click>=4.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (8.0.3)\n", + "Requirement already satisfied: pyparsing>=2.1.6 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from snuggs>=1.4.1->rasterio->rio-stac==0.3.2) (3.0.7)\n" + ] + } + ], + "source": [ + "!pip install rio-stac==0.3.2" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "52420a33-ae4e-4b16-b893-f42dd67909fb", + "metadata": {}, + "outputs": [], + "source": [ + "import enum\n", + "import os\n", + "import pathlib\n", + "import re\n", + "import shutil\n", + "import tarfile\n", + "import tempfile\n", + "import urllib.parse\n", + "\n", + "import pystac\n", + "import rasterio\n", + "from pystac.utils import str_to_datetime\n", + "from pystac.extensions.eo import Band, EOExtension\n", + "from rio_stac.stac import create_stac_item" + ] + }, + { + "cell_type": "markdown", + "id": "7d7026bd-5035-49c8-807d-8c9063547191", + "metadata": {}, + "source": [ + "### Data Exploration" + ] + }, + { + "cell_type": "markdown", + "id": "2f3295f2-431b-448a-ae1b-ab293ea7aadd", + "metadata": {}, + "source": [ + "First, we will download the sample subset of training data provided by SpaceNet and extract the tar archive. This sample does not include the full set of labels for the dataset, but it will give us enough to work with for this example." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a6be708a-b187-48c9-8d5f-27a896ed10a0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File /tmp/sample_data.tar.gz already exists, skipping download\n", + "Data already extracted from archive; skipping extract.\n" + ] + } + ], + "source": [ + "# Get the TMP directory for this system\n", + "tmp_dir = pathlib.Path(tempfile.gettempdir())\n", + "\n", + "tar_url = \"https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/tarballs/SN6_buildings_AOI_11_Rotterdam_train_sample.tar.gz\"\n", + "tar_path = tmp_dir / \"sample_data.tar.gz\"\n", + "data_dir = tmp_dir / \"sample_data\"\n", + "\n", + "if tar_path.exists():\n", + " print(f\"File {tar_path} already exists, skipping download\")\n", + "else:\n", + " !curl {tar_url} -o {tar_path}\n", + " \n", + "if data_dir.exists():\n", + " print(f\"Data already extracted from archive; skipping extract.\")\n", + "else:\n", + " os.makedirs(data_dir, exist_ok=True)\n", + " !tar -xzf {tar_path} -C {tmp_dir} --transform s/SN6_buildings_AOI_11_Rotterdam_train_sample/{data_dir.name}/\n", + " print(f\"Extracted data to {data_dir}\")" + ] + }, + { + "cell_type": "markdown", + "id": "68d03841-d930-480f-b3f3-f4a03ad1af33", + "metadata": {}, + "source": [ + "Next, let's take a look at the directory structure within the sample data directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "273826a9-e03a-4124-8b9f-8ceeb61fde51", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/tmp/sample_data\n", + "/tmp/sample_data/AOI_11_Rotterdam\n", + "/tmp/sample_data/AOI_11_Rotterdam/SummaryData\n", + "\tSN6_TrainSample_AOI_11_Rotterdam_Buildings.csv\n", + "/tmp/sample_data/AOI_11_Rotterdam/geojson_buildings\n", + "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\n", + "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_69.geojson\n", + "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804133131_20190804133356_tile_783.geojson\n", + "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190822075219_20190822075510_tile_8137.geojson\n", + "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190822082538_20190822082826_tile_4164.geojson\n", + "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190822091156_20190822091502_tile_108.geojson\n", + "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823082625_20190823082938_tile_442.geojson\n", + "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823091132_20190823091448_tile_7924.geojson\n", + "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823123151_20190823123459_tile_2317.geojson\n", + "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823145306_20190823145612_tile_7218.geojson\n", + "/tmp/sample_data/AOI_11_Rotterdam/RGBNIR\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_69.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804133131_20190804133356_tile_783.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822075219_20190822075510_tile_8137.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822082538_20190822082826_tile_4164.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822091156_20190822091502_tile_108.tif\n", + 
"\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823082625_20190823082938_tile_442.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823091132_20190823091448_tile_7924.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823123151_20190823123459_tile_2317.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823145306_20190823145612_tile_7218.tif\n", + "/tmp/sample_data/AOI_11_Rotterdam/PS-RGBNIR\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_69.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804133131_20190804133356_tile_783.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190822075219_20190822075510_tile_8137.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190822082538_20190822082826_tile_4164.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190822091156_20190822091502_tile_108.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823082625_20190823082938_tile_442.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823091132_20190823091448_tile_7924.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823123151_20190823123459_tile_2317.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823145306_20190823145612_tile_7218.tif\n", + "/tmp/sample_data/AOI_11_Rotterdam/SAR-Intensity\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_69.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804133131_20190804133356_tile_783.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822075219_20190822075510_tile_8137.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822082538_20190822082826_tile_4164.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822091156_20190822091502_tile_108.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823082625_20190823082938_tile_442.tif\n", + 
"\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823091132_20190823091448_tile_7924.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823123151_20190823123459_tile_2317.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823145306_20190823145612_tile_7218.tif\n", + "/tmp/sample_data/AOI_11_Rotterdam/PAN\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_69.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804133131_20190804133356_tile_783.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822075219_20190822075510_tile_8137.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822082538_20190822082826_tile_4164.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822091156_20190822091502_tile_108.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823082625_20190823082938_tile_442.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823091132_20190823091448_tile_7924.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823123151_20190823123459_tile_2317.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823145306_20190823145612_tile_7218.tif\n", + "/tmp/sample_data/AOI_11_Rotterdam/PS-RGB\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_69.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804133131_20190804133356_tile_783.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822075219_20190822075510_tile_8137.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822082538_20190822082826_tile_4164.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822091156_20190822091502_tile_108.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823082625_20190823082938_tile_442.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823091132_20190823091448_tile_7924.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823123151_20190823123459_tile_2317.tif\n", + 
"\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823145306_20190823145612_tile_7218.tif\n" + ] + } + ], + "source": [ + "for root, _, files in os.walk(data_dir):\n", + " print(root)\n", + " if files:\n", + " print(\"\\t\" + \"\\n\\t\".join(sorted(files)))" + ] + }, + { + "cell_type": "markdown", + "id": "8959e18f-199f-42c8-8309-36deb3791262", + "metadata": {}, + "source": [ + "We can see from the directory layout that our sample data has a single AOI directory (`AOI_11_Rotterdam`), which in turn has sub-directories containing GeoJSON labels and various types of source imagery. Based on the naming convention of the files, we can guess that each GeoJSON label can be matched to the corresponding source imagery based on the filename. Furthermore, the last part of the filename (before `tile_*`) looks like a timestamp range, probably representing the datetime of the imagery capture.\n", + "\n", + "For example, the `SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson` label could be applied to the pansharpened RGB imagery in `SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif` or the SAR intensity data in `SN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif`.\n", + "\n", + "Based on this observation, we can come up with a regular expression to capture the relevant parts of the label filename and use them to find different source images for those labels." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "a5e16d25-ddf3-45ae-bd49-91888ab5a89c", + "metadata": {}, + "outputs": [], + "source": [ + "aoi_name = \"AOI_11_Rotterdam\"\n", + "aoi_dir = data_dir / aoi_name\n", + "\n", + "labels_pattern = re.compile(\n", + " r\"^(?P<prefix>SN6_Train_AOI_11_Rotterdam)\"\n", + " \"_Buildings_\"\n", + " \"(?P<start_datetime>20190804120223)\"\n", + " \"_\"\n", + " \"(?P<end_datetime>20190804120456)\"\n", + " \"_tile_\"\n", + " \"(?P<tile>55)\"\n", + " \"\.geojson$\"\n", + ")\n", + "\n", + "class SourceType(str, enum.Enum):\n", + " \"\"\"Enumerates the possible source types.\n", + " \"\"\"\n", + " RGBNIR = \"RGBNIR\"\n", + " PS_RGBNIR = \"PS-RGBNIR\"\n", + " SAR_Intensity = \"SAR-Intensity\"\n", + " PAN = \"PAN\"\n", + " PS_RGB = \"PS-RGB\"\n", + "\n", + "def get_source_info(label_path):\n", + " \"\"\"Gets a list of dicts (href, type, start_datetime, end_datetime), one per SourceType, for\n", + " the given label file path. Raises ValueError if the filename does not match labels_pattern.\n", + " \"\"\"\n", + " label_path = os.fspath(label_path)\n", + " label_path = pathlib.Path(label_path)\n", + " match = labels_pattern.match(label_path.name)\n", + " if match is None:\n", + " raise ValueError(f\"Invalid filename {label_path.name}\")\n", + " \n", + " prefix = match.group(\"prefix\")\n", + " start_datetime = match.group(\"start_datetime\")\n", + " end_datetime = match.group(\"end_datetime\")\n", + " tile = match.group(\"tile\")\n", + " \n", + " return [\n", + " {\n", + " # We use the path on S3 instead of the local path here\n", + " \"href\": f\"https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/\" \\\n", + " f\"{aoi_name}/{source_type.value}/\" \\\n", + " f\"{prefix}_{source_type.value}_{start_datetime}_{end_datetime}_tile_{tile}.tif\",\n", + " \"type\": source_type.value,\n", + " \"start_datetime\": start_datetime,\n", + " \"end_datetime\": end_datetime,\n", + " }\n", + " for source_type in SourceType\n", + " ]\n", + " \n", + " " + ] + }, + { + "cell_type": "markdown", + "id": 
"c453e704-34cf-4d7d-8de1-044c3dbf83ea", + "metadata": {}, + "source": [ + "For example..." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "dcb37528-fc62-4eeb-904d-704ec85b9695", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'href': 'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/RGBNIR/SN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", + " 'type': 'RGBNIR',\n", + " 'start_datetime': '20190804120223',\n", + " 'end_datetime': '20190804120456'},\n", + " {'href': 'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/PS-RGBNIR/SN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", + " 'type': 'PS-RGBNIR',\n", + " 'start_datetime': '20190804120223',\n", + " 'end_datetime': '20190804120456'},\n", + " {'href': 'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/SAR-Intensity/SN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif',\n", + " 'type': 'SAR-Intensity',\n", + " 'start_datetime': '20190804120223',\n", + " 'end_datetime': '20190804120456'},\n", + " {'href': 'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/PAN/SN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif',\n", + " 'type': 'PAN',\n", + " 'start_datetime': '20190804120223',\n", + " 'end_datetime': '20190804120456'},\n", + " {'href': 'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/PS-RGB/SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif',\n", + " 'type': 'PS-RGB',\n", + " 'start_datetime': '20190804120223',\n", + " 'end_datetime': '20190804120456'}]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "label = 
\"SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\"\n", + "sources = get_source_info(label)\n", + "sources" + ] + }, + { + "cell_type": "markdown", + "id": "c7dda111-e0b3-42ff-bdbc-f9ad49f50e15", + "metadata": {}, + "source": [ + "### Catalog Source Imagery" + ] + }, + { + "cell_type": "markdown", + "id": "5a0c4fb6-7005-4189-afe8-75781461f7c9", + "metadata": {}, + "source": [ + "Since each of the sources for a given label cover the same spatial and temporal extents, we can combine them into a single STAC Item, with each source represented as a distinct Asset. We will create the helper functions " + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "6782d5c1-100b-4f56-960a-844fcbdb0d09", + "metadata": {}, + "outputs": [], + "source": [ + "def create_source_item(label_path):\n", + " sources = get_source_info(label_path)\n", + " \n", + " first_source = sources[0]\n", + " \n", + " # Bootstrap the source item using rio-stac based on the first asset\n", + " with rasterio.open(sources[0][\"href\"]) as src:\n", + " item = create_stac_item(\n", + " source=src,\n", + " asset_name=first_source[\"type\"],\n", + " asset_roles=[\"data\"],\n", + " # Note that we use a single datetime here instead of the range from the filename\n", + " input_datetime=str_to_datetime(first_source[\"start_datetime\"]),\n", + " with_proj=True,\n", + " )\n", + " \n", + " # rio-stac does not add the Asset \"type\" or \"title\" fields, so we add them manually\n", + " # (all assets are Cloud-Optimized GeoTIFFs)\n", + " item.assets[first_source[\"type\"]].type = pystac.MediaType.COG\n", + " item.assets[first_source[\"type\"]].title = first_source[\"type\"]\n", + " \n", + " # Since the spatiotemporal metadata is the same for all assets, we do not need to read \n", + " # each one.\n", + " for source in sources[1:]:\n", + " asset = pystac.Asset.from_dict({\n", + " \"href\": source[\"href\"],\n", + " \"roles\": [\"data\"],\n", + " \"type\": 
str(pystac.MediaType.COG),\n", + " \"title\": source[\"type\"]\n", + " })\n", + " item.add_asset(source[\"type\"], asset)\n", + " \n", + " return item" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e769016b-cf1c-4dca-ab42-2e3588f8e668", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "66a48b4f-9772-46b9-a96f-ec7591c6b3c8", + "metadata": {}, + "source": [ + "### Catalog Labels" + ] + }, + { + "cell_type": "markdown", + "id": "b02acefe-ee14-4dfb-a826-b2af28a46594", + "metadata": {}, + "source": [ + "### Submit to Radiant MLHub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85307e44-2bd6-4463-903c-9fea2df3ed48", + "metadata": {}, + "outputs": [], + "source": [ + "tar_path.unlink(missing_ok=True)\n", + "shutil.rmtree(data_dir, ignore_errors=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff611fad-f7a5-45c3-8108-3035e028777f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From fa705fbe7ee6b50d271c6fcb5a1d397bbe1379b0 Mon Sep 17 00:00:00 2001 From: Jon Duckworth Date: Fri, 18 Mar 2022 14:12:46 -0400 Subject: [PATCH 2/5] Add tutorial to main README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 4a4d64d0..a9ac4e76 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,7 @@ These tutorials introduce a large topic and cover it in detail. 
* [Local tools](tutorials/local-tools.ipynb) * [Label-Maker](tutorials/label-maker-dask.ipynb) * [LandCoverNet Dataset on Radiant MLHub](tutorials/radiant-mlhub-landcovernet.ipynb) +* [Publish an ML Training Dataset on Radiant MLHub](tutorials/radiant-mlhub-publish-dataset.ipynb) ## Learn More From 4ca3ede15e878a7ca85489ce5ea7addb8e6e0fe0 Mon Sep 17 00:00:00 2001 From: Kendall Smith Date: Sat, 26 Mar 2022 10:01:18 -0700 Subject: [PATCH 3/5] notebook ready for review --- tutorials/radiant-mlhub-publish-dataset.ipynb | 1076 +++++++++++++++-- 1 file changed, 958 insertions(+), 118 deletions(-) diff --git a/tutorials/radiant-mlhub-publish-dataset.ipynb b/tutorials/radiant-mlhub-publish-dataset.ipynb index c3661dd1..abc32831 100644 --- a/tutorials/radiant-mlhub-publish-dataset.ipynb +++ b/tutorials/radiant-mlhub-publish-dataset.ipynb @@ -15,9 +15,9 @@ "id": "4c593de1-8451-4bfd-b004-a0fc6de1aa27", "metadata": {}, "source": [ - "In this tutorial, we will walk through the process of creating STAC Collections for the labels and source imagery in an example machine learning (ML) training dataset. We will then describe the process for submitting this dataset to [Radiant MLHub](https://mlhub.earth/) for publication.\n", + "In this tutorial, we will walk through the process of creating a self-contained STAC Catalog and its child Collections for the labels and source imagery in an example machine learning (ML) training dataset. We will then describe the process for getting the dataset ready for submission to [Radiant MLHub](https://mlhub.earth/) for manual review and publication.\n", "\n", - "For this example, we will use the sample training dataset from the [SpaceNet 7: Multi-Temporal Urban Development Challenge](https://spacenet.ai/sn7-challenge/)." + "For this example, we will use the sample training dataset from the [SpaceNet 6: Multi-Sensor All-Weather Mapping](https://spacenet.ai/sn6-challenge/) challenge." 
] }, { @@ -46,21 +46,29 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: rio-stac==0.3.2 in /srv/conda/envs/notebook/lib/python3.8/site-packages (0.3.2)\n", - "Requirement already satisfied: pystac<2.0.0,>=1.0.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rio-stac==0.3.2) (1.2.0)\n", + "Collecting rio-stac==0.3.2\n", + " Downloading rio-stac-0.3.2.tar.gz (8.1 kB)\n", "Requirement already satisfied: rasterio in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rio-stac==0.3.2) (1.2.10)\n", + "Requirement already satisfied: pystac<2.0.0,>=1.0.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rio-stac==0.3.2) (1.2.0)\n", "Requirement already satisfied: python-dateutil>=2.7.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from pystac<2.0.0,>=1.0.0->rio-stac==0.3.2) (2.8.2)\n", "Requirement already satisfied: six>=1.5 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from python-dateutil>=2.7.0->pystac<2.0.0,>=1.0.0->rio-stac==0.3.2) (1.16.0)\n", - "Requirement already satisfied: snuggs>=1.4.1 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.4.7)\n", - "Requirement already satisfied: cligj>=0.5 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (0.7.2)\n", "Requirement already satisfied: setuptools in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (59.8.0)\n", - "Requirement already satisfied: attrs in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (21.4.0)\n", + "Requirement already satisfied: numpy in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.22.3)\n", + "Requirement already satisfied: cligj>=0.5 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (0.7.2)\n", "Requirement already satisfied: affine in 
/srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (2.3.0)\n", - "Requirement already satisfied: click-plugins in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.1.1)\n", + "Requirement already satisfied: attrs in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (21.4.0)\n", + "Requirement already satisfied: click>=4.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (7.1.2)\n", "Requirement already satisfied: certifi in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (2021.10.8)\n", - "Requirement already satisfied: numpy in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.20.0)\n", - "Requirement already satisfied: click>=4.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (8.0.3)\n", - "Requirement already satisfied: pyparsing>=2.1.6 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from snuggs>=1.4.1->rasterio->rio-stac==0.3.2) (3.0.7)\n" + "Requirement already satisfied: snuggs>=1.4.1 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.4.7)\n", + "Requirement already satisfied: click-plugins in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.1.1)\n", + "Requirement already satisfied: pyparsing>=2.1.6 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from snuggs>=1.4.1->rasterio->rio-stac==0.3.2) (3.0.7)\n", + "Building wheels for collected packages: rio-stac\n", + " Building wheel for rio-stac (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25h Created wheel for rio-stac: filename=rio_stac-0.3.2-py3-none-any.whl size=8279 sha256=13e01dd9e6dcd02c086ddfc260de260387dea212829ab082de08519d764b8999\n", + " Stored in directory: /home/jovyan/.cache/pip/wheels/42/12/1a/677dda98b5bb48936e8636e4e71ddc6ed65ee7f3a849ca2c77\n", + "Successfully built rio-stac\n", + "Installing collected packages: rio-stac\n", + "Successfully installed rio-stac-0.3.2\n" ] } ], @@ -70,25 +78,39 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 2, "id": "52420a33-ae4e-4b16-b893-f42dd67909fb", "metadata": {}, "outputs": [], "source": [ "import enum\n", "import os\n", + "import tarfile\n", + "import shutil\n", "import pathlib\n", "import re\n", "import shutil\n", "import tarfile\n", "import tempfile\n", "import urllib.parse\n", + "from dateutil.parser import parse\n", + "import datetime as dt\n", + "from typing import List, Dict, Tuple\n", "\n", "import pystac\n", "import rasterio\n", "from pystac.utils import str_to_datetime\n", "from pystac.extensions.eo import Band, EOExtension\n", - "from rio_stac.stac import create_stac_item" + "from pystac.extensions.label import LabelExtension\n", + "from rio_stac.stac import create_stac_item\n", + "import geopandas as gpd\n", + "from pystac import Catalog, Collection, Item, MediaType, \\\n", + " Asset, Link, Extent, SpatialExtent, TemporalExtent, CatalogType\n", + "from pystac.extensions.scientific import ScientificExtension\n", + "from shapely.geometry import GeometryCollection, Polygon, mapping, shape\n", + "\n", + "from pprint import PrettyPrinter\n", + "pp = PrettyPrinter(indent=2)" ] }, { @@ -110,6 +132,33 @@ { "cell_type": "code", "execution_count": 3, + "id": "022cbe6f-1d8e-4a61-8615-0736926f4a27", + "metadata": {}, + "outputs": [], + "source": [ + "# Get the TMP directory for this system\n", + "tmp_dir = pathlib.Path(tempfile.gettempdir())\n", + "\n", + "tar_url = 
\"https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/tarballs/SN6_buildings_AOI_11_Rotterdam_train_sample.tar.gz\"\n", + "tar_root = \"https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/\"\n", + "# tar_path = tmp_dir / \"sample_data.tar.gz\"\n", + "# data_dir = tmp_dir / \"sample_data\"\n", + "tar_path = tmp_dir / \"SN6_buildings_AOI_11_Rotterdam_train_sample.tar.gz\"\n", + "untar_path = tmp_dir / \"SN6_buildings_AOI_11_Rotterdam_train_sample\"\n", + "data_dir = tmp_dir / \"spacenet_6_rotterdam\"" + ] + }, + { + "cell_type": "markdown", + "id": "3e66fd96-889e-4bc5-8154-2e25dcc7f022", + "metadata": {}, + "source": [ + "If the archive `SN6_buildings_AOI_11_Rotterdam_train_sample.tar.gz` does not already exist in our temporary directory, we will download it using the `curl` command." + ] + }, + { + "cell_type": "code", + "execution_count": 4, "id": "a6be708a-b187-48c9-8d5f-27a896ed10a0", "metadata": {}, "outputs": [ @@ -117,30 +166,57 @@ "name": "stdout", "output_type": "stream", "text": [ - "File /tmp/sample_data.tar.gz already exists, skipping download\n", - "Data already extracted from archive; skipping extract.\n" + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 123M 100 123M 0 0 17.2M 0 0:00:07 0:00:07 --:--:-- 21.9M\n" ] } ], "source": [ - "# Get the TMP directory for this system\n", - "tmp_dir = pathlib.Path(tempfile.gettempdir())\n", - "\n", - "tar_url = \"https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/tarballs/SN6_buildings_AOI_11_Rotterdam_train_sample.tar.gz\"\n", - "tar_path = tmp_dir / \"sample_data.tar.gz\"\n", - "data_dir = tmp_dir / \"sample_data\"\n", - "\n", "if tar_path.exists():\n", " print(f\"File {tar_path} already exists, skipping download\")\n", "else:\n", - " !curl {tar_url} -o {tar_path}\n", - " \n", - "if data_dir.exists():\n", + " !curl {tar_url} -o {tar_path}" + ] + }, + { + "cell_type": "markdown", + "id": 
"22078ea3-197b-4bda-b7bb-6ce8f6710205", + "metadata": {}, + "source": [ + "Then to make the directory names more meaningful, we will rename the directory to `spacenet_6_rotterdam`, which later matches the name of the catalog." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d76015d1-794c-412c-9352-6787be3a35f4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracted data to /tmp/SN6_buildings_AOI_11_Rotterdam_train_sample\n", + "Renamed folder to /tmp/spacenet_6_rotterdam\n" + ] + } + ], + "source": [ + "if untar_path.exists():\n", " print(f\"Data already extracted from archive; skipping extract.\")\n", "else:\n", - " os.makedirs(data_dir, exist_ok=True)\n", - " !tar -xzf {tar_path} -C {tmp_dir} --transform s/SN6_buildings_AOI_11_Rotterdam_train_sample/{data_dir.name}/\n", - " print(f\"Extracted data to {data_dir}\")" + " os.makedirs(untar_path)\n", + " !tar -zxf {tar_path} -C {tmp_dir}\n", + " \n", + " if os.path.exists(untar_path):\n", + " print(f\"Extracted data to {untar_path}\")\n", + " \n", + " os.makedirs(data_dir, exist_ok=True)\n", + " !mv {untar_path}/* {data_dir}\n", + " print(f\"Renamed folder to {data_dir}\")\n", + "\n", + " !rm -rf {untar_path}" ] }, { @@ -153,7 +229,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "id": "273826a9-e03a-4124-8b9f-8ceeb61fde51", "metadata": {}, "outputs": [ @@ -161,11 +237,42 @@ "name": "stdout", "output_type": "stream", "text": [ - "/tmp/sample_data\n", - "/tmp/sample_data/AOI_11_Rotterdam\n", - "/tmp/sample_data/AOI_11_Rotterdam/SummaryData\n", - "\tSN6_TrainSample_AOI_11_Rotterdam_Buildings.csv\n", - "/tmp/sample_data/AOI_11_Rotterdam/geojson_buildings\n", + "/tmp/spacenet_6_rotterdam\n", + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam\n", + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/PS-RGB\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif\n", + 
"\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_69.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804133131_20190804133356_tile_783.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822075219_20190822075510_tile_8137.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822082538_20190822082826_tile_4164.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822091156_20190822091502_tile_108.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823082625_20190823082938_tile_442.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823091132_20190823091448_tile_7924.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823123151_20190823123459_tile_2317.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823145306_20190823145612_tile_7218.tif\n", + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/PAN\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_69.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804133131_20190804133356_tile_783.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822075219_20190822075510_tile_8137.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822082538_20190822082826_tile_4164.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822091156_20190822091502_tile_108.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823082625_20190823082938_tile_442.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823091132_20190823091448_tile_7924.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823123151_20190823123459_tile_2317.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823145306_20190823145612_tile_7218.tif\n", + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/SAR-Intensity\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_69.tif\n", + 
"\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804133131_20190804133356_tile_783.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822075219_20190822075510_tile_8137.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822082538_20190822082826_tile_4164.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822091156_20190822091502_tile_108.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823082625_20190823082938_tile_442.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823091132_20190823091448_tile_7924.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823123151_20190823123459_tile_2317.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823145306_20190823145612_tile_7218.tif\n", + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/geojson_buildings\n", "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\n", "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_69.geojson\n", "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804133131_20190804133356_tile_783.geojson\n", @@ -176,18 +283,7 @@ "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823091132_20190823091448_tile_7924.geojson\n", "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823123151_20190823123459_tile_2317.geojson\n", "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823145306_20190823145612_tile_7218.geojson\n", - "/tmp/sample_data/AOI_11_Rotterdam/RGBNIR\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822091156_20190822091502_tile_108.tif\n", - 
"\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823123151_20190823123459_tile_2317.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823145306_20190823145612_tile_7218.tif\n", - "/tmp/sample_data/AOI_11_Rotterdam/PS-RGBNIR\n", + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/PS-RGBNIR\n", "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif\n", "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_69.tif\n", "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804133131_20190804133356_tile_783.tif\n", @@ -198,39 +294,19 @@ "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823091132_20190823091448_tile_7924.tif\n", "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823123151_20190823123459_tile_2317.tif\n", "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823145306_20190823145612_tile_7218.tif\n", - "/tmp/sample_data/AOI_11_Rotterdam/SAR-Intensity\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823123151_20190823123459_tile_2317.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823145306_20190823145612_tile_7218.tif\n", - 
"/tmp/sample_data/AOI_11_Rotterdam/PAN\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823123151_20190823123459_tile_2317.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823145306_20190823145612_tile_7218.tif\n", - "/tmp/sample_data/AOI_11_Rotterdam/PS-RGB\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823123151_20190823123459_tile_2317.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823145306_20190823145612_tile_7218.tif\n" + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/SummaryData\n", + "\tSN6_TrainSample_AOI_11_Rotterdam_Buildings.csv\n", + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/RGBNIR\n", + 
"\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_69.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804133131_20190804133356_tile_783.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822075219_20190822075510_tile_8137.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822082538_20190822082826_tile_4164.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822091156_20190822091502_tile_108.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823082625_20190823082938_tile_442.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823091132_20190823091448_tile_7924.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823123151_20190823123459_tile_2317.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823145306_20190823145612_tile_7218.tif\n" ] } ], @@ -255,22 +331,23 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 19, "id": "a5e16d25-ddf3-45ae-bd49-91888ab5a89c", "metadata": {}, "outputs": [], "source": [ "aoi_name = \"AOI_11_Rotterdam\"\n", "aoi_dir = data_dir / aoi_name\n", + "os.chdir(data_dir)\n", "\n", "labels_pattern = re.compile(\n", " r\"^(?PSN6_Train_AOI_11_Rotterdam)\"\n", " \"_Buildings_\"\n", - " \"(?P20190804120223)\"\n", + " \"(?P\\d{14})\"\n", " \"_\"\n", - " \"(?P20190804120456)\"\n", + " \"(?P\\d{14})\"\n", " \"_tile_\"\n", - " \"(?P55)\"\n", + " \"(?P\\d+)\"\n", " \"\\.geojson$\"\n", ")\n", "\n", @@ -283,10 +360,8 @@ " PAN = \"PAN\"\n", " PS_RGB = \"PS-RGB\"\n", "\n", - "def get_source_info(label_path):\n", - " \"\"\"Gets a list of paths (as pathlib.Path instances) to source data associated with\n", - " the given label file path.\n", - " \"\"\"\n", + "def strip_meta_matches(label_path: str) -> Tuple[any]:\n", + " \"\"\"Uses Regex pattern above to strip out relevant metadata about the file\"\"\"\n", " label_path = os.fspath(label_path)\n", " label_path = pathlib.Path(label_path)\n", " match = 
labels_pattern.match(label_path.name)\n", @@ -298,11 +373,19 @@ " end_datetime = match.group(\"end_datetime\")\n", " tile = match.group(\"tile\")\n", " \n", + " return prefix, start_datetime, end_datetime, tile\n", + " \n", + "def get_source_info(label_path: str) -> List[Dict[str, any]]:\n", + " \"\"\"Gets a list of paths (as pathlib.Path instances) to source data associated with\n", + " the given label file path.\n", + " \"\"\"\n", + " \n", + " prefix, start_datetime, end_datetime, tile = strip_meta_matches(label_path)\n", + "\n", " return [\n", " {\n", - " # We use the path on S3 instead of the local path here\n", - " \"href\": f\"https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/\" \\\n", - " f\"{aoi_name}/{source_type.value}/\" \\\n", + " # We will use relative paths here when archiving the entire catalog with the dataset\n", + " \"href\": f\"{aoi_name}/{source_type.value}/\" \\\n", " f\"{prefix}_{source_type.value}_{start_datetime}_{end_datetime}_tile_{tile}.tif\",\n", " \"type\": source_type.value,\n", " \"start_datetime\": start_datetime,\n", @@ -311,7 +394,19 @@ " for source_type in SourceType\n", " ]\n", " \n", - " " + "def get_label_info(label_path: str) -> List[Dict[str, any]]:\n", + " \"\"\"Gets the single path and metadata attributes from the given label path\n", + " \"\"\"\n", + " \n", + " prefix, start_datetime, end_datetime, tile = strip_meta_matches(label_path)\n", + " \n", + " return {\n", + " \"href\": f\"{aoi_name}/geojson_buildings/\" \\\n", + " f\"{prefix}_Buildings_{start_datetime}_{end_datetime}_tile_{tile}.geojson\",\n", + " \"type\": \"Buildings\",\n", + " \"start_datetime\": start_datetime,\n", + " \"end_datetime\": end_datetime\n", + " }" ] }, { @@ -319,49 +414,74 @@ "id": "c453e704-34cf-4d7d-8de1-044c3dbf83ea", "metadata": {}, "source": [ - "For example..." + "For example, we can see what information our regex pattern above can learn about the geojson label filename..." 
] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 20, "id": "dcb37528-fc62-4eeb-904d-704ec85b9695", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'href': 'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/RGBNIR/SN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", + "[{'href': 'AOI_11_Rotterdam/RGBNIR/SN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", " 'type': 'RGBNIR',\n", " 'start_datetime': '20190804120223',\n", " 'end_datetime': '20190804120456'},\n", - " {'href': 'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/PS-RGBNIR/SN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", + " {'href': 'AOI_11_Rotterdam/PS-RGBNIR/SN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", " 'type': 'PS-RGBNIR',\n", " 'start_datetime': '20190804120223',\n", " 'end_datetime': '20190804120456'},\n", - " {'href': 'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/SAR-Intensity/SN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif',\n", + " {'href': 'AOI_11_Rotterdam/SAR-Intensity/SN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif',\n", " 'type': 'SAR-Intensity',\n", " 'start_datetime': '20190804120223',\n", " 'end_datetime': '20190804120456'},\n", - " {'href': 'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/PAN/SN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif',\n", + " {'href': 'AOI_11_Rotterdam/PAN/SN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif',\n", " 'type': 'PAN',\n", " 'start_datetime': '20190804120223',\n", " 'end_datetime': '20190804120456'},\n", - " {'href': 
'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/PS-RGB/SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif',\n", + " {'href': 'AOI_11_Rotterdam/PS-RGB/SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif',\n", " 'type': 'PS-RGB',\n", " 'start_datetime': '20190804120223',\n", " 'end_datetime': '20190804120456'}]" ] }, - "execution_count": 22, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "label = \"SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\"\n", - "sources = get_source_info(label)\n", - "sources" + "source_info = get_source_info(label)\n", + "source_info" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "07aa9f59-7a7c-41d4-a5ce-49c7e2342d50", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'href': 'AOI_11_Rotterdam/geojson_buildings/SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson',\n", + " 'type': 'Buildings',\n", + " 'start_datetime': '20190804120223',\n", + " 'end_datetime': '20190804120456'}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "label_info = get_label_info(label)\n", + "label_info" ] }, { @@ -369,7 +489,7 @@ "id": "c7dda111-e0b3-42ff-bdbc-f9ad49f50e15", "metadata": {}, "source": [ - "### Catalog Source Imagery" + "### Create Catalog Source Items" ] }, { @@ -377,24 +497,45 @@ "id": "5a0c4fb6-7005-4189-afe8-75781461f7c9", "metadata": {}, "source": [ - "Since each of the sources for a given label cover the same spatial and temporal extents, we can combine them into a single STAC Item, with each source represented as a distinct Asset. 
We will create the helper functions " + "Since each of the sources for a given label cover the same spatial and temporal extents, we can combine them into a single STAC Item, with each source represented as a distinct Asset. We will create the helper functions that allow us to easily create a STAC Item from just the label filename based on the source imagery in our dataset directory." ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 22, + "id": "eff8b798-25ca-4a9f-abf8-c8e59f962f2f", + "metadata": {}, + "outputs": [], + "source": [ + "def get_item_id(source_href: str, source_type: str, item_type: str) -> str:\n", + " \"\"\"Helper function to return the appropriate Item ID\"\"\"\n", + " return source_href.split('/')[-1].replace(f'_{source_type}','').replace('.tif',f'_{item_type}').replace('.geojson',f'_{item_type}')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, "id": "6782d5c1-100b-4f56-960a-844fcbdb0d09", "metadata": {}, "outputs": [], "source": [ - "def create_source_item(label_path):\n", + "def create_source_item(label_path: str) -> Item:\n", + " \"\"\"Helper function that leverages rio-stac to create a STAC item\n", + " from a source image Asset, and adds the rest of the images as Assets\n", + " \"\"\"\n", " sources = get_source_info(label_path)\n", " \n", + " # we need the first source object\n", " first_source = sources[0]\n", " \n", + " # rio-stac by default provides the filepath, so we override the item id\n", + " item_id = get_item_id(first_source[\"href\"], first_source[\"type\"], \"source\")\n", + " \n", " # Bootstrap the source item using rio-stac based on the first asset\n", " with rasterio.open(sources[0][\"href\"]) as src:\n", + " \n", " item = create_stac_item(\n", + " id=item_id,\n", " source=src,\n", " asset_name=first_source[\"type\"],\n", " asset_roles=[\"data\"],\n", @@ -405,7 +546,7 @@ " \n", " # rio-stac does not add the Asset \"type\" or \"title\" fields, so we add them manually\n", " # (all assets are 
Cloud-Optimized GeoTIFFs)\n", - " item.assets[first_source[\"type\"]].type = pystac.MediaType.COG\n", + " item.assets[first_source[\"type\"]].type = MediaType.COG\n", " item.assets[first_source[\"type\"]].title = first_source[\"type\"]\n", " \n", " # Since the spatiotemporal metadata is the same for all assets, we do not need to read \n", @@ -414,7 +555,7 @@ " asset = pystac.Asset.from_dict({\n", " \"href\": source[\"href\"],\n", " \"roles\": [\"data\"],\n", - " \"type\": str(pystac.MediaType.COG),\n", + " \"type\": str(MediaType.COG),\n", " \"title\": source[\"type\"]\n", " })\n", " item.add_asset(source[\"type\"], asset)\n", @@ -422,45 +563,744 @@ " return item" ] }, + { + "cell_type": "markdown", + "id": "956f9cd3-8e3c-4850-94a6-1d9459ce7c40", + "metadata": {}, + "source": [ + "We can examine the output of our helper function `create_source_item` above to see that it has populated the required attributes for a generic source item. However, per the [STAC Item Specification](https://github.com/radiantearth/stac-spec/blob/master/item-spec/item-spec.md), it is recommended to add more properties to the Item and its Assets, such as the [EOExtension](https://github.com/stac-extensions/eo) for electro-optical bands, e.g. RGB. For now we will stick with the core required properties for a source item." 
+ ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "e769016b-cf1c-4dca-ab42-2e3588f8e668", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{ 'assets': { 'PAN': { 'href': 'AOI_11_Rotterdam/PAN/SN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif',\n", + " 'roles': ['data'],\n", + " 'title': 'PAN',\n", + " 'type': 'image/tiff; application=geotiff; '\n", + " 'profile=cloud-optimized'},\n", + " 'PS-RGB': { 'href': 'AOI_11_Rotterdam/PS-RGB/SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif',\n", + " 'roles': ['data'],\n", + " 'title': 'PS-RGB',\n", + " 'type': 'image/tiff; application=geotiff; '\n", + " 'profile=cloud-optimized'},\n", + " 'PS-RGBNIR': { 'href': 'AOI_11_Rotterdam/PS-RGBNIR/SN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", + " 'roles': ['data'],\n", + " 'title': 'PS-RGBNIR',\n", + " 'type': 'image/tiff; application=geotiff; '\n", + " 'profile=cloud-optimized'},\n", + " 'RGBNIR': { 'href': 'AOI_11_Rotterdam/RGBNIR/SN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", + " 'roles': ['data'],\n", + " 'title': 'RGBNIR'},\n", + " 'SAR-Intensity': { 'href': 'AOI_11_Rotterdam/SAR-Intensity/SN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif',\n", + " 'roles': ['data'],\n", + " 'title': 'SAR-Intensity',\n", + " 'type': 'image/tiff; application=geotiff; '\n", + " 'profile=cloud-optimized'}},\n", + " 'bbox': [ 4.350277623237341,\n", + " 51.90744725678807,\n", + " 4.356939496573024,\n", + " 51.9115675425874],\n", + " 'geometry': { 'coordinates': [ [ [4.350277623237341, 51.9115675425874],\n", + " [4.350277623237341, 51.90744725678807],\n", + " [4.356939496573024, 51.90744725678807],\n", + " [4.356939496573024, 51.9115675425874],\n", + " [4.350277623237341, 51.9115675425874]]],\n", + " 'type': 'Polygon'},\n", + " 
'id': 'SN6_Train_AOI_11_Rotterdam_20190804120223_20190804120456_tile_55_source',\n", + " 'links': [],\n", + " 'properties': { 'datetime': '2019-08-04T12:02:23Z',\n", + " 'proj:bbox': [ 592886.1399464327,\n", + " 5751614.151231687,\n", + " 593336.1616884505,\n", + " 5752064.1729737045],\n", + " 'proj:epsg': 32631,\n", + " 'proj:geometry': { 'coordinates': [ [ [ 592886.1399464327,\n", + " 5752064.1729737045],\n", + " [ 592886.1399464327,\n", + " 5751614.151231687],\n", + " [ 593336.1616884505,\n", + " 5751614.151231687],\n", + " [ 593336.1616884505,\n", + " 5752064.1729737045],\n", + " [ 592886.1399464327,\n", + " 5752064.1729737045]]],\n", + " 'type': 'Polygon'},\n", + " 'proj:shape': [450, 450],\n", + " 'proj:transform': [ 1.0000483155950517,\n", + " 0.0,\n", + " 592886.1399464327,\n", + " 0.0,\n", + " -1.0000483155950517,\n", + " 5752064.1729737045,\n", + " 0.0,\n", + " 0.0,\n", + " 1.0]},\n", + " 'stac_extensions': [ 'https://stac-extensions.github.io/projection/v1.0.0/schema.json'],\n", + " 'stac_version': '1.0.0',\n", + " 'type': 'Feature'}\n" + ] + } + ], + "source": [ + "source_item = create_source_item(label)\n", + "pp.pprint(source_item.to_dict())" + ] }, { "cell_type": "markdown", "id": "66a48b4f-9772-46b9-a96f-ec7591c6b3c8", "metadata": {}, "source": [ - "### Catalog Labels" + "### Create Catalog Label Items" ] }, { "cell_type": "markdown", - "id": "b02acefe-ee14-4dfb-a826-b2af28a46594", + "id": "61847d62-9ee6-4ce6-85c7-16baaa285140", "metadata": {}, "source": [ - "### Submit to Radiant MLHub" + "Similar to the helper functions created above, we need some functions to more easily create a label STAC Item for the catalog." 
] }, { "cell_type": "code", - "execution_count": null, - "id": "85307e44-2bd6-4463-903c-9fea2df3ed48", + "execution_count": 26, + "id": "29098a46-f86a-4439-9538-c351b011ab95", "metadata": {}, "outputs": [], "source": [ - "tar_path.unlink(missing_ok=True)\n", - "shutil.rmtree(data_dir, ignore_errors=True)" + "def get_item_datetime(dt_str: str) -> dt.datetime:\n", + " \"\"\"Returns an items datetime based on ID string pattern\"\"\"\n", + " return dt.datetime.strptime(str(dt_str), '%Y%m%d%H%M%S') #20190804120223" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "d28005c3-feaf-4932-a376-bf3c85cae173", + "metadata": {}, + "outputs": [], + "source": [ + "def get_geojson_extent(fname: str) -> Polygon:\n", + " \"\"\"Takes a path to GeoJSON vector file and returns the Polygon geometry for an Item reprojected\"\"\"\n", + " \n", + " gdf = gpd.read_file(fname)\n", + " gdf = gdf.to_crs(\"EPSG:4326\")\n", + " bounds = gdf.total_bounds\n", + " geometry = Polygon(\n", + " (\n", + " (bounds[0], bounds[1]),\n", + " (bounds[0], bounds[3]),\n", + " (bounds[2], bounds[3]),\n", + " (bounds[2], bounds[1]),\n", + " (bounds[0], bounds[1])\n", + " )\n", + " )\n", + " return geometry" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "fc37d0b5-5e2a-47b5-9837-78aa5f30a9c0", + "metadata": {}, + "outputs": [], + "source": [ + "def add_label_extension(label: Item, label_meta: Dict[str, any]) -> Item:\n", + " \"\"\"This applies the STAC LabelExtension to the label item and related properties\"\"\"\n", + " # apply the Label Extention\n", + " label_ext = LabelExtension.ext(\n", + " label, \n", + " add_if_missing = True\n", + " )\n", + "\n", + " label_ext.apply(\n", + " label_description = \"SpaceNet 6 Building Footprints\",\n", + " label_type = 'vector'\n", + " )\n", + "\n", + " # instantiate GeoJSON Asset\n", + " asset=Asset(\n", + " href = label_meta[\"href\"],\n", + " media_type = MediaType.GEOJSON,\n", + " )\n", + "\n", + " # add GeoTiff Asset to item\n", 
+ " label.add_asset(\n", + " key = 'buildings',\n", + " asset = asset\n", + " )\n", + " \n", + " return label" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "35057b0c-107f-43db-ba38-07d9e0a848e7", + "metadata": {}, + "outputs": [], + "source": [ + "def create_label_item(label_path: str) -> Item:\n", + " \"\"\"Helper function that creates a STAC label item\n", + " from a geojson label path and adds it as the Asset\n", + " \"\"\"\n", + " label_meta = get_label_info(label_path)\n", + " \n", + " # rio-stac by default provides the filepath, so we override the item id\n", + " item_id = get_item_id(label_meta[\"href\"], label_meta[\"type\"], \"labels\").replace('_' + label_meta['type'],'')\n", + " item_geometry = get_geojson_extent(label_meta[\"href\"])\n", + " \n", + " return add_label_extension(\n", + " Item(\n", + " id=item_id,\n", + " datetime = get_item_datetime(label_meta['start_datetime']),\n", + " geometry = mapping(item_geometry),\n", + " bbox = item_geometry.bounds,\n", + " properties = {}\n", + " ), \n", + " label_meta,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "6fe27c8b-d0c6-4d8f-8d02-9a45e2738f26", + "metadata": {}, + "outputs": [], + "source": [ + "def add_label_source_link(source: Item, label: Item) -> Item:\n", + " \"\"\"Takes a 1:1 source to label item relationship, and adds the source link to label Item\"\"\"\n", + " \n", + " source_link = Link(\n", + " rel = 'source',\n", + " target = source,\n", + " media_type = MediaType.COG\n", + " )\n", + " label.add_link(source_link)" + ] + }, + { + "cell_type": "markdown", + "id": "dcf45223-af5b-4c27-b730-ac46a8b6382c", + "metadata": {}, + "source": [ + "Now we can examine the label Item output of our function `create_label_item` above after adding the source Item object reference to the Links in the label Item. This is a necessary step so that the label items can point to the appropriate source imagery Items and related Assets in our Catalog. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "e8b2c289-1787-4235-8ca2-b684f118516e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{ 'assets': { 'buildings': { 'href': 'AOI_11_Rotterdam/geojson_buildings/SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson',\n", + " 'type': }},\n", + " 'bbox': ( 4.350277630686814,\n", + " 51.90744911466251,\n", + " 4.356899148164554,\n", + " 51.91047050708562),\n", + " 'geometry': { 'coordinates': ( ( (4.350277630686814, 51.90744911466251),\n", + " (4.350277630686814, 51.91047050708562),\n", + " (4.356899148164554, 51.91047050708562),\n", + " (4.356899148164554, 51.90744911466251),\n", + " (4.350277630686814, 51.90744911466251)),),\n", + " 'type': 'Polygon'},\n", + " 'id': 'SN6_Train_AOI_11_Rotterdam_20190804120223_20190804120456_tile_55_labels',\n", + " 'links': [ { 'href': None,\n", + " 'rel': 'source',\n", + " 'type': }],\n", + " 'properties': { 'datetime': '2019-08-04T12:02:23Z',\n", + " 'label:description': 'SpaceNet 6 Building Footprints',\n", + " 'label:properties': None,\n", + " 'label:type': 'vector'},\n", + " 'stac_extensions': [ 'https://stac-extensions.github.io/label/v1.0.0/schema.json'],\n", + " 'stac_version': '1.0.0',\n", + " 'type': 'Feature'}\n" + ] + } + ], + "source": [ + "label_item = create_label_item(label)\n", + "add_label_source_link(source_item, label_item)\n", + "pp.pprint(label_item.to_dict())" + ] + }, + { + "cell_type": "markdown", + "id": "f09578e1-b674-463d-98fb-eb29129257db", + "metadata": {}, + "source": [ + "Similar to `EOExtention` there are other best practices that can be employed when creating a STAC Item. 
For example, since this is a label Item, we could add `label:overviews`, `label:classes` and `file:values` properties to store more information about the labels and improve indexing on the Catalog:\n", + "\n", + "* `label:overviews` contains the names of the unique classes in the label file and the [Count Objects](https://github.com/stac-extensions/label#count-object) with associated classes\n", + "* `label:classes` is a list of all [Class Objects](https://github.com/stac-extensions/label#class-object) representing possible classes across the labels found in a dataset\n", + "* `file:values` can be used to store the [Mapping Object](https://github.com/stac-extensions/file#mapping-object) between numeric classification values and the descriptive string text equivalent " + ] + }, + { + "cell_type": "markdown", + "id": "67d44b63-2109-48b1-8955-a4088e6dcd29", + "metadata": {}, + "source": [ + "### Define Catalog and Collection metadata properties" + ] + }, + { + "cell_type": "markdown", + "id": "5f67997e-52e0-4c25-a1a8-9385fb717aea", + "metadata": {}, + "source": [ + "Now that we have all the helper functions in place to create both our source and label Items, we need to create the actual Catalog and its child Collections. There will be two Collections in this Catalog, one for the source imagery and one for the labels. The reason for this is that per [STAC Collection Specification](https://github.com/radiantearth/stac-spec/tree/master/collection-spec), we should use Collections so as to make logically related groups of Items and store the metadata that they share. In this example, the first clear delineation between the Collections is that one set is raster source images in `.tif` files, while the other set is vector building footprints in `.geojson` files. 
The second is that the rasters are the source data while the vectors are the label data.\n", + "\n", + "All of the metadata defined below, except for the Catalog and Collection names, comes from the [SpaceNet 6 Challenge](https://spacenet.ai/sn6-challenge/) webpage." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "51d22430-5614-4a16-a9a0-34119aceb0a6", + "metadata": {}, + "outputs": [], + "source": [ + "# catalog specific properties\n", + "catalog_id = \"spacenet_6_rotterdam\"\n", + "catalog_title = \"SpaceNet Multi-Sensor All-Weather Mapping Challenge - Rotterdam\"\n", + "catalog_description = \"\"\"\n", + "In this challenge, the training dataset contained both SAR and EO imagery, however, \n", + "the testing and scoring datasets contained only SAR data. Consequently, the EO data \n", + "could be used for pre-processing the SAR data in some fashion, such as colorization, \n", + "domain adaptation, or image translation, but cannot be used to directly map buildings. 
\n", + "The dataset was structured to mimic real-world scenarios where historical EO data \n", + "may be available, but concurrent EO collection with SAR is often not possible due to \n", + "inconsistent orbits of the sensors, or cloud cover that will render the EO data unusable.\n", + "\"\"\"" + ] + }, + { + "cell_type": "markdown", + "id": "a203e004-dfed-416d-ba37-83606f9d4f03", + "metadata": {}, + "source": [ + "We can create a barebones Catalog with the above required properties" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "8c2fb77d-591d-4553-80f6-4acfcc663ba3", + "metadata": {}, + "outputs": [], + "source": [ + "sn6_catalog = Catalog(\n", + " id=catalog_id,\n", + " title=catalog_title,\n", + " description=catalog_description\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "7835c567-5b97-4bc1-8ff2-a85b7e742f2c", + "metadata": {}, + "outputs": [], + "source": [ + "# collection specific properties\n", + "source_collection_id = \"spacenet_6_rotterdam_source\"\n", + "source_collection_title = \"SpaceNet 6 Rotterdam Source Imagery\"\n", + "\n", + "labels_collection_id = \"spacenet_6_rotterdam_labels\"\n", + "labels_collection_title = \"SpaceNet 6 Rotterdam Labels\"\n", + "\n", + "citation = \"Shermeyer, J., Hogan, D., Brown, J., Etten, A.V., Weir, N., Pacifici, F., Hänsch, R., Bastidas, A., Soenen, S., Bacastow, T.M., & Lewis, R. (2020). SpaceNet 6: Multi-Sensor All Weather Mapping Dataset. 2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), 768-777.\"\n", + "license = \"CC-BY-SA-4.0\"" + ] + }, + { + "cell_type": "markdown", + "id": "1c2eaf79-ccb0-45a4-bef7-c81878272d26", + "metadata": {}, + "source": [ + "Here we will define another helper function that loads a default spatial and temporal extent to each Collection as they're being created, as this is a required attribute. 
That can be manually defined if known up front, or it can be implicitly learned from the spatial and temporal attributes of the Items in each Collection using the `Collection.update_extent_from_items` function, as seen below." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "3ed27d56-3bf0-41cf-9368-aebd2c474258", + "metadata": {}, + "outputs": [], + "source": [ + "def get_default_extent():\n", + "    \"\"\"Returns a default spatial and temporal Extent STAC object\"\"\"\n", + "    # default spatial extent is the entire globe\n", + "    default_spatial_extent = SpatialExtent([[-180, -90, 180, 90]])\n", + "    \n", + "    # default temporal extent is open-ended, starting at the current UTC time;\n", + "    # a STAC interval is a [start, end] pair where None marks an open end\n", + "    right_now = dt.datetime.now(dt.timezone.utc)\n", + "    default_temporal_extent = TemporalExtent([[right_now, None]])\n", + "    \n", + "    return Extent(default_spatial_extent, default_temporal_extent)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "e3b0f434-d132-4fd0-9fe1-6e67361b5eb0", + "metadata": {}, + "outputs": [], + "source": [ + "def create_collection(id, description, license, citation):\n", + " \"\"\"Creates a skeleton Collection with required properties\"\"\"\n", + " collection = Collection(\n", + " id=id,\n", + " license=license,\n", + " extent=get_default_extent(),\n", + " description=description\n", + " )\n", + " \n", + " sci_ext = ScientificExtension.ext(collection, add_if_missing=True)\n", + " sci_ext.apply(citation=citation)\n", + " \n", + " return collection" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "3b9d62ea-4070-48e3-9928-f37eb99c45a1", + "metadata": {}, + "outputs": [], + "source": [ + "sn6_source_collection = create_collection(\n", + " source_collection_id, \n", + " source_collection_title,\n", + " license,\n", + " citation\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "ac1d0c86-1197-47a0-ad2d-e60bf158c122", + "metadata": {}, + "outputs": [], + "source": [ + "sn6_labels_collection = 
create_collection(\n", + " labels_collection_id, \n", + " labels_collection_title,\n", + " license,\n", + " citation\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "19e3f7f4-8d53-4098-8e9e-ed0a954ac0a0", + "metadata": {}, + "source": [ + "### Iteratively add items to Source and Label Collections" + ] + }, + { + "cell_type": "markdown", + "id": "165d88e4-352e-4cc2-8305-5342e287703a", + "metadata": {}, + "source": [ + "There are many ways to do this next step, but given our dataset is so small, we can just use a non-parallelized iterative loop to create the related source and label items at the same time, and then add them to their respective Collections." + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "02e683c0-f27c-4154-a438-ecf2598aa417", + "metadata": {}, + "outputs": [], + "source": [ + "label_paths = [f for f in os.listdir(aoi_dir / \"geojson_buildings\") if f.endswith('geojson')]" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "02a68a71-f8b8-4279-b0b0-40db5d3115aa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823082625_20190823082938_tile_442.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190822082538_20190822082826_tile_4164.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823091132_20190823091448_tile_7924.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190822091156_20190822091502_tile_108.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823123151_20190823123459_tile_2317.geojson\n", + "Creating source and label items from 
SN6_Train_AOI_11_Rotterdam_Buildings_20190822075219_20190822075510_tile_8137.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190804133131_20190804133356_tile_783.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823145306_20190823145612_tile_7218.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_69.geojson\n" + ] + } + ], + "source": [ + "for label_path in label_paths:\n", + " # get the geojson label filename\n", + " label_filename = label_path.split('/')[-1]\n", + " print(f'Creating source and label items from {label_filename}')\n", + " \n", + " # create the source and label items for a given label path\n", + " source_item = create_source_item(label_filename)\n", + " label_item = create_label_item(label_filename)\n", + " \n", + " # add the source link to label item\n", + " add_label_source_link(source_item, label_item)\n", + " \n", + " # add the source and label items to collections\n", + " sn6_source_collection.add_item(source_item)\n", + " sn6_labels_collection.add_item(label_item)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "9a0f7839-f6dc-449b-a591-6619e768b9d2", + "metadata": {}, + "outputs": [], + "source": [ + "sn6_source_collection.update_extent_from_items()\n", + "sn6_labels_collection.update_extent_from_items()" + ] + }, + { + "cell_type": "markdown", + "id": "f6a02ea6-0952-4319-9092-6c094afd8a70", + "metadata": {}, + "source": [ + "### Add children Collections to Catalog" + ] + }, + { + "cell_type": "markdown", + "id": "9acb42f9-34ba-4f5d-8ea3-ce1f446c42af", + "metadata": {}, + "source": [ + "With all the Items added to the source and labels Collections, we can add the two Collections as children of the Catalog." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "f0defa15-2428-4a6d-aef0-d4e9b808f1d3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n" + ] + } + ], + "source": [ + "sn6_catalog.add_children([sn6_source_collection, sn6_labels_collection])\n", + "sn6_catalog.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "2f78189b-ecf4-4be0-8c8a-df35d1eb5002", + "metadata": {}, + "source": [ + "### Normalize Links, validate Catalog and save to file" + ] + }, + { + "cell_type": "markdown", + "id": "3a0caf4e-54f7-46e1-a970-6b404b2c5977", + "metadata": {}, + "source": [ + "The last few steps we need to take in creating the Catalog are normalizing all of the links between the related Items and Collections, validating that it's a valid STAC Catalog, and then saving it to a JSON file in our temporary `spacenet_6_rotterdam` directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "fd3c9feb-7a4f-4865-9344-5f785a5343b5", + "metadata": {}, + "outputs": [], + "source": [ + "sn6_catalog.normalize_hrefs(data_dir.as_posix())" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "9275083f-3595-46ad-9bcd-fea994ed789a", + "metadata": {}, + "outputs": [], + "source": [ + "sn6_catalog.validate_all()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "e44ce22b-4771-4921-8d12-ebd6602f405f", + "metadata": {}, + "outputs": [], + "source": [ + "sn6_catalog.save(catalog_type=CatalogType.SELF_CONTAINED)" + ] + }, + { + "cell_type": "markdown", + "id": "89d5084b-40a7-4dbd-8d72-ddf65f489f34", + "metadata": {}, + "source": [ + "### Compress catalog with dataset source images and labels into single archive" + ] + }, + { + "cell_type": "markdown", + "id": "129e5891-33e4-4205-b704-a4cc58f9b45f", + "metadata": {}, + "source": [ + "The very last step in the Catalog creation process before submitting to Radiant MLHub is compressing the entire archive we just created, so that we have a self-contained catalog bundled with all the source imagery and label files together in a single place. This will speed up processing for the Radiant team downstream." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "23c0eff1-8bc5-4fe6-8230-982cd72cfb24", + "metadata": {}, + "outputs": [], + "source": [ + "def create_tar_gz(archive_name, target_dir):\n", + "    \"\"\"Writes target_dir into the gzipped tar file archive_name, rooted at the directory's own name\"\"\"\n", + "    with tarfile.open(archive_name, \"w:gz\") as tar:\n", + "        # arcname keeps the parent path (e.g. tmp/) out of the archive member names\n", + "        tar.add(target_dir, arcname=os.path.basename(os.path.normpath(target_dir)))\n", + "    print(f\"Archive file {archive_name} created\")" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "54a2bd88-eb2a-4fda-8d6e-d0bace01f7e9", + "metadata": {}, + "outputs": [], + "source": [ + "os.chdir('/home/jovyan/tutorials')" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "2badc04c-59bb-4757-a084-ba6c5ba48d6b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Archive file spacenet_6_rotterdam.tar.gz created\n" + ] + } + ], + "source": [ + "output_archive_filename = f'{data_dir.name}.tar.gz'\n", + "create_tar_gz(output_archive_filename, data_dir.as_posix())" + ] + }, + { + "cell_type": "markdown", + "id": "b02acefe-ee14-4dfb-a826-b2af28a46594", + "metadata": {}, + "source": [ + "### Submit to Radiant MLHub" + ] + }, + { + "cell_type": "markdown", + "id": "c887989c-3202-4325-9cd0-c84f46e58001", + "metadata": {}, + "source": [ + "Now that the archive of your dataset and the Catalog has been created, you should see the tar file in your browser view to the left titled `spacenet_6_rotterdam.tar.gz`. This is the file you will share with the Radiant Earth engineering team to streamline the process of publishing your dataset to Radiant MLHub. To start the process, go to the [General Dataset Inquiry Form](https://radiantearth.typeform.com/to/j0duax) and submit the form with as complete details as possible. This will automatically notify the Radiant team of your request. When we're ready to process and ingest your dataset, we will ask that you share this archive file with us on a cloud storage solution, such as Azure, AWS or Google Cloud." 
+ ] + }, + { + "cell_type": "markdown", + "id": "7f7e9b40-4ad5-45a4-b9bd-335ee5fd364b", + "metadata": {}, + "source": [ + "### Garbage Cleanup" + ] + }, + { + "cell_type": "markdown", + "id": "3422890d-7297-41fb-90a0-bdf4ba730e5d", + "metadata": {}, + "source": [ + "The following commands simply clean up the instance environment of all the archive files and directories you created in this notebook. They are not necessary to run; however, it should be noted that anything kept in the `tmp` directory will be flushed when the notebook server instance is shut down. Therefore, make sure to back up/download any files you wish to keep." + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "85307e44-2bd6-4463-903c-9fea2df3ed48", + "metadata": {}, + "outputs": [], + "source": [ + "# best-effort removal of the tar_path file and the data_dir tree; missing paths are ignored\n", + "tar_path.unlink(missing_ok=True)\n", + "shutil.rmtree(data_dir, ignore_errors=True)" ] }, { "cell_type": "code", "execution_count": null, - "id": "ff611fad-f7a5-45c3-8108-3035e028777f", + "id": "eab53ca3-1876-434d-8bf6-56d59dd19c5c", "metadata": {}, "outputs": [], "source": [] From 6e6dcd372efd000b7181c969bcb73fa1ccd0b250 Mon Sep 17 00:00:00 2001 From: Hamed Alemohammad Date: Tue, 29 Mar 2022 17:05:56 -0400 Subject: [PATCH 4/5] revised descriptions, and file path for storing the catalog --- tutorials/radiant-mlhub-publish-dataset.ipynb | 439 +++--------------- 1 file changed, 53 insertions(+), 386 deletions(-) diff --git a/tutorials/radiant-mlhub-publish-dataset.ipynb b/tutorials/radiant-mlhub-publish-dataset.ipynb index abc32831..0570312f 100644 --- a/tutorials/radiant-mlhub-publish-dataset.ipynb +++ b/tutorials/radiant-mlhub-publish-dataset.ipynb @@ -15,7 +15,7 @@ "id": "4c593de1-8451-4bfd-b004-a0fc6de1aa27", "metadata": {}, "source": [ - "In this tutorial, we will walk through the process of creating a self-contained STAC Catalog, and its 
children Collections for the labels and source imagery in an example machine learning (ML) training dataset. We will then describe the process for getting the dataset read for submission to [Radiant MLHub](https://mlhub.earth/) for manual review and publication.\n", + "In this tutorial, we will walk through the process of creating a self-contained STAC Catalog, and its children Collections for the labels and source imagery in an example machine learning (ML) training dataset. We will then describe the process for getting the dataset ready for submission to [Radiant MLHub](https://mlhub.earth/) for review and publication.\n", "\n", "For this example, we will use the sample training dataset from the [SpaceNet 6: Multi-Sensor All-Weather Mapping](https://spacenet.ai/sn6-challenge/)." ] @@ -38,47 +38,17 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "ce566ab8-e215-4367-9fe6-4da05286d6b5", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting rio-stac==0.3.2\n", - " Downloading rio-stac-0.3.2.tar.gz (8.1 kB)\n", - "Requirement already satisfied: rasterio in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rio-stac==0.3.2) (1.2.10)\n", - "Requirement already satisfied: pystac<2.0.0,>=1.0.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rio-stac==0.3.2) (1.2.0)\n", - "Requirement already satisfied: python-dateutil>=2.7.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from pystac<2.0.0,>=1.0.0->rio-stac==0.3.2) (2.8.2)\n", - "Requirement already satisfied: six>=1.5 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from python-dateutil>=2.7.0->pystac<2.0.0,>=1.0.0->rio-stac==0.3.2) (1.16.0)\n", - "Requirement already satisfied: setuptools in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (59.8.0)\n", - "Requirement already satisfied: numpy in /srv/conda/envs/notebook/lib/python3.8/site-packages (from 
rasterio->rio-stac==0.3.2) (1.22.3)\n", - "Requirement already satisfied: cligj>=0.5 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (0.7.2)\n", - "Requirement already satisfied: affine in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (2.3.0)\n", - "Requirement already satisfied: attrs in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (21.4.0)\n", - "Requirement already satisfied: click>=4.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (7.1.2)\n", - "Requirement already satisfied: certifi in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (2021.10.8)\n", - "Requirement already satisfied: snuggs>=1.4.1 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.4.7)\n", - "Requirement already satisfied: click-plugins in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.1.1)\n", - "Requirement already satisfied: pyparsing>=2.1.6 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from snuggs>=1.4.1->rasterio->rio-stac==0.3.2) (3.0.7)\n", - "Building wheels for collected packages: rio-stac\n", - " Building wheel for rio-stac (setup.py) ... 
\u001b[?25ldone\n", - "\u001b[?25h Created wheel for rio-stac: filename=rio_stac-0.3.2-py3-none-any.whl size=8279 sha256=13e01dd9e6dcd02c086ddfc260de260387dea212829ab082de08519d764b8999\n", - " Stored in directory: /home/jovyan/.cache/pip/wheels/42/12/1a/677dda98b5bb48936e8636e4e71ddc6ed65ee7f3a849ca2c77\n", - "Successfully built rio-stac\n", - "Installing collected packages: rio-stac\n", - "Successfully installed rio-stac-0.3.2\n" - ] - } - ], + "outputs": [], "source": [ "!pip install rio-stac==0.3.2" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "52420a33-ae4e-4b16-b893-f42dd67909fb", "metadata": {}, "outputs": [], @@ -131,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "022cbe6f-1d8e-4a61-8615-0736926f4a27", "metadata": {}, "outputs": [], @@ -158,20 +128,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "a6be708a-b187-48c9-8d5f-27a896ed10a0", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "100 123M 100 123M 0 0 17.2M 0 0:00:07 0:00:07 --:--:-- 21.9M\n" - ] - } - ], + "outputs": [], "source": [ "if tar_path.exists():\n", " print(f\"File {tar_path} already exists, skipping download\")\n", @@ -189,19 +149,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "d76015d1-794c-412c-9352-6787be3a35f4", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Extracted data to /tmp/SN6_buildings_AOI_11_Rotterdam_train_sample\n", - "Renamed folder to /tmp/spacenet_6_rotterdam\n" - ] - } - ], + "outputs": [], "source": [ "if untar_path.exists():\n", " print(f\"Data already extracted from archive; skipping extract.\")\n", @@ -229,87 +180,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 
null, "id": "273826a9-e03a-4124-8b9f-8ceeb61fde51", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/tmp/spacenet_6_rotterdam\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/PS-RGB\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823123151_20190823123459_tile_2317.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823145306_20190823145612_tile_7218.tif\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/PAN\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823123151_20190823123459_tile_2317.tif\n", - 
"\tSN6_Train_AOI_11_Rotterdam_PAN_20190823145306_20190823145612_tile_7218.tif\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/SAR-Intensity\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823123151_20190823123459_tile_2317.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823145306_20190823145612_tile_7218.tif\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/geojson_buildings\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_69.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804133131_20190804133356_tile_783.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190822075219_20190822075510_tile_8137.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190822082538_20190822082826_tile_4164.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190822091156_20190822091502_tile_108.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823082625_20190823082938_tile_442.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823091132_20190823091448_tile_7924.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823123151_20190823123459_tile_2317.geojson\n", - 
"\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823145306_20190823145612_tile_7218.geojson\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/PS-RGBNIR\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823123151_20190823123459_tile_2317.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823145306_20190823145612_tile_7218.tif\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/SummaryData\n", - "\tSN6_TrainSample_AOI_11_Rotterdam_Buildings.csv\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/RGBNIR\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823123151_20190823123459_tile_2317.tif\n", - 
"\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823145306_20190823145612_tile_7218.tif\n" - ] - } - ], + "outputs": [], "source": [ "for root, _, files in os.walk(data_dir):\n", " print(root)\n", @@ -331,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "a5e16d25-ddf3-45ae-bd49-91888ab5a89c", "metadata": {}, "outputs": [], @@ -419,40 +293,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "dcb37528-fc62-4eeb-904d-704ec85b9695", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'href': 'AOI_11_Rotterdam/RGBNIR/SN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", - " 'type': 'RGBNIR',\n", - " 'start_datetime': '20190804120223',\n", - " 'end_datetime': '20190804120456'},\n", - " {'href': 'AOI_11_Rotterdam/PS-RGBNIR/SN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", - " 'type': 'PS-RGBNIR',\n", - " 'start_datetime': '20190804120223',\n", - " 'end_datetime': '20190804120456'},\n", - " {'href': 'AOI_11_Rotterdam/SAR-Intensity/SN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif',\n", - " 'type': 'SAR-Intensity',\n", - " 'start_datetime': '20190804120223',\n", - " 'end_datetime': '20190804120456'},\n", - " {'href': 'AOI_11_Rotterdam/PAN/SN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif',\n", - " 'type': 'PAN',\n", - " 'start_datetime': '20190804120223',\n", - " 'end_datetime': '20190804120456'},\n", - " {'href': 'AOI_11_Rotterdam/PS-RGB/SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif',\n", - " 'type': 'PS-RGB',\n", - " 'start_datetime': '20190804120223',\n", - " 'end_datetime': '20190804120456'}]" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "label = \"SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\"\n", "source_info = 
get_source_info(label)\n", @@ -461,24 +305,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "07aa9f59-7a7c-41d4-a5ce-49c7e2342d50", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'href': 'AOI_11_Rotterdam/geojson_buildings/SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson',\n", - " 'type': 'Buildings',\n", - " 'start_datetime': '20190804120223',\n", - " 'end_datetime': '20190804120456'}" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "label_info = get_label_info(label)\n", "label_info" @@ -502,7 +332,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "eff8b798-25ca-4a9f-abf8-c8e59f962f2f", "metadata": {}, "outputs": [], @@ -514,7 +344,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "6782d5c1-100b-4f56-960a-844fcbdb0d09", "metadata": {}, "outputs": [], @@ -573,82 +403,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "e769016b-cf1c-4dca-ab42-2e3588f8e668", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{ 'assets': { 'PAN': { 'href': 'AOI_11_Rotterdam/PAN/SN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif',\n", - " 'roles': ['data'],\n", - " 'title': 'PAN',\n", - " 'type': 'image/tiff; application=geotiff; '\n", - " 'profile=cloud-optimized'},\n", - " 'PS-RGB': { 'href': 'AOI_11_Rotterdam/PS-RGB/SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif',\n", - " 'roles': ['data'],\n", - " 'title': 'PS-RGB',\n", - " 'type': 'image/tiff; application=geotiff; '\n", - " 'profile=cloud-optimized'},\n", - " 'PS-RGBNIR': { 'href': 'AOI_11_Rotterdam/PS-RGBNIR/SN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", - " 'roles': ['data'],\n", - " 'title': 'PS-RGBNIR',\n", - 
" 'type': 'image/tiff; application=geotiff; '\n", - " 'profile=cloud-optimized'},\n", - " 'RGBNIR': { 'href': 'AOI_11_Rotterdam/RGBNIR/SN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", - " 'roles': ['data'],\n", - " 'title': 'RGBNIR'},\n", - " 'SAR-Intensity': { 'href': 'AOI_11_Rotterdam/SAR-Intensity/SN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif',\n", - " 'roles': ['data'],\n", - " 'title': 'SAR-Intensity',\n", - " 'type': 'image/tiff; application=geotiff; '\n", - " 'profile=cloud-optimized'}},\n", - " 'bbox': [ 4.350277623237341,\n", - " 51.90744725678807,\n", - " 4.356939496573024,\n", - " 51.9115675425874],\n", - " 'geometry': { 'coordinates': [ [ [4.350277623237341, 51.9115675425874],\n", - " [4.350277623237341, 51.90744725678807],\n", - " [4.356939496573024, 51.90744725678807],\n", - " [4.356939496573024, 51.9115675425874],\n", - " [4.350277623237341, 51.9115675425874]]],\n", - " 'type': 'Polygon'},\n", - " 'id': 'SN6_Train_AOI_11_Rotterdam_20190804120223_20190804120456_tile_55_source',\n", - " 'links': [],\n", - " 'properties': { 'datetime': '2019-08-04T12:02:23Z',\n", - " 'proj:bbox': [ 592886.1399464327,\n", - " 5751614.151231687,\n", - " 593336.1616884505,\n", - " 5752064.1729737045],\n", - " 'proj:epsg': 32631,\n", - " 'proj:geometry': { 'coordinates': [ [ [ 592886.1399464327,\n", - " 5752064.1729737045],\n", - " [ 592886.1399464327,\n", - " 5751614.151231687],\n", - " [ 593336.1616884505,\n", - " 5751614.151231687],\n", - " [ 593336.1616884505,\n", - " 5752064.1729737045],\n", - " [ 592886.1399464327,\n", - " 5752064.1729737045]]],\n", - " 'type': 'Polygon'},\n", - " 'proj:shape': [450, 450],\n", - " 'proj:transform': [ 1.0000483155950517,\n", - " 0.0,\n", - " 592886.1399464327,\n", - " 0.0,\n", - " -1.0000483155950517,\n", - " 5752064.1729737045,\n", - " 0.0,\n", - " 0.0,\n", - " 1.0]},\n", - " 'stac_extensions': [ 
'https://stac-extensions.github.io/projection/v1.0.0/schema.json'],\n", - " 'stac_version': '1.0.0',\n", - " 'type': 'Feature'}\n" - ] - } - ], + "outputs": [], "source": [ "source_item = create_source_item(label)\n", "pp.pprint(source_item.to_dict())" @@ -672,7 +430,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "29098a46-f86a-4439-9538-c351b011ab95", "metadata": {}, "outputs": [], @@ -684,7 +442,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "d28005c3-feaf-4932-a376-bf3c85cae173", "metadata": {}, "outputs": [], @@ -709,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "fc37d0b5-5e2a-47b5-9837-78aa5f30a9c0", "metadata": {}, "outputs": [], @@ -744,7 +502,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "35057b0c-107f-43db-ba38-07d9e0a848e7", "metadata": {}, "outputs": [], @@ -773,7 +531,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "6fe27c8b-d0c6-4d8f-8d02-9a45e2738f26", "metadata": {}, "outputs": [], @@ -799,40 +557,10 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "e8b2c289-1787-4235-8ca2-b684f118516e", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{ 'assets': { 'buildings': { 'href': 'AOI_11_Rotterdam/geojson_buildings/SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson',\n", - " 'type': }},\n", - " 'bbox': ( 4.350277630686814,\n", - " 51.90744911466251,\n", - " 4.356899148164554,\n", - " 51.91047050708562),\n", - " 'geometry': { 'coordinates': ( ( (4.350277630686814, 51.90744911466251),\n", - " (4.350277630686814, 51.91047050708562),\n", - " (4.356899148164554, 51.91047050708562),\n", - " (4.356899148164554, 51.90744911466251),\n", - " (4.350277630686814, 51.90744911466251)),),\n", - " 'type': 'Polygon'},\n", - " 'id': 
'SN6_Train_AOI_11_Rotterdam_20190804120223_20190804120456_tile_55_labels',\n", - " 'links': [ { 'href': None,\n", - " 'rel': 'source',\n", - " 'type': }],\n", - " 'properties': { 'datetime': '2019-08-04T12:02:23Z',\n", - " 'label:description': 'SpaceNet 6 Building Footprints',\n", - " 'label:properties': None,\n", - " 'label:type': 'vector'},\n", - " 'stac_extensions': [ 'https://stac-extensions.github.io/label/v1.0.0/schema.json'],\n", - " 'stac_version': '1.0.0',\n", - " 'type': 'Feature'}\n" - ] - } - ], + "outputs": [], "source": [ "label_item = create_label_item(label)\n", "add_label_source_link(source_item, label_item)\n", @@ -848,7 +576,7 @@ "\n", "* `label:overviews` contain the names of the unique classes in the label file and the [Count Objects](https://github.com/stac-extensions/label#count-object) with associated classes\n", "* `label:classes` is a list of all [Class Objects](https://github.com/stac-extensions/label#count-object) representing possible classes across the labels found in a dataset\n", - "* `file:values` can be used to store the [Mapping Object](https://github.com/stac-extensions/file#mapping-object) between numeric classification values and the descriptive string text equivelant " + "* `file:values` can be used to store the [Mapping Object](https://github.com/stac-extensions/file#mapping-object) between numeric classification values and the descriptive string text equivalent " ] }, { @@ -871,7 +599,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "51d22430-5614-4a16-a9a0-34119aceb0a6", "metadata": {}, "outputs": [], @@ -900,7 +628,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "8c2fb77d-591d-4553-80f6-4acfcc663ba3", "metadata": {}, "outputs": [], @@ -914,7 +642,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "7835c567-5b97-4bc1-8ff2-a85b7e742f2c", "metadata": {}, "outputs": [], @@ -940,7 +668,7 @@ }, { "cell_type": 
"code", - "execution_count": 35, + "execution_count": null, "id": "3ed27d56-3bf0-41cf-9368-aebd2c474258", "metadata": {}, "outputs": [], @@ -959,7 +687,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "e3b0f434-d132-4fd0-9fe1-6e67361b5eb0", "metadata": {}, "outputs": [], @@ -981,7 +709,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "3b9d62ea-4070-48e3-9928-f37eb99c45a1", "metadata": {}, "outputs": [], @@ -996,7 +724,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "id": "ac1d0c86-1197-47a0-ad2d-e60bf158c122", "metadata": {}, "outputs": [], @@ -1027,7 +755,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "id": "02e683c0-f27c-4154-a438-ecf2598aa417", "metadata": {}, "outputs": [], @@ -1037,27 +765,10 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "id": "02a68a71-f8b8-4279-b0b0-40db5d3115aa", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823082625_20190823082938_tile_442.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190822082538_20190822082826_tile_4164.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823091132_20190823091448_tile_7924.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190822091156_20190822091502_tile_108.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823123151_20190823123459_tile_2317.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190822075219_20190822075510_tile_8137.geojson\n", - "Creating source 
and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190804133131_20190804133356_tile_783.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823145306_20190823145612_tile_7218.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_69.geojson\n" - ] - } - ], + "outputs": [], "source": [ "for label_path in label_paths:\n", " # get the geojson label filename\n", @@ -1078,7 +789,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "id": "9a0f7839-f6dc-449b-a591-6619e768b9d2", "metadata": {}, "outputs": [], @@ -1105,40 +816,10 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "f0defa15-2428-4a6d-aef0-d4e9b808f1d3", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "* \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n" - ] - } - ], + "outputs": [], "source": [ "sn6_catalog.add_children([sn6_source_collection, sn6_labels_collection])\n", "sn6_catalog.describe()" @@ -1162,7 +843,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "id": "fd3c9feb-7a4f-4865-9344-5f785a5343b5", "metadata": {}, "outputs": [], @@ -1172,7 +853,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": null, "id": "9275083f-3595-46ad-9bcd-fea994ed789a", "metadata": {}, "outputs": [], @@ -1182,7 +863,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": null, "id": "e44ce22b-4771-4921-8d12-ebd6602f405f", "metadata": {}, "outputs": [], @@ -1208,7 +889,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": null, "id": "23c0eff1-8bc5-4fe6-8230-982cd72cfb24", "metadata": {}, "outputs": [], 
@@ -1221,28 +902,20 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": null, "id": "54a2bd88-eb2a-4fda-8d6e-d0bace01f7e9", "metadata": {}, "outputs": [], "source": [ - "os.chdir('/home/jovyan/tutorials')" + "os.chdir('/home/jovyan/PlanetaryComputerExamples/tutorials')" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": null, "id": "2badc04c-59bb-4757-a084-ba6c5ba48d6b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Archive file spacenet_6_rotterdam.tar.gz created\n" - ] - } - ], + "outputs": [], "source": [ "output_archive_filename = f'{data_dir.name}.tar.gz'\n", "create_tar_gz(output_archive_filename, data_dir.as_posix())" @@ -1261,7 +934,9 @@ "id": "c887989c-3202-4325-9cd0-c84f46e58001", "metadata": {}, "source": [ - "Now that the archive of your dataset and the Catalog has been created, you should see the tar file in your browser view to the left titled `spacenet_6_rotterdam.tar.gz`. This is the file you will share with the Radiant Earth engineering team to streamline the process of publishing your dataset to Radiant MLHub. To start the process, go to the [General Dataset Inquiry Form](https://radiantearth.typeform.com/to/j0duax) and submit the form with as complete details as possible. This will automatically notify the Radiant team of your request. When we're ready to process and ingest your dataset, we will ask taht you share this archive file with us on a cloud storage solution, such as Azure, AWS or Google Cloud." + "Now that the archive of your dataset and the Catalog has been created, you should see the tar file in your browser view to the left titled `spacenet_6_rotterdam.tar.gz`. You would need to generate a similar archive for your own dataset if you want to publish it on [Radiant MLHub](https://mlhub.earth/). This is the file you will share with the Radiant Earth engineering team to streamline the process of publishing your dataset to Radiant MLHub. 
\n", + "\n", + "To start the process, go to the [Contribute](https://mlhub.earth/contribute) page on the Radiant MLHub website, and click on the General Dataset Inquiry Form (you need to create an account on Radiant MLHub to access this page). Submit the form with as much detail as possible. This will automatically notify the Radiant team of your request. When we're ready to process and ingest your dataset, we will ask that you share this archive file with us on a cloud storage solution, such as Azure, AWS, Google Cloud/Drive or Dropbox." ] }, { @@ -1282,7 +957,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": null, "id": "85307e44-2bd6-4463-903c-9fea2df3ed48", "metadata": {}, "outputs": [], @@ -1296,14 +971,6 @@ "if os.path.exists(data_dir):\n", " os.remove(data_dir)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eab53ca3-1876-434d-8bf6-56d59dd19c5c", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 43c7839154856e7ea2593da390e26267bfe36e20 Mon Sep 17 00:00:00 2001 From: Kendall Smith Date: Thu, 31 Mar 2022 16:41:10 -0700 Subject: [PATCH 5/5] adding mlhub publish notebook passing flake8, click bug in config.yaml --- .pre-commit-config.yaml | 2 +- tutorials/radiant-mlhub-publish-dataset.ipynb | 258 +++++++++--------- 2 files changed, 136 insertions(+), 124 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 351d5ff5..c4b8db11 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ repos: rev: 0.12.0 hooks: - id: nbqa-black - additional_dependencies: [black==21.5b2] + additional_dependencies: [black==21.5b2, click==8.0.4] args: [--nbqa-mutate] - id: nbqa-flake8 additional_dependencies: [flake8==3.9.2] diff --git a/tutorials/radiant-mlhub-publish-dataset.ipynb b/tutorials/radiant-mlhub-publish-dataset.ipynb index 0570312f..6290099e 100644 --- a/tutorials/radiant-mlhub-publish-dataset.ipynb +++ b/tutorials/radiant-mlhub-publish-dataset.ipynb 
@@ -55,31 +55,37 @@ "source": [ "import enum\n", "import os\n", - "import tarfile\n", - "import shutil\n", "import pathlib\n", "import re\n", "import shutil\n", "import tarfile\n", "import tempfile\n", - "import urllib.parse\n", - "from dateutil.parser import parse\n", "import datetime as dt\n", "from typing import List, Dict, Tuple\n", "\n", "import pystac\n", "import rasterio\n", "from pystac.utils import str_to_datetime\n", - "from pystac.extensions.eo import Band, EOExtension\n", "from pystac.extensions.label import LabelExtension\n", "from rio_stac.stac import create_stac_item\n", "import geopandas as gpd\n", - "from pystac import Catalog, Collection, Item, MediaType, \\\n", - " Asset, Link, Extent, SpatialExtent, TemporalExtent, CatalogType\n", + "from pystac import (\n", + " Catalog,\n", + " Collection,\n", + " Item,\n", + " MediaType,\n", + " Asset,\n", + " Link,\n", + " Extent,\n", + " SpatialExtent,\n", + " TemporalExtent,\n", + " CatalogType,\n", + ")\n", "from pystac.extensions.scientific import ScientificExtension\n", - "from shapely.geometry import GeometryCollection, Polygon, mapping, shape\n", + "from shapely.geometry import Polygon, mapping\n", "\n", "from pprint import PrettyPrinter\n", + "\n", "pp = PrettyPrinter(indent=2)" ] }, @@ -109,7 +115,10 @@ "# Get the TMP directory for this system\n", "tmp_dir = pathlib.Path(tempfile.gettempdir())\n", "\n", - "tar_url = \"https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/tarballs/SN6_buildings_AOI_11_Rotterdam_train_sample.tar.gz\"\n", + "tar_url = (\n", + " \"https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings\"\n", + " \"/tarballs/SN6_buildings_AOI_11_Rotterdam_train_sample.tar.gz\"\n", + ")\n", "tar_root = \"https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/\"\n", "# tar_path = tmp_dir / \"sample_data.tar.gz\"\n", "# data_dir = tmp_dir / \"sample_data\"\n", @@ -155,14 +164,14 @@ "outputs": [], "source": [ "if untar_path.exists():\n", - " print(f\"Data 
already extracted from archive; skipping extract.\")\n", + " print(\"Data already extracted from archive; skipping extract.\")\n", "else:\n", " os.makedirs(untar_path)\n", " !tar -zxf {tar_path} -C {tmp_dir}\n", - " \n", + "\n", " if os.path.exists(untar_path):\n", " print(f\"Extracted data to {untar_path}\")\n", - " \n", + "\n", " os.makedirs(data_dir, exist_ok=True)\n", " !mv {untar_path}/* {data_dir}\n", " print(f\"Renamed folder to {data_dir}\")\n", @@ -216,24 +225,26 @@ "\n", "labels_pattern = re.compile(\n", " r\"^(?PSN6_Train_AOI_11_Rotterdam)\"\n", - " \"_Buildings_\"\n", - " \"(?P\\d{14})\"\n", - " \"_\"\n", - " \"(?P\\d{14})\"\n", - " \"_tile_\"\n", - " \"(?P\\d+)\"\n", - " \"\\.geojson$\"\n", + " r\"_Buildings_\"\n", + " r\"(?P\\d{14})\"\n", + " r\"_\"\n", + " r\"(?P\\d{14})\"\n", + " r\"_tile_\"\n", + " r\"(?P\\d+)\"\n", + " r\"\\.geojson$\"\n", ")\n", "\n", + "\n", "class SourceType(str, enum.Enum):\n", - " \"\"\"Enumerates the possible source types.\n", - " \"\"\"\n", + " \"\"\"Enumerates the possible source types.\"\"\"\n", + "\n", " RGBNIR = \"RGBNIR\"\n", " PS_RGBNIR = \"PS-RGBNIR\"\n", " SAR_Intensity = \"SAR-Intensity\"\n", " PAN = \"PAN\"\n", " PS_RGB = \"PS-RGB\"\n", "\n", + "\n", "def strip_meta_matches(label_path: str) -> Tuple[any]:\n", " \"\"\"Uses Regex pattern above to strip out relevant metadata about the file\"\"\"\n", " label_path = os.fspath(label_path)\n", @@ -241,45 +252,46 @@ " match = labels_pattern.match(label_path.name)\n", " if match is None:\n", " raise ValueError(f\"Invalid filename {label_filename}\")\n", - " \n", + "\n", " prefix = match.group(\"prefix\")\n", " start_datetime = match.group(\"start_datetime\")\n", " end_datetime = match.group(\"end_datetime\")\n", " tile = match.group(\"tile\")\n", - " \n", + "\n", " return prefix, start_datetime, end_datetime, tile\n", - " \n", + "\n", + "\n", "def get_source_info(label_path: str) -> List[Dict[str, any]]:\n", - " \"\"\"Gets a list of paths (as pathlib.Path instances) to 
source data associated with\n", - " the given label file path.\n", + " \"\"\"Gets a list of paths (as pathlib.Path instances) to source data associated\n", + " with the given label file path.\n", " \"\"\"\n", - " \n", + "\n", " prefix, start_datetime, end_datetime, tile = strip_meta_matches(label_path)\n", "\n", " return [\n", " {\n", - " # We will use relative paths here when archiving the entire catalog with the dataset\n", - " \"href\": f\"{aoi_name}/{source_type.value}/\" \\\n", - " f\"{prefix}_{source_type.value}_{start_datetime}_{end_datetime}_tile_{tile}.tif\",\n", + " # We will use relative paths when archiving the entire catalog with the dataset\n", + " \"href\": f\"{aoi_name}/{source_type.value}/{prefix}_{source_type.value}\"\n", + " f\"_{start_datetime}_{end_datetime}_tile_{tile}.tif\",\n", " \"type\": source_type.value,\n", " \"start_datetime\": start_datetime,\n", " \"end_datetime\": end_datetime,\n", " }\n", " for source_type in SourceType\n", " ]\n", - " \n", + "\n", + "\n", "def get_label_info(label_path: str) -> List[Dict[str, any]]:\n", - " \"\"\"Gets the single path and metadata attributes from the given label path\n", - " \"\"\"\n", - " \n", + " \"\"\"Gets the single path and metadata attributes from the given label path\"\"\"\n", + "\n", " prefix, start_datetime, end_datetime, tile = strip_meta_matches(label_path)\n", - " \n", + "\n", " return {\n", - " \"href\": f\"{aoi_name}/geojson_buildings/\" \\\n", - " f\"{prefix}_Buildings_{start_datetime}_{end_datetime}_tile_{tile}.geojson\",\n", + " \"href\": f\"{aoi_name}/geojson_buildings/{prefix}_Buildings\"\n", + " f\"_{start_datetime}_{end_datetime}_tile_{tile}.geojson\",\n", " \"type\": \"Buildings\",\n", " \"start_datetime\": start_datetime,\n", - " \"end_datetime\": end_datetime\n", + " \"end_datetime\": end_datetime,\n", " }" ] }, @@ -298,7 +310,9 @@ "metadata": {}, "outputs": [], "source": [ - "label = \"SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\"\n", 
+ "label = (\n", + " \"SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\"\n", + ")\n", "source_info = get_source_info(label)\n", "source_info" ] @@ -339,7 +353,12 @@ "source": [ "def get_item_id(source_href: str, source_type: str, item_type: str) -> str:\n", " \"\"\"Helper function to return the appropriate Item ID\"\"\"\n", - " return source_href.split('/')[-1].replace(f'_{source_type}','').replace('.tif',f'_{item_type}').replace('.geojson',f'_{item_type}')" + " return (\n", + " source_href.split(\"/\")[-1]\n", + " .replace(f\"_{source_type}\", \"\")\n", + " .replace(\".tif\", f\"_{item_type}\")\n", + " .replace(\".geojson\", f\"_{item_type}\")\n", + " )" ] }, { @@ -354,16 +373,16 @@ " from a source image Asset, and adds the rest of the images as Assets\n", " \"\"\"\n", " sources = get_source_info(label_path)\n", - " \n", + "\n", " # we need the first source object\n", " first_source = sources[0]\n", - " \n", + "\n", " # rio-stac by default provides the filepath, so we override the item id\n", " item_id = get_item_id(first_source[\"href\"], first_source[\"type\"], \"source\")\n", - " \n", + "\n", " # Bootstrap the source item using rio-stac based on the first asset\n", " with rasterio.open(sources[0][\"href\"]) as src:\n", - " \n", + "\n", " item = create_stac_item(\n", " id=item_id,\n", " source=src,\n", @@ -373,23 +392,25 @@ " input_datetime=str_to_datetime(first_source[\"start_datetime\"]),\n", " with_proj=True,\n", " )\n", - " \n", + "\n", " # rio-stac does not add the Asset \"type\" or \"title\" fields, so we add them manually\n", " # (all assets are Cloud-Optimized GeoTIFFs)\n", " item.assets[first_source[\"type\"]].type = MediaType.COG\n", " item.assets[first_source[\"type\"]].title = first_source[\"type\"]\n", - " \n", - " # Since the spatiotemporal metadata is the same for all assets, we do not need to read \n", + "\n", + " # Since the spatiotemporal metadata is the same for all assets, we do not need to read\n", " # each 
one.\n", " for source in sources[1:]:\n", - " asset = pystac.Asset.from_dict({\n", - " \"href\": source[\"href\"],\n", - " \"roles\": [\"data\"],\n", - " \"type\": str(MediaType.COG),\n", - " \"title\": source[\"type\"]\n", - " })\n", + " asset = pystac.Asset.from_dict(\n", + " {\n", + " \"href\": source[\"href\"],\n", + " \"roles\": [\"data\"],\n", + " \"type\": str(MediaType.COG),\n", + " \"title\": source[\"type\"],\n", + " }\n", + " )\n", " item.add_asset(source[\"type\"], asset)\n", - " \n", + "\n", " return item" ] }, @@ -437,7 +458,7 @@ "source": [ "def get_item_datetime(dt_str: str) -> dt.datetime:\n", " \"\"\"Returns an items datetime based on ID string pattern\"\"\"\n", - " return dt.datetime.strptime(str(dt_str), '%Y%m%d%H%M%S') #20190804120223" + " return dt.datetime.strptime(str(dt_str), \"%Y%m%d%H%M%S\")" ] }, { @@ -448,8 +469,10 @@ "outputs": [], "source": [ "def get_geojson_extent(fname: str) -> Polygon:\n", - " \"\"\"Takes a path to GeoJSON vector file and returns the Polygon geometry for an Item reprojected\"\"\"\n", - " \n", + " \"\"\"Takes a path to GeoJSON vector file and returns\n", + " the Polygon geometry for an Item reprojected\n", + " \"\"\"\n", + "\n", " gdf = gpd.read_file(fname)\n", " gdf = gdf.to_crs(\"EPSG:4326\")\n", " bounds = gdf.total_bounds\n", @@ -459,7 +482,7 @@ " (bounds[0], bounds[3]),\n", " (bounds[2], bounds[3]),\n", " (bounds[2], bounds[1]),\n", - " (bounds[0], bounds[1])\n", + " (bounds[0], bounds[1]),\n", " )\n", " )\n", " return geometry" @@ -475,28 +498,21 @@ "def add_label_extension(label: Item, label_meta: Dict[str, any]) -> Item:\n", " \"\"\"This applies the STAC LabelExtension to the label item and related properties\"\"\"\n", " # apply the Label Extention\n", - " label_ext = LabelExtension.ext(\n", - " label, \n", - " add_if_missing = True\n", - " )\n", + " label_ext = LabelExtension.ext(label, add_if_missing=True)\n", "\n", " label_ext.apply(\n", - " label_description = \"SpaceNet 6 Building Footprints\",\n", - " 
label_type = 'vector'\n", + " label_description=\"SpaceNet 6 Building Footprints\", label_type=\"vector\"\n", " )\n", "\n", " # instantiate GeoJSON Asset\n", - " asset=Asset(\n", - " href = label_meta[\"href\"],\n", - " media_type = MediaType.GEOJSON,\n", + " asset = Asset(\n", + " href=label_meta[\"href\"],\n", + " media_type=MediaType.GEOJSON,\n", " )\n", "\n", " # add GeoTiff Asset to item\n", - " label.add_asset(\n", - " key = 'buildings',\n", - " asset = asset\n", - " )\n", - " \n", + " label.add_asset(key=\"buildings\", asset=asset)\n", + "\n", " return label" ] }, @@ -512,19 +528,21 @@ " from a geojson label path and adds it as the Asset\n", " \"\"\"\n", " label_meta = get_label_info(label_path)\n", - " \n", + "\n", " # rio-stac by default provides the filepath, so we override the item id\n", - " item_id = get_item_id(label_meta[\"href\"], label_meta[\"type\"], \"labels\").replace('_' + label_meta['type'],'')\n", + " item_id = get_item_id(label_meta[\"href\"], label_meta[\"type\"], \"labels\").replace(\n", + " \"_\" + label_meta[\"type\"], \"\"\n", + " )\n", " item_geometry = get_geojson_extent(label_meta[\"href\"])\n", - " \n", + "\n", " return add_label_extension(\n", " Item(\n", " id=item_id,\n", - " datetime = get_item_datetime(label_meta['start_datetime']),\n", - " geometry = mapping(item_geometry),\n", - " bbox = item_geometry.bounds,\n", - " properties = {}\n", - " ), \n", + " datetime=get_item_datetime(label_meta[\"start_datetime\"]),\n", + " geometry=mapping(item_geometry),\n", + " bbox=item_geometry.bounds,\n", + " properties={},\n", + " ),\n", " label_meta,\n", " )" ] @@ -537,13 +555,11 @@ "outputs": [], "source": [ "def add_label_source_link(source: Item, label: Item) -> Item:\n", - " \"\"\"Takes a 1:1 source to label item relationship, and adds the source link to label Item\"\"\"\n", - " \n", - " source_link = Link(\n", - " rel = 'source',\n", - " target = source,\n", - " media_type = MediaType.COG\n", - " )\n", + " \"\"\"Takes a 1:1 source to 
label item relationship,\n", + " and adds the source link to label Item\n", + " \"\"\"\n", + "\n", + " source_link = Link(rel=\"source\", target=source, media_type=MediaType.COG)\n", " label.add_link(source_link)" ] }, @@ -608,12 +624,12 @@ "catalog_id = \"spacenet_6_rotterdam\"\n", "catalog_title = \"SpaceNet Multi-Sensor All-Weather Mapping Challenge - Rotterdam\"\n", "catalog_description = \"\"\"\n", - "In this challenge, the training dataset contained both SAR and EO imagery, however, \n", - "the testing and scoring datasets contained only SAR data. Consequently, the EO data \n", - "could be used for pre-processing the SAR data in some fashion, such as colorization, \n", - "domain adaptation, or image translation, but cannot be used to directly map buildings. \n", - "The dataset was structured to mimic real-world scenarios where historical EO data \n", - "may be available, but concurrent EO collection with SAR is often not possible due to \n", + "In this challenge, the training dataset contained both SAR and EO imagery, however,\n", + "the testing and scoring datasets contained only SAR data. 
Consequently, the EO data\n", + "could be used for pre-processing the SAR data in some fashion, such as colorization,\n", + "domain adaptation, or image translation, but cannot be used to directly map buildings.\n", + "The dataset was structured to mimic real-world scenarios where historical EO data\n", + "may be available, but concurrent EO collection with SAR is often not possible due to\n", "inconsistent orbits of the sensors, or cloud cover that will render the EO data unusable.\n", "\"\"\"" ] @@ -634,9 +650,7 @@ "outputs": [], "source": [ "sn6_catalog = Catalog(\n", - " id=catalog_id,\n", - " title=catalog_title,\n", - " description=catalog_description\n", + " id=catalog_id, title=catalog_title, description=catalog_description\n", ")" ] }, @@ -654,7 +668,13 @@ "labels_collection_id = \"spacenet_6_rotterdam_labels\"\n", "labels_collection_title = \"SpaceNet 6 Rotterdam Labels\"\n", "\n", - "citation = \"Shermeyer, J., Hogan, D., Brown, J., Etten, A.V., Weir, N., Pacifici, F., Hänsch, R., Bastidas, A., Soenen, S., Bacastow, T.M., & Lewis, R. (2020). SpaceNet 6: Multi-Sensor All Weather Mapping Dataset. 2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), 768-777.\"\n", + "citation = \"\"\"\n", + "Shermeyer, J., Hogan, D., Brown, J., Etten, A.V., Weir, N., Pacifici, F.,\n", + "Hänsch, R., Bastidas, A., Soenen, S., Bacastow, T.M., & Lewis, R. (2020).\n", + "SpaceNet 6: Multi-Sensor All Weather Mapping Dataset. 
2020 IEEE/CVF\n", + "Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), 768-777.\n", + "\"\"\"\n", + "\n", "license = \"CC-BY-SA-4.0\"" ] }, @@ -677,11 +697,10 @@ " \"\"\"Returns a default spatial and temporal Extent STAC object\"\"\"\n", " # default spatial extent is the entire globe\n", " default_spatial_extent = SpatialExtent([[-180, -90, 180, 90]])\n", - " \n", + "\n", " # default temporal extent is the current date\n", - " right_now = dt.datetime.now().strftime('%Y-%m-%d')\n", " default_temporal_extent = TemporalExtent([[]])\n", - " \n", + "\n", " return Extent(default_spatial_extent, default_temporal_extent)" ] }, @@ -695,15 +714,12 @@ "def create_collection(id, description, license, citation):\n", " \"\"\"Creates a skeleton Collection with required properties\"\"\"\n", " collection = Collection(\n", - " id=id,\n", - " license=license,\n", - " extent=get_default_extent(),\n", - " description=description\n", + " id=id, license=license, extent=get_default_extent(), description=description\n", " )\n", - " \n", + "\n", " sci_ext = ScientificExtension.ext(collection, add_if_missing=True)\n", " sci_ext.apply(citation=citation)\n", - " \n", + "\n", " return collection" ] }, @@ -715,10 +731,7 @@ "outputs": [], "source": [ "sn6_source_collection = create_collection(\n", - " source_collection_id, \n", - " source_collection_title,\n", - " license,\n", - " citation\n", + " source_collection_id, source_collection_title, license, citation\n", ")" ] }, @@ -730,10 +743,7 @@ "outputs": [], "source": [ "sn6_labels_collection = create_collection(\n", - " labels_collection_id, \n", - " labels_collection_title,\n", - " license,\n", - " citation\n", + " labels_collection_id, labels_collection_title, license, citation\n", ")" ] }, @@ -760,7 +770,9 @@ "metadata": {}, "outputs": [], "source": [ - "label_paths = [f for f in os.listdir(aoi_dir / \"geojson_buildings\") if f.endswith('geojson')]" + "label_paths = [\n", + " f for f in os.listdir(aoi_dir / 
\"geojson_buildings\") if f.endswith(\"geojson\")\n", + "]" ] }, { @@ -772,16 +784,16 @@ "source": [ "for label_path in label_paths:\n", " # get the geojson label filename\n", - " label_filename = label_path.split('/')[-1]\n", - " print(f'Creating source and label items from {label_filename}')\n", - " \n", + " label_filename = label_path.split(\"/\")[-1]\n", + " print(f\"Creating source and label items from {label_filename}\")\n", + "\n", " # create the source and label items for a given label path\n", " source_item = create_source_item(label_filename)\n", " label_item = create_label_item(label_filename)\n", - " \n", + "\n", " # add the source link to label item\n", " add_label_source_link(source_item, label_item)\n", - " \n", + "\n", " # add the source and label items to collections\n", " sn6_source_collection.add_item(source_item)\n", " sn6_labels_collection.add_item(label_item)" @@ -907,7 +919,7 @@ "metadata": {}, "outputs": [], "source": [ - "os.chdir('/home/jovyan/PlanetaryComputerExamples/tutorials')" + "os.chdir(\"/home/jovyan/PlanetaryComputerExamples/tutorials\")" ] }, { @@ -917,7 +929,7 @@ "metadata": {}, "outputs": [], "source": [ - "output_archive_filename = f'{data_dir.name}.tar.gz'\n", + "output_archive_filename = f\"{data_dir.name}.tar.gz\"\n", "create_tar_gz(output_archive_filename, data_dir.as_posix())" ] }, @@ -989,7 +1001,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.9.7" } }, "nbformat": 4,