Skip to content

Commit

Permalink
fix cli scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
mayrajeo committed Feb 19, 2024
1 parent d65f91e commit a3de1a0
Show file tree
Hide file tree
Showing 18 changed files with 375 additions and 375 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,7 @@ checklink/cookies.txt

# Workflow example data
nbs/workflow_examples/

# Pytorch models

*.pt
2 changes: 1 addition & 1 deletion geo2ml/data/cv.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

# %% ../../nbs/13_data.cv.ipynb 3
from .coordinates import *
import rasterio as rio
from pathlib import Path
import os
import yaml
Expand All @@ -13,7 +14,6 @@
from tqdm.auto import tqdm
import geopandas as gpd
import pandas as pd
import rasterio as rio
import numpy as np
from shapely.geometry import box
import datetime
Expand Down
2 changes: 1 addition & 1 deletion geo2ml/data/tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

# %% ../../nbs/10_data.tabular.ipynb 3
from fastcore.basics import *
import rasterio as rio
import pandas as pd
import numpy as np
import geopandas as gpd
import rasterio as rio
import logging
from rasterstats import zonal_stats
from pathlib import Path
Expand Down
2 changes: 1 addition & 1 deletion geo2ml/data/tiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
__all__ = ['Tiler', 'untile_raster', 'copy_sum', 'untile_vector']

# %% ../../nbs/12_data.tiling.ipynb 4
import rasterio as rio
import numpy as np
import itertools
import pandas as pd
Expand All @@ -14,7 +15,6 @@
from tqdm.auto import tqdm
import shapely
from shapely.geometry import box
import rasterio as rio
import rasterio.mask as rio_mask
import rasterio.windows as rio_windows
import fiona
Expand Down
2 changes: 1 addition & 1 deletion geo2ml/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@

# %% ../nbs/31_plotting.ipynb 3
from fastcore.basics import *
import rasterio as rio
import os
import sys
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio as rio
import rasterio.plot as rioplot
import matplotlib.patches as mpatches
from PIL import Image
Expand Down
54 changes: 44 additions & 10 deletions geo2ml/scripts/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

# %% ../../nbs/41_scripts.data.ipynb 3
from fastcore.script import *
import rasterio as rio
import datetime
import geopandas as gpd
import rasterio as rio
from pathlib import Path
import os

Expand Down Expand Up @@ -139,6 +139,8 @@ def create_raster_dataset(
mask_path: Path, # Path to corresponding mask raster or polygon layer. Must have the same extent and resolution as the raster in `raster_path`
outpath: Path, # Where to save the results
save_grid: bool = False, # Whether to save the tiling grid
allow_partial_data: bool = False, # Whether to create tiles that have only partial data
keep_bg_only: bool = False, # Keep the mask chips that contain only the background class
target_column: str = None, # If mask_path contains vector data, identifier of the column containing the class information
gpkg_layer: str = None, # If `polygon_path` is a geopackage, specify the layer used. Ignored otherwise.
gridsize_x: int = 256, # Size of tiles in x-axis in pixels
Expand All @@ -153,7 +155,7 @@ def create_raster_dataset(
gridsize_y=gridsize_y,
overlap=(overlap_x, overlap_y),
)
tiler.tile_raster(raster_path)
tiler.tile_raster(raster_path, allow_partial_data=allow_partial_data)

polygon_extensions = [".shp", ".geojson", ".gpkg"]
raster_extensions = [".tif"]
Expand All @@ -167,7 +169,11 @@ def create_raster_dataset(
"If mask_path contains polygon data, target_column must be provided"
)
tiler.tile_and_rasterize_vector(
raster_path, mask_path, column=target_column, gpkg_layer=gpkg_layer
raster_path,
mask_path,
column=target_column,
gpkg_layer=gpkg_layer,
keep_bg_only=keep_bg_only,
)
os.rename(tiler.rasterized_vector_path, outpath / "mask_images")
if save_grid:
Expand All @@ -183,7 +189,9 @@ def create_coco_dataset(
dataset_name: str, # Name of the dataset
gpkg_layer: str = None, # If `polygon_path` is a geopackage, specify the layer used. Ignored otherwise.
min_area_pct: float = 0.0, # How small polygons keep after tiling?
output_format: str = "geojson", # Which format to use for saving, either 'geojson' or 'gpkg'
save_grid: bool = False, # Should tiling grid be saved
allow_partial_data: bool = False, # Whether to create tiles that have only partial image data
gridsize_x: int = 320, # Size of tiles in x-axis in pixels
gridsize_y: int = 320, # Size of tiles in y-axis in pixels
overlap_x: int = 0, # Overlap of tiles in x-axis in pixels
Expand All @@ -198,8 +206,13 @@ def create_coco_dataset(
gridsize_y=gridsize_y,
overlap=(overlap_x, overlap_y),
)
tiler.tile_raster(raster_path)
tiler.tile_vector(polygon_path)
tiler.tile_raster(raster_path, allow_partial_data=allow_partial_data)
tiler.tile_vector(
polygon_path,
min_area_pct=min_area_pct,
gpkg_layer=gpkg_layer,
output_format=output_format,
)

cats = gpd.read_file(polygon_path)[target_column].unique()

Expand All @@ -215,9 +228,16 @@ def create_coco_dataset(
}

coco_licenses = {}

match output_format:
case "geojson":
vector_path = outpath / "vectors"
case "gpkg":
vector_path = outpath / "vectors.gpkg"

shp_to_coco(
outpath / "images",
outpath / "vectors",
vector_path,
outpath,
label_col=target_column,
dataset_name=dataset_name,
Expand All @@ -239,7 +259,9 @@ def create_yolo_dataset(
dataset_name: str = None, # Optional name of the dataset
gpkg_layer: str = None, # If `polygon_path` is a geopackage, specify the layer used. Ignored otherwise.
min_area_pct: float = 0.0, # How small polygons keep after tiling?
save_grid: bool = False, # Should tiling grid be saved?
output_format: str = "geojson", # Which format to use for saving, either 'geojson' or 'gpkg'
save_grid: bool = False, # Should tiling grid be saved
allow_partial_data: bool = False, # Whether to create tiles that have only partial image data
gridsize_x: int = 320, # Size of tiles in x-axis, pixels
gridsize_y: int = 320, # Size of tiles in y-axis, pixels
overlap_x: int = 0, # Overlap of tiles in x-axis
Expand All @@ -254,12 +276,24 @@ def create_yolo_dataset(
gridsize_y=gridsize_y,
overlap=(overlap_x, overlap_y),
)
tiler.tile_raster(raster_path)
tiler.tile_vector(polygon_path)
tiler.tile_raster(raster_path, allow_partial_data=allow_partial_data)
tiler.tile_vector(
polygon_path,
min_area_pct=min_area_pct,
gpkg_layer=gpkg_layer,
output_format=output_format,
)
cats = gpd.read_file(polygon_path)[target_column].unique()

match output_format:
case "geojson":
vector_path = outpath / "vectors"
case "gpkg":
vector_path = outpath / "vectors.gpkg"

shp_to_yolo(
outpath / "images",
outpath / "vectors",
vector_path,
outpath,
label_col=target_column,
names=cats,
Expand Down
2 changes: 1 addition & 1 deletion nbs/10_data.tabular.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@
"source": [
"#| export\n",
"from fastcore.basics import *\n",
"import rasterio as rio\n",
"import pandas as pd\n",
"import numpy as np\n",
"import geopandas as gpd\n",
"import rasterio as rio\n",
"import logging\n",
"from rasterstats import zonal_stats\n",
"from pathlib import Path"
Expand Down
2 changes: 1 addition & 1 deletion nbs/12_data.tiling.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
"source": [
"#| export\n",
"\n",
"import rasterio as rio\n",
"import numpy as np\n",
"import itertools\n",
"import pandas as pd\n",
Expand All @@ -58,7 +59,6 @@
"from tqdm.auto import tqdm\n",
"import shapely\n",
"from shapely.geometry import box\n",
"import rasterio as rio\n",
"import rasterio.mask as rio_mask\n",
"import rasterio.windows as rio_windows\n",
"import fiona\n",
Expand Down
2 changes: 1 addition & 1 deletion nbs/13_data.cv.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
"#| export\n",
"\n",
"from geo2ml.data.coordinates import *\n",
"import rasterio as rio\n",
"from pathlib import Path\n",
"import os\n",
"import yaml\n",
Expand All @@ -50,7 +51,6 @@
"from tqdm.auto import tqdm\n",
"import geopandas as gpd\n",
"import pandas as pd\n",
"import rasterio as rio\n",
"import numpy as np\n",
"from shapely.geometry import box\n",
"import datetime\n",
Expand Down
2 changes: 1 addition & 1 deletion nbs/31_plotting.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@
"#| export\n",
"\n",
"from fastcore.basics import *\n",
"import rasterio as rio\n",
"import os\n",
"import sys\n",
"from pathlib import Path\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import rasterio as rio\n",
"import rasterio.plot as rioplot\n",
"import matplotlib.patches as mpatches\n",
"from PIL import Image"
Expand Down
40 changes: 30 additions & 10 deletions nbs/41_scripts.data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@
"#| export\n",
"\n",
"from fastcore.script import *\n",
"import rasterio as rio\n",
"import datetime\n",
"import geopandas as gpd\n",
"import rasterio as rio\n",
"from pathlib import Path\n",
"import os\n",
"\n",
Expand Down Expand Up @@ -242,6 +242,8 @@
" mask_path:Path, # Path to corresponding mask raster or polygon layer. Must have the same extent and resolution as the raster in `raster_path`\n",
" outpath:Path, # Where to save the results\n",
" save_grid:bool=False, # Whether to save the tiling grid\n",
" allow_partial_data:bool=False, # Whether to create tiles that have only partial data\n",
" keep_bg_only:bool=False, # Keep the mask chips that contain only the background class\n",
" target_column:str=None, # If mask_path contains vector data, identifier of the column containing the class information\n",
" gpkg_layer:str=None, # If `polygon_path` is a geopackage, specify the layer used. Ignored otherwise.\n",
" gridsize_x:int=256, # Size of tiles in x-axis in pixels\n",
Expand All @@ -251,7 +253,7 @@
"):\n",
" \"Create a semantic segmentation dataset from a `raster_path` and corresponding mask `mask_path`. Raster image patches are saved to `outpath/raster_tiles` and mask patches to `outpath/mask_tiles`\"\n",
" tiler = Tiler(outpath, gridsize_x=gridsize_x, gridsize_y=gridsize_y, overlap=(overlap_x, overlap_y))\n",
" tiler.tile_raster(raster_path)\n",
" tiler.tile_raster(raster_path, allow_partial_data=allow_partial_data)\n",
" \n",
" polygon_extensions = ['.shp', '.geojson', '.gpkg']\n",
" raster_extensions = ['.tif']\n",
Expand All @@ -264,7 +266,7 @@
" raise Exception(\n",
" \"If mask_path contains polygon data, target_column must be provided\"\n",
" )\n",
" tiler.tile_and_rasterize_vector(raster_path, mask_path, column=target_column, gpkg_layer=gpkg_layer)\n",
" tiler.tile_and_rasterize_vector(raster_path, mask_path, column=target_column, gpkg_layer=gpkg_layer, keep_bg_only=keep_bg_only)\n",
" os.rename(tiler.rasterized_vector_path, outpath/'mask_images')\n",
" if save_grid: tiler.grid.to_file(outpath/'grid.geojson')"
]
Expand All @@ -287,7 +289,9 @@
" dataset_name:str, # Name of the dataset\n",
" gpkg_layer:str=None, # If `polygon_path` is a geopackage, specify the layer used. Ignored otherwise.\n",
" min_area_pct:float=0.0, # How small polygons keep after tiling?\n",
" output_format:str='geojson', # Which format to use for saving, either 'geojson' or 'gpkg'\n",
" save_grid:bool=False, # Should tiling grid be saved\n",
" allow_partial_data:bool=False, # Whether to create tiles that have only partial image data\n",
" gridsize_x:int=320, # Size of tiles in x-axis in pixels\n",
" gridsize_y:int=320, # Size of tiles in y-axis in pixels\n",
" overlap_x:int=0, # Overlap of tiles in x-axis in pixels\n",
Expand All @@ -297,8 +301,8 @@
"):\n",
" \"Create a COCO-format dataset from `raster` and `polygon` shapefile\"\n",
" tiler = Tiler(outpath, gridsize_x=gridsize_x, gridsize_y=gridsize_y, overlap=(overlap_x, overlap_y))\n",
" tiler.tile_raster(raster_path)\n",
" tiler.tile_vector(polygon_path)\n",
" tiler.tile_raster(raster_path, allow_partial_data=allow_partial_data)\n",
" tiler.tile_vector(polygon_path, min_area_pct=min_area_pct, gpkg_layer=gpkg_layer, output_format=output_format)\n",
"\n",
" cats = gpd.read_file(polygon_path)[target_column].unique()\n",
"\n",
Expand All @@ -311,7 +315,14 @@
" }\n",
"\n",
" coco_licenses = {}\n",
" shp_to_coco(outpath/'images', outpath/'vectors', outpath, label_col=target_column, \n",
" \n",
" match output_format:\n",
" case 'geojson':\n",
" vector_path = outpath/'vectors'\n",
" case 'gpkg':\n",
" vector_path = outpath/'vectors.gpkg'\n",
" \n",
" shp_to_coco(outpath/'images', vector_path, outpath, label_col=target_column, \n",
" dataset_name=dataset_name, coco_info=coco_info, coco_categories=coco_cats,\n",
" min_bbox_area=min_bbox_area, rotated_bbox=ann_format=='rotated box')\n",
" if save_grid: tiler.grid.to_file(outpath/'grid.geojson')"
Expand All @@ -335,7 +346,9 @@
" dataset_name:str=None, # Optional name of the dataset\n",
" gpkg_layer:str=None, # If `polygon_path` is a geopackage, specify the layer used. Ignored otherwise.\n",
" min_area_pct:float=0.0, # How small polygons keep after tiling?\n",
" save_grid:bool=False, # Should tiling grid be saved?\n",
" output_format:str='geojson', # Which format to use for saving, either 'geojson' or 'gpkg'\n",
" save_grid:bool=False, # Should tiling grid be saved\n",
" allow_partial_data:bool=False, # Whether to create tiles that have only partial image data\n",
" gridsize_x:int=320, # Size of tiles in x-axis, pixels\n",
" gridsize_y:int=320, # Size fo tiles in y-axis, pixels\n",
" overlap_x:int=0, # Overlap of tiles in x-axis\n",
Expand All @@ -345,10 +358,17 @@
"):\n",
" \"Create a YOLO-format dataset from `raster` and `polygon` shapefile\"\n",
" tiler = Tiler(outpath, gridsize_x=gridsize_x, gridsize_y=gridsize_y, overlap=(overlap_x, overlap_y))\n",
" tiler.tile_raster(raster_path)\n",
" tiler.tile_vector(polygon_path)\n",
" tiler.tile_raster(raster_path, allow_partial_data=allow_partial_data)\n",
" tiler.tile_vector(polygon_path, min_area_pct=min_area_pct, gpkg_layer=gpkg_layer, output_format=output_format)\n",
" cats = gpd.read_file(polygon_path)[target_column].unique()\n",
" shp_to_yolo(outpath/'images', outpath/'vectors', outpath, label_col=target_column,\n",
"\n",
" match output_format:\n",
" case 'geojson':\n",
" vector_path = outpath/'vectors'\n",
" case 'gpkg':\n",
" vector_path = outpath/'vectors.gpkg'\n",
" \n",
" shp_to_yolo(outpath/'images', vector_path, outpath, label_col=target_column,\n",
" names=cats, dataset_name=dataset_name, ann_format=ann_format, min_bbox_area=0)\n",
" if save_grid: tiler.grid.to_file(outpath/'grid.geojson')"
]
Expand Down
Binary file modified nbs/example_data/R70C21.dbf
Binary file not shown.
2 changes: 1 addition & 1 deletion nbs/example_data/tiles/coco_norm.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion nbs/example_data/tiles/coco_rot.json

Large diffs are not rendered by default.

Binary file modified nbs/example_data/tiles/vectors.gpkg
Binary file not shown.
Binary file modified nbs/example_data/tiles_partial/vectors.gpkg
Binary file not shown.
Loading

0 comments on commit a3de1a0

Please sign in to comment.