Skip to content

Commit

Permalink
Fixing tiling
Browse files (browse the repository at this point in the history)
  • Loading branch information
Erick Verleye committed Jun 26, 2023
1 parent 0ccf734 commit fc78deb
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 38 deletions.
44 changes: 30 additions & 14 deletions bin/upload_s3.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,7 +8,8 @@
import os

from src.utilities.aws import upload_files
from file_types import MultiVariateComposite, TrainedModel, InferenceResultsCSV, InferenceResultsShapefile, File
from file_types import MultiVariateComposite, TrainedModel, InferenceResultsCSV, InferenceResultsShapefile, File,\
InferenceResultsTarfile


if __name__ == "__main__":
Expand All @@ -34,17 +35,23 @@
# Inference file arguments
inference_parser = argparse.ArgumentParser(add_help=False)
inference_parser.add_argument('--regions', nargs='+', required=False,
help='If specified, only files made from these regions will be uploaded, otherwise all region combinations will be found')
help='If specified, only files made from these regions will be uploaded, otherwise'
' all region combinations will be found')
inference_parser.add_argument('--architecture', required=False, type=str,
help='If specified, only files of this architecture will be uploaded, otherwise all architectures will be found')
help='If specified, only files of this architecture will be uploaded, otherwise all '
'architectures will be found')
inference_parser.add_argument('--layers', required=False, nargs='+',
help='If specified, only files made from these layers will be uploaded, otherwise all layer combinations will be found')
help='If specified, only files made from these layers will be uploaded, otherwise '
'all layer combinations will be found')
inference_parser.add_argument('--epoch', required=False, type=int,
help='If specified, only files from this epoch will be uploaded, otherwise all epochs will be found')
help='If specified, only files from this epoch will be uploaded, otherwise all epochs'
' will be found')
inference_parser.add_argument('--ratio', required=False, type=float,
help='If specified only files of this no bridge / bridge ratio will be uploaded, otherwise all ratios will be found')
help='If specified only files of this no bridge / bridge ratio will be uploaded,'
' otherwise all ratios will be found')
inference_parser.add_argument('--tile_size', required=False, type=int,
help='If specified only files of this tile size will be uploaded, otherwise all tile sizes will be found')
help='If specified only files of this tile size will be uploaded, otherwise all tile'
' sizes will be found')
inference_parser.add_argument('--best', required=False, action='store_true',
help='If set, only files marked as best will be uploaded')

Expand All @@ -56,18 +63,27 @@
if args.file_type == 'composites':
files = MultiVariateComposite.find_files(region=args.region, district=args.district, mgrs=args.mgrs)
elif args.file_type == 'models':
files = TrainedModel.find_files(regions=args.regions, architecture=args.architecture, layers=args.layers, epoch=args.epoch, ratio=args.ratio, tile_size=args.tile_size, best=args.best)
files = TrainedModel.find_files(regions=args.regions, architecture=args.architecture, layers=args.layers,
epoch=args.epoch, ratio=args.ratio, tile_size=args.tile_size, best=args.best)
elif args.file_type == 'inference_results':
shape_files = InferenceResultsShapefile.find_files(regions=args.regions, architecture=args.architecture, layers=args.layers, epoch=args.epoch, ratio=args.ratio, tile_size=args.tile_size, best=args.best)
shape_files = InferenceResultsShapefile.find_files(regions=args.regions, architecture=args.architecture,
layers=args.layers, epoch=args.epoch, ratio=args.ratio,
tile_size=args.tile_size, best=args.best)
files = []
for shape_file in shape_files:
if not os.path.exists(shape_file.tar_file.archive_path):
shape_file.create_tar_file()
files.append(shape_file.tar_file.archive_path)
files += InferenceResultsCSV.find_files(regions=args.regions, architecture=args.architecture, layers=args.layers, epoch=args.epoch, ratio=args.ratio, tile_size=args.tile_size, best=args.best)
shape_file_object = InferenceResultsShapefile.create(shape_file)
if not shape_file_object.tar_file.exists:
shape_file_object.create_tar_file()
files.append(shape_file_object.tar_file.archive_path)
files += InferenceResultsCSV.find_files(regions=args.regions, architecture=args.architecture,
layers=args.layers, epoch=args.epoch, ratio=args.ratio,
tile_size=args.tile_size, best=args.best)
else:
raise ValueError('Missing first positional argument for file type. Must be one of [composites, models, inference_results]')
raise ValueError('Missing first positional argument for file type. Must be one of [composites, models,'
' inference_results]')

print(f'Found {len(files)} files to upload')

# TODO: Parallelize this

upload_files([File.create(f) for f in files], args.s3_bucket_name)
4 changes: 3 additions & 1 deletion file_types.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -952,7 +952,9 @@ class InferenceResultsShapefile(_BaseInferenceFiles):
def __init__(self, regions: List[str], architecture: str, layers: List[str], epoch: int, ratio: float,
tile_size: int, best: bool = False):
super().__init__(regions, architecture, layers, epoch, ratio, tile_size, best)
self.tar_file = InferenceResultsTarfile(regions=self.regions, architecture=self.architecture, layers=self.layers, epoch=self.epoch, ratio=self.ratio, tile_size=self.tile_size, best=self.best)
self.tar_file = InferenceResultsTarfile(regions=self.regions, architecture=self.architecture,
layers=self.layers, epoch=self.epoch, ratio=self.ratio,
tile_size=self.tile_size, best=self.best)

@property
def name(self) -> str:
Expand Down
43 changes: 20 additions & 23 deletions src/utilities/imaging.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -318,23 +318,22 @@ def composite_to_tiles(composite: MultiVariateComposite, bridge_locations: Union
df (pd.DataFrame): Dataframe with file and geographic location of each tile, its bounding box, and if there was
ground truth data, whether the tile contains a bridge or not
"""
grid_geoloc_file = SingleRegionTileMatch(tile_size)
grid_geoloc_file = SingleRegionTileMatch(region=composite.region, tile_size=tile_size, district=composite.district,
military_grid=composite.mgrs)

grid_geoloc_path = grid_geoloc_file.archive_path(composite.region, composite.district, composite.mgrs,
create_dir=True)
if os.path.exists(grid_geoloc_path):
df = pd.read_csv(grid_geoloc_path)
if grid_geoloc_file.exists:
df = pd.read_csv(grid_geoloc_file.archive_path)
return df

rf = gdal.Open(composite.archive_path())
rf = gdal.Open(composite.archive_path)
_, xres, _, _, _, yres = rf.GetGeoTransform()
nxpix = int(tile_size / abs(xres))
nypix = int(tile_size / abs(yres))
xsteps = np.arange(0, rf.RasterXSize, nxpix).astype(np.int64).tolist()
ysteps = np.arange(0, rf.RasterYSize, nypix).astype(np.int64).tolist()

if bridge_locations is not None:
bbox = tiff_to_bbox(composite.archive_path())
bbox = tiff_to_bbox(composite.archive_path)
this_bridge_locs = []
p = polygon.Polygon(bbox)
for loc in bridge_locations:
Expand Down Expand Up @@ -362,44 +361,42 @@ def composite_to_tiles(composite: MultiVariateComposite, bridge_locations: Union
k = 0
for xmin in xsteps:
for ymin in ysteps:
tile_tiff = Tile(x_min=xmin, y_min=ymin)
tile_tiff_path = tile_tiff.archive_path(composite.region, composite.district, composite.mgrs,
tile_size=tile_size, create_dir=True)
pt_file = PyTorch(x_min=xmin, y_min=ymin)
pt_file_path = pt_file.archive_path(composite.region, composite.district, composite.mgrs,
tile_size=tile_size)
if not os.path.isfile(tile_tiff_path):
tile_tiff = Tile(region=composite.region, district=composite.district, military_grid=composite.mgrs,
tile_size=tile_size, x_min=xmin, y_min=ymin)
pt_file = PyTorch(region=composite.region, district=composite.district, military_grid=composite.mgrs,
tile_size=tile_size, x_min=xmin, y_min=ymin)
if not tile_tiff.exists:
gdal.Translate(
tile_tiff_path,
tile_tiff.archive_path,
rf,
srcWin=(xmin, ymin, nxpix, nypix),
)
bbox = tiff_to_bbox(tile_tiff_path)
df.at[k, 'tile'] = pt_file_path
bbox = tiff_to_bbox(tile_tiff.archive_path)
df.at[k, 'tile'] = pt_file.archive_path
df.at[k, 'bbox'] = bbox
if bridge_locations is not None:
df.at[k, 'is_bridge'], df.at[k, 'bridge_loc'], ix = bridge_in_bbox(bbox, this_bridge_locs)
if ix is not None:
this_bridge_locs.pop(ix)
if not os.path.exists(pt_file_path):
with rasterio.open(tile_tiff_path, 'r') as tmp:
if not pt_file.exists:
with rasterio.open(tile_tiff.archive_path, 'r') as tmp:
scale_img = tmp.read()
scale_img = np.moveaxis(scale_img, 0, -1) # make dims be c, w, h
scale_img = scale(scale_img)
tensor = torch_transformer(scale_img)
torch.save(tensor, pt_file_path)
torch.save(tensor, pt_file.archive_path)
if remove_tiff:
os.remove(tile_tiff_path)
os.remove(tile_tiff.archive_path)
k += 1
if k % tqdm_update_rate == 0:
pbar.update(tqdm_update_rate)
pbar.refresh()
if k % int(round(numTiles / 4)) == 0 and k < numTiles - 1:
percent = int(round(k / int(round(numTiles)) * 100))
pbar.set_description(f'Saving {composite.mgrs} {percent}%')
df.to_csv(grid_geoloc_path, index=False)
df.to_csv(grid_geoloc_file.archive_path, index=False)
pbar.set_description(f'Saving to file {grid_geoloc_file}')
df.to_csv(grid_geoloc_path, index=False)
df.to_csv(grid_geoloc_file.archive_path, index=False)
return df


Expand Down

0 comments on commit fc78deb

Please sign in to comment.