add option to valhalla_build_extract to use existing tar file for lookup/copy (valhalla#4255)

* add option to valhalla_build_extract to make an extract tar from an existing extract tar

* format and some cleanup

* changelog

* black

* amend tests

* deduplicate before writing to tar.. geojsons might catch many duplicates

* make the parent dir of the output tar file

* oops

* more oops

* one more ooops

* changelog
nilsnolde authored Aug 29, 2023
1 parent f14e32a commit 9c8bd05
Showing 4 changed files with 159 additions and 78 deletions.
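
Taken together, these changes teach `valhalla_build_extract` to read graph tiles out of an existing extract tar (not just a tile directory), deduplicate the matched tiles, create the output tar's parent directory, and write a new extract. Below is a minimal sketch of that mechanism, assuming only Python's `tarfile` and `pathlib`; the function name, paths and tile id are illustrative, not part of the script's API.

import tarfile
from pathlib import Path
from typing import Iterable


def copy_tiles_between_tars(src_tar: Path, dst_tar: Path, wanted: Iterable[str]) -> None:
    """Copy selected .gph members from an existing extract into a new tar.

    Illustrative only: member matching, the index file and the traffic skeleton
    are handled by valhalla_build_extract itself.
    """
    # deduplicate while preserving insertion order (dicts are ordered since Python 3.7);
    # overlapping GeoJSON polygons can match the same tile more than once
    unique_names = list(dict.fromkeys(wanted))

    # create the parent directory of the output tar if it does not exist yet
    dst_tar.parent.mkdir(parents=True, exist_ok=True)

    with tarfile.open(src_tar, "r") as src, tarfile.open(dst_tar, "w") as dst:
        for name in unique_names:
            member = src.getmember(name)  # raises KeyError if the tile is missing
            # stream the member's bytes straight from the source tar into the new one
            dst.addfile(member, src.extractfile(member))


# hypothetical paths and tile id, duplicated on purpose to show the deduplication
copy_tiles_between_tars(
    Path("tiles.tar"),
    Path("extract/tiles_subset.tar"),
    ["2/000/818/660.gph", "2/000/818/660.gph"],
)
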
3 changes: 2 additions & 1 deletion CHANGELOG.md
@@ -22,8 +22,9 @@
* ADDED: support for `:forward` and `:backward` for `motor_vehicle`, `vehicle`, `foot` and `bicycle` tag prefixes [#4204](https://github.com/valhalla/valhalla/pull/4204)
* ADDED: add `valhalla_build_landmarks` to parse POIs from osm pbfs and store them as landmarks in the landmark sqlite database [#4201](https://github.com/valhalla/valhalla/pull/4201)
* ADDED: add primary key in the landmark sqlite database and a method to retrieve landmarks via their primary keys [#4224](https://github.com/valhalla/valhalla/pull/4224)
* ADDED: `sources_to_targets` action for `/expansion` [#4263](https://github.com/valhalla/valhalla/pull/4263)
* ADDED: update graph tile to allow adding landmarks to edge info, and refactor edgeinfo.cc [#4233](https://github.com/valhalla/valhalla/pull/4233)
* ADDED: `sources_to_targets` action for `/expansion` [#4263](https://github.com/valhalla/valhalla/pull/4263)
* ADDED: option `--extract-tar` to `valhalla_build_extract` to create extracts from .tar files instead of tile directory [#4255](https://github.com/valhalla/valhalla/pull/4255)
* ADDED: Support for `bannerInstructions` attribute in OSRM serializer via `banner_instructions` request parameter [#4093](https://github.com/valhalla/valhalla/pull/4093)
* UPDATED: submodules which had new releases, unless it was a major version change [#4231](https://github.com/valhalla/valhalla/pull/4231)

156 changes: 107 additions & 49 deletions scripts/valhalla_build_extract
@@ -13,7 +13,7 @@ import sys
import tarfile
from tarfile import BLOCKSIZE
from time import time
from typing import List, Tuple, Union, Set
from typing import List, Tuple, Optional

# "<" prefix means little-endian and no alignment
# order is important! if uint64_t is not first, c++ will use padding bytes to unpack
@@ -53,6 +53,51 @@ class TileHeader(ctypes.Structure):
]


class TileResolver:
def __init__(self, path: Path):
"""
Abstraction so we don't have to care whether we're looking at a tile directory or tar file.
:param path: path to the tile directory or tar file.
"""
self.path = path.resolve()
self._is_tar = path.is_file()
self._tar_obj: Optional[tarfile.TarFile] = tarfile.open(self.path, "r") if self._is_tar else None

self.normalized_tile_paths: List[Path] = list()
self.matched_paths: List[Path] = list()

# pre-populate the available paths
if self._is_tar:
self.normalized_tile_paths = sorted(
[Path(m.name) for m in self._tar_obj.getmembers() if m.name.endswith('.gph')]
)
else:
self.normalized_tile_paths = sorted(
p.relative_to(self.path) for p in self.path.rglob('*.gph')
)

def __del__(self):
# close the tar object on GC
if self._tar_obj:
self._tar_obj.close()

def add_to_tar(self, tar: tarfile.TarFile):
"""
Adds the self.matched_paths to the passed tar file.
"""
# deduplicate the list (geojson variant might've added dups)
# since 3.7 python dicts are insertion-ordered, so order is preserved
for t in list(dict.fromkeys(self.matched_paths)):
LOGGER.debug(f"Adding tile {t} to the tar file")
if self._is_tar:
tar_member = self._tar_obj.getmember(str(t))
tar.addfile(tar_member, self._tar_obj.extractfile(tar_member.name))
else:
tar.add(str(self.path.joinpath(t)), arcname=t)
tar_member = tar.getmember(str(t))


description = "Builds a tar extract from the tiles in mjolnir.tile_dir to the path specified in mjolnir.tile_extract."

parser = argparse.ArgumentParser(description=description)
@@ -66,6 +111,14 @@ parser.add_argument(
type=str,
default='{}',
)
parser.add_argument(
"-e",
"--extract-tar",
help="If specified, will build an extract from an existing tar file at 'mjolnir.tile_extract' and save it to this specified path.",
type=str,
default="",
)
parser.add_argument("-O", "--overwrite", help="Overwrites an output tar file", action="store_true")
parser.add_argument(
"-t", "--with-traffic", help="Flag to add a traffic.tar skeleton", action="store_true", default=False
)
@@ -117,7 +170,7 @@ def get_tile_bbox(tile_path_id: str) -> Tuple[float, float, float, float]:
return tile_base_x, tile_base_y, tile_base_x + tile_size, tile_base_y + tile_size


def get_tiles_with_geojson(all_tile_paths: List[Path], geojson_dir: Path, tiles_dir_: Path) -> Set[Path]:
def get_tiles_with_geojson(tile_resolver_: TileResolver, geojson_dir: Path):
"""Returns all tile paths intersecting with the GeoJSON (multi)polygons"""
try:
from shapely.geometry import Polygon, box
@@ -146,19 +199,15 @@ def get_tiles_with_geojson(all_tile_paths: List[Path], geojson_dir: Path, tiles_
polygons.append(Polygon(single_polygon[0]))
return polygons

tile_paths_ = set()
geojson_polys = get_outer_rings(geojson_dir)
for tile_path in all_tile_paths:
tile_path_id = str(tile_path.relative_to(tiles_dir_))
tile_bbox = box(*get_tile_bbox(tile_path_id))
for tile_path in tile_resolver_.normalized_tile_paths:
tile_bbox = box(*get_tile_bbox(str(tile_path)))
for poly in geojson_polys:
if poly.intersects(tile_bbox):
tile_paths_.add(tile_path)

return tile_paths_
tile_resolver_.matched_paths.append(tile_path)


def get_tiles_with_bbox(all_tile_paths: List[Path], bbox_str: str, tiles_dir_: Path) -> Set[Path]:
def get_tiles_with_bbox(tile_resolver_: TileResolver, bbox_str: str):
"""Returns all tile paths intersecting with the bbox"""
try:
bbox = Bbox(*[float(x) for x in bbox_str.split(",")])
@@ -173,11 +222,9 @@ def get_tiles_with_bbox(all_tile_paths: List[Path], bbox_str: str, tiles_dir_: P
LOGGER.critical(f"Bbox invalid: {list(bbox)}")
sys.exit(1)

tile_paths_ = set()
for tile_path in all_tile_paths:
tile_path_id = str(tile_path.relative_to(tiles_dir_))
tile_bbox = Bbox(*get_tile_bbox(tile_path_id))
# check if tile_bbox is outside of bbox
for tile_path in tile_resolver_.normalized_tile_paths:
tile_bbox = Bbox(*get_tile_bbox(str(tile_path)))
# check if tile_bbox is outside bbox
if not any(
[
tile_bbox.min_x < bbox.min_x and tile_bbox.max_x < bbox.min_x, # left of bbox
Expand All @@ -186,9 +233,7 @@ def get_tiles_with_bbox(all_tile_paths: List[Path], bbox_str: str, tiles_dir_: P
tile_bbox.min_y > bbox.max_y and tile_bbox.max_y > bbox.max_y, # above bbox
]
):
tile_paths_.add(tile_path)

return tile_paths_
tile_resolver_.matched_paths.append(tile_path)


def get_tile_level_id(path: str) -> List[str]:
@@ -234,19 +279,11 @@ def write_index_to_tar(tar_fp_: Path):
tar.write(struct.pack(INDEX_BIN_FORMAT, *entry))


def create_extracts(config_: dict, do_traffic: bool, tile_paths_: Union[Set[Path], List[Path]]):
def create_extracts(config_: dict, do_traffic: bool, tile_resolver_: TileResolver, extract_fp: Path):
"""Actually creates the tar ball. Break out of main function for testability."""
tiles_fp: Path = Path(config_["mjolnir"].get("tile_dir", '/dev/null'))
extract_fp: Path = Path(
config_["mjolnir"].get("tile_extract") or tiles_fp.parent.joinpath('tiles.tar')
)
traffic_fp: Path = Path(
config_["mjolnir"].get("traffic_extract") or tiles_fp.parent.joinpath('traffic.tar')
)

tiles_count = len(tile_paths_)
tiles_count = len(tile_resolver_.matched_paths)
if not tiles_count:
LOGGER.critical(f"Directory {tiles_fp.resolve()} does not contain any usable graph tiles.")
LOGGER.critical(f"Couldn't find usable tiles in {tile_resolver_.path}")
sys.exit(1)

# write the in-memory index file
@@ -256,12 +293,10 @@ def create_extracts(config_: dict, do_traffic: bool, tile_paths_: Union[Set[Path

# first add the index file, then the sorted tiles to the tarfile
# TODO: come up with a smarter strategy to cluster the tiles in the tar
extract_fp.parent.mkdir(parents=True, exist_ok=True)
with tarfile.open(extract_fp, 'w') as tar:
tar.addfile(get_tar_info(INDEX_FILE, index_size), index_fd)
for t in tile_paths_:
rel_path = str(t.relative_to(tiles_fp))
LOGGER.debug(f"Adding tile {rel_path} to the path")
tar.add(str(t.resolve()), arcname=rel_path)
tile_resolver_.add_to_tar(tar)

write_index_to_tar(extract_fp)

@@ -273,6 +308,9 @@ def create_extracts(config_: dict, do_traffic: bool, tile_paths_: Union[Set[Path
sys.exit(0)

LOGGER.info("Start creating traffic extract...")
traffic_fp: Path = Path(
config_["mjolnir"].get("traffic_extract") or extract_fp.parent.joinpath("traffic.tar")
)

# we already have the right size of the index file, simply reset it
index_fd.seek(0)
@@ -328,6 +366,14 @@ def create_extracts(config_: dict, do_traffic: bool, tile_paths_: Union[Set[Path
if __name__ == '__main__':
args = parser.parse_args()

# set the right logger level
if args.verbosity == 0:
LOGGER.setLevel(logging.CRITICAL)
elif args.verbosity == 1:
LOGGER.setLevel(logging.INFO)
elif args.verbosity >= 2:
LOGGER.setLevel(logging.DEBUG)

if not args.config and not args.inline_config:
LOGGER.critical("No valid config file or inline config used.")
sys.exit(1)
@@ -342,27 +388,39 @@ if __name__ == '__main__':
# override with inline-config
config.update(**json.loads(args.inline_config))

# get and validate the tiles directory
# if output file exists and not --overwrite specified, fail
tiles_extract_out = args.extract_tar or config["mjolnir"].get("tile_extract")
if not tiles_extract_out:
LOGGER.critical("No output file path specified in 'mjolnir.tile_extract' or --extract-tar.")
sys.exit(1)

tiles_extract_out = Path(tiles_extract_out)
if tiles_extract_out.is_file() and not args.overwrite:
LOGGER.critical(f"File exists. Specify --overwrite to overwrite {tiles_extract_out}")
sys.exit(1)

# prefer the tar file to extract from, fall back to the tile dir
tiles_dir: Path = Path(config["mjolnir"].get("tile_dir", '/dev/null'))
if not tiles_dir.is_dir():
# optionally use the tile extract to find the tiles
if args.extract_tar and (tiles_extract_in := Path(config["mjolnir"].get("tile_extract"))).is_file():
tile_resolver = TileResolver(tiles_extract_in)
LOGGER.debug("Using tar file to extract tiles")
# else get and validate the tiles directory
elif tiles_dir.is_dir():
tile_resolver = TileResolver(tiles_dir)
LOGGER.debug("Using graph dir to extract tiles")
else:
LOGGER.critical(
f"Directory 'mjolnir.tile_dir': {tiles_dir.resolve()} was not found on the filesystem."
f"'Can't find valid paths for 'mjolnir.tile_dir' or 'mjolnir.tile_extract' in {args.config}"
)
sys.exit(1)

# get the tile paths which intersect with the geom, if any
tile_paths: Union[Set[Path], List[Path]] = sorted(tiles_dir.rglob('*.gph'))
# get the tile paths which intersect with the geom, if any, and write to TileResolver
if args.bbox:
tile_paths = get_tiles_with_bbox(tile_paths, args.bbox, tiles_dir)
get_tiles_with_bbox(tile_resolver, args.bbox)
elif args.geojson_dir:
tile_paths = get_tiles_with_geojson(tile_paths, args.geojson_dir, tiles_dir)

# set the right logger level
if args.verbosity == 0:
LOGGER.setLevel(logging.CRITICAL)
elif args.verbosity == 1:
LOGGER.setLevel(logging.INFO)
elif args.verbosity >= 2:
LOGGER.setLevel(logging.DEBUG)
get_tiles_with_geojson(tile_resolver, args.geojson_dir)
else:
tile_resolver.matched_paths = tile_resolver.normalized_tile_paths

create_extracts(config, args.with_traffic, tile_paths)
create_extracts(config, args.with_traffic, tile_resolver, tiles_extract_out)
2 changes: 1 addition & 1 deletion test/CMakeLists.txt
@@ -149,7 +149,7 @@ add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/test/data/utrecht_tiles/traffic.ta
-t ${VALHALLA_SOURCE_DIR}/test/data/traffic_tiles/
COMMAND ${CMAKE_BINARY_DIR}/valhalla_build_extract
--inline-config '{"mjolnir":{"tile_dir":"test/data/utrecht_tiles","tile_extract":"test/data/utrecht_tiles/tiles.tar","traffic_extract":"test/data/utrecht_tiles/traffic.tar","concurrency":1,"logging":{"type":""}}}'
--with-traffic
--with-traffic --overwrite
COMMENT "Building Utrecht Tiles..."
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS valhalla_build_tiles valhalla_add_predicted_traffic build_timezones ${VALHALLA_SOURCE_DIR}/test/data/utrecht_netherlands.osm.pbf ${CMAKE_BINARY_DIR}/valhalla_build_extract)
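
For reference, a hedged sketch of invoking the new option directly with Python's `subprocess`, using only flags that appear in this diff (`--inline-config`, `--extract-tar`, `--overwrite`); the output path is a placeholder, and the source tar named under `mjolnir.tile_extract` must already exist.

import subprocess

# read tiles from the tar named in mjolnir.tile_extract and write a fresh extract
# to the path given via --extract-tar; --overwrite permits replacing an existing file
subprocess.run(
    [
        "valhalla_build_extract",
        "--inline-config",
        '{"mjolnir": {"tile_extract": "test/data/utrecht_tiles/tiles.tar"}}',
        "--extract-tar",
        "test/data/utrecht_tiles/tiles_copy.tar",
        "--overwrite",
    ],
    check=True,
)
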
