From 3dfa195023923fe7e1a95433c8af5eaa40a02ca7 Mon Sep 17 00:00:00 2001
From: Rub21
Date: Fri, 16 Feb 2024 11:53:05 -0500
Subject: [PATCH] Update script to keep state of sequence files - imposm

---
 images/tiler-imposm/start.sh | 235 +++++++++++++++++------------------
 1 file changed, 113 insertions(+), 122 deletions(-)

diff --git a/images/tiler-imposm/start.sh b/images/tiler-imposm/start.sh
index cf2d7922..87e11b5f 100755
--- a/images/tiler-imposm/start.sh
+++ b/images/tiler-imposm/start.sh
@@ -1,175 +1,166 @@
 #!/bin/bash
 set -e
-stateFile="state.txt"
-PBFFile="osm.pbf"
-limitFile="limitFile.geojson"
+
+STATEFILE="state.txt"
+PBFFILE="osm.pbf"
+LIMITFILE="limitFile.geojson"
 # directories to keep the imposm's cache for updating the db
-workDir=/mnt/data
-cachedir=$workDir/cachedir
-mkdir -p $cachedir
-diffdir=$workDir/diff
-mkdir -p $diffdir
-imposm3_expire_dir=$workDir/imposm3_expire_dir
-mkdir -p $imposm3_expire_dir
-# imposm3_expire_state_dir=$workDir/imposm3_expire_state
-# mkdir -p $imposm3_expire_state_dir
-# Setting directory
-settingDir=/osm
+WORKDIR=/mnt/data
+CACHE_DIR=$WORKDIR/cachedir
+DIFF_DIR=$WORKDIR/diff
+IMPOSM3_EXPIRE_DIR=$WORKDIR/imposm3_expire_dir
+
+# # Setting directory
+# settingDir=/osm

 # Folder to store the imposm expider files in s3 or gs
 BUCKET_IMPOSM_FOLDER=imposm
 INIT_FILE=/mnt/data/init_done

-# Create config file to set variable for imposm
-echo "{" > $workDir/config.json
-echo "\"cachedir\": \"$cachedir\"," >> $workDir/config.json
-echo "\"diffdir\": \"$diffdir\"," >> $workDir/config.json
-echo "\"connection\": \"postgis://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB\"," >> $workDir/config.json
-echo "\"mapping\": \"config/imposm3.json\"," >> $workDir/config.json
-echo "\"replication_url\": \"$REPLICATION_URL\"" >> $workDir/config.json
-echo "}" >> $workDir/config.json
-
-function getData () {
-  # Import from pubic url, usualy it come from osm
-  if [ $TILER_IMPORT_FROM == "osm" ]; then
-    wget $TILER_IMPORT_PBF_URL -O $PBFFile
-  fi
-  if [ $TILER_IMPORT_FROM == "osmseed" ]; then
-    if [ $CLOUDPROVIDER == "aws" ]; then
+mkdir -p "$CACHE_DIR" "$DIFF_DIR" "$IMPOSM3_EXPIRE_DIR"
+
+# Create config file to set variables for imposm
+{
+  echo "{"
+  echo "\"cachedir\": \"$CACHE_DIR\","
+  echo "\"diffdir\": \"$DIFF_DIR\","
+  echo "\"connection\": \"postgis://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB\","
+  echo "\"mapping\": \"config/imposm3.json\","
+  echo "\"replication_url\": \"$REPLICATION_URL\""
+  echo "}"
+} >"$WORKDIR/config.json"
+
+function getData() {
+  # Get the PBF file from a public URL or from the osmseed bucket
+  if [ "$TILER_IMPORT_FROM" == "osm" ]; then
+    wget "$TILER_IMPORT_PBF_URL" -O "$PBFFILE"
+  elif [ "$TILER_IMPORT_FROM" == "osmseed" ]; then
+    if [ "$CLOUDPROVIDER" == "aws" ]; then
       # Get the state.txt file from S3
-      aws s3 cp $AWS_S3_BUCKET/planet/full-history/$stateFile .
-      PBFCloudPath=$(tail -n +1 $stateFile)
-      aws s3 cp $PBFCloudPath $PBFFile
-    fi
-    # Google storage
-    if [ $CLOUDPROVIDER == "gcp" ]; then
+      aws s3 cp "$AWS_S3_BUCKET/planet/full-history/$STATEFILE" .
+      PBFCloudPath=$(tail -n +1 "$STATEFILE")
+      aws s3 cp "$PBFCloudPath" "$PBFFILE"
+    elif [ "$CLOUDPROVIDER" == "gcp" ]; then
       # Get the state.txt file from GS
-      gsutil cp $GCP_STORAGE_BUCKET/planet/full-history/$stateFile .
-      PBFCloudPath=$(tail -n +1 $stateFile)
-      gsutil cp $PBFCloudPath $PBFFile
+      gsutil cp "$GCP_STORAGE_BUCKET/planet/full-history/$STATEFILE" .
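+      # state.txt is expected to contain the cloud path of the latest full-history PBF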
+      PBFCloudPath=$(tail -n +1 "$STATEFILE")
+      gsutil cp "$PBFCloudPath" "$PBFFILE"
     fi
   fi
 }

-function uploadExpiredFiles(){
-  # create statte file
-  # dateStr=$(date '+%y%m%d%H%M%S')
-  # stateFile=$imposm3_expire_state_dir/expired_${dateStr}.txt
-  # bucketStateFile=${stateFile#*"$workDir"}
-
-  for file in $(find $imposm3_expire_dir -type f -cmin -1); do
-    bucketFile=${file#*"$workDir"}
-    echo $(date +%F_%H:%M:%S)": New file..." $file
-    # echo $file >> $stateFile
-    # AWS
-    if [ "$CLOUDPROVIDER" == "aws" ]; then
-      aws s3 cp $file ${AWS_S3_BUCKET}/${BUCKET_IMPOSM_FOLDER}${bucketFile} --acl public-read
-    fi
-    # Google Storage
-    if [ "$CLOUDPROVIDER" == "gcp" ]; then
-      gsutil cp -a public-read $file ${GCP_STORAGE_BUCKET}${BUCKET_IMPOSM_FOLDER}${bucketFile}
-    fi
-  done
-  # Upload state File
-  # if [[ -f "$stateFile" ]]; then
-  #   # AWS
-  #   if [ "$CLOUDPROVIDER" == "aws" ]; then
-  #     aws s3 cp $stateFile ${AWS_S3_BUCKET}/${BUCKET_IMPOSM_FOLDER}${bucketStateFile} --acl public-read
-  #   fi
-  #   # Google Storage
-  #   if [ "$CLOUDPROVIDER" == "gcp" ]; then
-  #     gsutil cp -a public-read $stateFile ${GCP_STORAGE_BUCKET}${BUCKET_IMPOSM_FOLDER}${bucketStateFile}
-  #   fi
-  # fi
+function uploadExpiredFiles() {
+  # Upload the expired-tile files to the cloud provider
+  for file in $(find "$IMPOSM3_EXPIRE_DIR" -type f -cmin -1); do
+    bucketFile=${file#*"$WORKDIR"}
+    echo "$(date +%F_%H:%M:%S): $file"
+    # AWS
+    if [ "$CLOUDPROVIDER" == "aws" ]; then
+      aws s3 cp "$file" "${AWS_S3_BUCKET}/${BUCKET_IMPOSM_FOLDER}${bucketFile}" --acl public-read
+    fi
+    # Google Storage
+    if [ "$CLOUDPROVIDER" == "gcp" ]; then
+      gsutil cp -a public-read "$file" "${GCP_STORAGE_BUCKET}${BUCKET_IMPOSM_FOLDER}${bucketFile}"
+    fi
+  done
 }

-function updateData(){
+function updateData() {
+  # Update the DB with new data from minute replication
   if [ "$OVERWRITE_STATE" = "true" ]; then
-    rm $diffdir/last.state.txt
+    rm -f "$DIFF_DIR/last.state.txt"
   fi
-  # Verify if last.state.txt exist
-  if [ -f "$diffdir/last.state.txt" ]; then
-    echo "Exist... $diffdir/last.state.txt"
-  else
-    # OverWrite the last.state.txt file with REPLICATION_URL and sequenceNumber=0
+
+  # Check if last.state.txt exists
+  if [ -f "$DIFF_DIR/last.state.txt" ]; then
+    echo "Exists... $DIFF_DIR/last.state.txt"
+  else
+    # Create last.state.txt with REPLICATION_URL and SEQUENCE_NUMBER from env vars
     echo "timestamp=0001-01-01T00\:00\:00Z
 sequenceNumber=$SEQUENCE_NUMBER
-replicationUrl=$REPLICATION_URL" > $diffdir/last.state.txt
+replicationUrl=$REPLICATION_URL" >"$DIFF_DIR/last.state.txt"
   fi

+  # Run imposm with an area limit only when TILER_IMPORT_LIMIT is set
   if [ -z "$TILER_IMPORT_LIMIT" ]; then
-    imposm run -config $workDir/config.json -expiretiles-dir $imposm3_expire_dir &
-    while true
-    do
-      echo "Updating...$(date +%F_%H-%M-%S)"
-      uploadExpiredFiles
-      sleep 1m
-    done
+    imposm run -config "$WORKDIR/config.json" -expiretiles-dir "$IMPOSM3_EXPIRE_DIR" &
   else
-    imposm run -config $workDir/config.json -limitto $workDir/$limitFile -expiretiles-dir $imposm3_expire_dir &
-    while true
-    do
-      echo "Updating...$(date +%F_%H-%M-%S)"
-      uploadExpiredFiles
-      sleep 1m
-    done
+    imposm run -config "$WORKDIR/config.json" -limitto "$WORKDIR/$LIMITFILE" -expiretiles-dir "$IMPOSM3_EXPIRE_DIR" &
   fi
+
+  while true; do
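+    # imposm runs in the background; poll every minute and upload freshly expired tile files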
+    echo "Uploading expired files... $(date +%F_%H-%M-%S)"
+    uploadExpiredFiles
+    sleep 1m
+  done
 }

-function importData () {
+function importData() {
+  # Import the PBF and Natural Earth files into the DB
   echo "Execute the missing functions"
   psql "postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB" -a -f config/postgis_helpers.sql
-  echo "Import Natural Earth"
+
+  echo "Import Natural Earth..."
   ./scripts/natural_earth.sh
-  echo "Import OSM Land"
+
+  echo "Import OSM Land..."
   ./scripts/osm_land.sh
-  echo "Import PBF file"
+
+  echo "Import PBF file..."
   if [ -z "$TILER_IMPORT_LIMIT" ]; then
     imposm import \
-    -config $workDir/config.json \
-    -read $PBFFile \
-    -write \
-    -diff -cachedir $cachedir -diffdir $diffdir
+      -config "$WORKDIR/config.json" \
+      -read "$PBFFILE" \
+      -write \
+      -diff -cachedir "$CACHE_DIR" -diffdir "$DIFF_DIR"
   else
-    wget $TILER_IMPORT_LIMIT -O $workDir/$limitFile
+    wget "$TILER_IMPORT_LIMIT" -O "$WORKDIR/$LIMITFILE"
     imposm import \
-    -config $workDir/config.json \
-    -read $PBFFile \
-    -write \
-    -diff -cachedir $cachedir -diffdir $diffdir \
-    -limitto $workDir/$limitFile
+      -config "$WORKDIR/config.json" \
+      -read "$PBFFILE" \
+      -write \
+      -diff -cachedir "$CACHE_DIR" -diffdir "$DIFF_DIR" \
+      -limitto "$WORKDIR/$LIMITFILE"
   fi
   imposm import \
-  -config $workDir/config.json \
-  -deployproduction
-  # -diff -cachedir $cachedir -diffdir $diffdir
+    -config "$WORKDIR/config.json" \
+    -deployproduction

   # These index will help speed up tegola tile generation
   psql "postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB" -a -f config/postgis_index.sql
   touch $INIT_FILE
+
   # Update the DB
   updateData
 }
-
 echo "Connecting to $POSTGRES_HOST DB"
 flag=true
 while "$flag" = true; do
   pg_isready -h $POSTGRES_HOST -p 5432 >/dev/null 2>&2 || continue
-  # Change flag to false to stop ping the DB
-  flag=false
-  echo "Check if $INIT_FILE exists"
-  if ([[ -f $INIT_FILE ]]); then
-    echo "Update the DB with osm data"
-    updateData
-  else
-    echo "Import PBF data to DB"
-    getData
-    if [ -f $PBFFile ]; then
-      echo "Start importing the data"
-      importData
-    fi
+  # Change flag to false to stop pinging the DB
+  flag=false
+  echo "Check if $INIT_FILE exists"
+  if [[ -f "$INIT_FILE" ]]; then
+    echo "Update the DB with osm data"
+    updateData
+  else
+    echo "Import PBF data to DB"
+    getData
+    if [ -f "$PBFFILE" ]; then
+      echo "Start importing the data"
+      importData
     fi
+  fi
 done
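
For reference, once the environment variables are substituted, the config.json that
this script writes to /mnt/data/config.json takes the shape sketched below; the
connection and replication values shown are illustrative placeholders, not values
taken from this patch:

    {
      "cachedir": "/mnt/data/cachedir",
      "diffdir": "/mnt/data/diff",
      "connection": "postgis://postgres:1234@db/osm",
      "mapping": "config/imposm3.json",
      "replication_url": "https://planet.openstreetmap.org/replication/minute/"
    }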