From 2a7e77ff24247b780367f09d55446bc3e32752a7 Mon Sep 17 00:00:00 2001
From: "Ruben L. Mendoza"
Date: Tue, 20 Feb 2024 10:22:15 -0500
Subject: [PATCH] ImpOSM update sequence (#313)

* Update script to keep state of sequence files - imposm

* Update docker file and start.sh - imposm

* Add liveness probe - imposm

* Add UPLOAD_EXPIRED_FILES env var for imposm

* Add default value for UPLOAD_EXPIRED_FILES in imposm

* set initialDelaySeconds: 3600 in liveness probe - imposm

* Increase initialDelaySeconds to 2 hours

---------

---
Review notes (everything between the three-dash separator above and the first
"diff --git" line is commentary and is not part of the commit message):

 * NOTE(review): this copy was rebuilt from a whitespace-collapsed paste.
   Line breaks, indentation and the position of blank context lines are
   reconstructed; confirm them against the tree before applying.
 * start.sh / getFormattedDate: "local var=$(cmd)" followed by "[ $? -eq 0 ]"
   tests the exit status of "local", not of "cmd", so the error branch could
   never run. The variable is now declared first and the assignment itself is
   used as the "if" condition.
 * start.sh / updateData: the script runs under "set -e", so a plain "rm" of
   last.state.txt aborted the script when OVERWRITE_STATE=true and the file
   did not exist. It is now "rm -f" with the path quoted.
 * start.sh / uploadExpiredFiles: the "Uploading expired file" message always
   printed AWS_S3_BUCKET, also when CLOUDPROVIDER=gcp. It now prints the
   provider name.
 * start.sh / updateData: comment typo "form" -> "from".
 * tiler-imposm-statefulset.yaml: "| default \"true\"" treats a boolean false
   as empty, so UPLOAD_EXPIRED_FILES: false still rendered "true". The value
   is now rendered as "false" only when explicitly set to false, and "true"
   otherwise (including when unset).

 compose/tiler.yml                             |  57 +++--
 images/tiler-imposm/Dockerfile                |  10 +-
 images/tiler-imposm/liveness.sh               |   8 +
 images/tiler-imposm/start.sh                  | 237 +++++++++--------
 .../tiler-imposm-statefulset.yaml             |  28 ++-
 osm-seed/values.yaml                          |   1 +
 6 files changed, 186 insertions(+), 155 deletions(-)
 create mode 100755 images/tiler-imposm/liveness.sh

diff --git a/compose/tiler.yml b/compose/tiler.yml
index a880d3d8..b94acd66 100644
--- a/compose/tiler.yml
+++ b/compose/tiler.yml
@@ -1,4 +1,4 @@
-version: '3'
+version: '3.8'
 services:
   # ######################################################
   # ### Tiler DB
@@ -9,11 +9,12 @@ services:
       context: ../images/tiler-db
       dockerfile: Dockerfile
     ports:
-      - "5433:5432"
+    - "5433:5432"
     volumes:
-      - ../data/tiler-db-data:/var/lib/postgresql/data
+    - ../data/tiler-db-data:/var/lib/postgresql/data
     env_file:
-      - ../envs/.env.tiler-db
+    - ../envs/.env.tiler-db
+    restart: always
   # ######################################################
   # ### Tiler imposm for importing data
   # ######################################################
@@ -23,18 +24,23 @@ services:
       context: ../images/tiler-imposm
       dockerfile: Dockerfile
     volumes:
-      - ../data/tiler-imposm-data:/mnt/data
+    - ../data/tiler-imposm-data:/mnt/data
+    # - ../images/tiler-imposm:/osm
     depends_on:
-      - tiler-db
-    command: >
-      /bin/bash -c "
-      echo starting imposm!;
-      sleep 1m;
-      ./start.sh;
-      "
+    - tiler-db
+    command:
+    - sh
+    - -c
+    - "sleep 60 && ./start.sh"
     env_file:
-      - ../envs/.env.tiler-db
-      - ../envs/.env.tiler-imposm
+    - ../envs/.env.tiler-db
+    - ../envs/.env.tiler-imposm
+    healthcheck:
+      test: ["CMD", "./liveness.sh"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+    restart: always
   # ######################################################
   # ### Tiler imposm for importing data
   # ######################################################
@@ -44,18 +50,17 @@ services:
       context: ../images/tiler-server
       dockerfile: Dockerfile
     volumes:
-      - ../data/tiler-server-data:/mnt/data
+    - ../data/tiler-server-data:/mnt/data
     depends_on:
-      - tiler-db
-      - tiler-imposm
+    - tiler-db
+    - tiler-imposm
     ports:
-      - "9090:9090"
-    command: >
-      /bin/bash -c "
-      echo Starting tiles server!;
-      sleep 2m;
-      ./start.sh;
-      "
+    - "9090:9090"
+    command:
+    - sh
+    - -c
+    - "sleep 60 && ./start.sh"
     env_file:
-      - ../envs/.env.tiler-db
-      - ../envs/.env.tiler-server
\ No newline at end of file
+    - ../envs/.env.tiler-db
+    - ../envs/.env.tiler-server
+    restart: always
diff --git a/images/tiler-imposm/Dockerfile b/images/tiler-imposm/Dockerfile
index 0c38c2d1..10b7fd2b 100644
--- a/images/tiler-imposm/Dockerfile
+++ b/images/tiler-imposm/Dockerfile
@@ -19,12 +19,14 @@ RUN apt-get install -y \
     curl \
     wget \
     unzip \
-    software-properties-common
+    software-properties-common && \
+    rm -rf /var/lib/apt/lists/*
 
 # # Install python
 RUN add-apt-repository ppa:deadsnakes/ppa && \
     apt-get update && \
     apt-get install -y build-essential python3.6 python3.6-dev python3-pip && \
+    rm -rf /var/lib/apt/lists/* && \
     python3 -m pip install pip --upgrade && \
     python3 -m pip install wheel
 
@@ -53,7 +55,5 @@ WORKDIR $IMPOSMDATA
 VOLUME $IMPOSMDATA
 
 WORKDIR /osm
-COPY config config
-COPY scripts scripts
-COPY start.sh start.sh
-CMD ./start.sh
+COPY . .
+CMD ./start.sh
\ No newline at end of file
diff --git a/images/tiler-imposm/liveness.sh b/images/tiler-imposm/liveness.sh
new file mode 100755
index 00000000..a7b249ea
--- /dev/null
+++ b/images/tiler-imposm/liveness.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+if ps aux | grep -v grep | grep "imposm" >/dev/null; then
+    echo "imposm process is running."
+    exit 0
+else
+    echo "imposm process is not running." 1>&2
+    exit 1
+fi
diff --git a/images/tiler-imposm/start.sh b/images/tiler-imposm/start.sh
index cf2d7922..c9fa48a0 100755
--- a/images/tiler-imposm/start.sh
+++ b/images/tiler-imposm/start.sh
@@ -1,175 +1,180 @@
 #!/bin/bash
 set -e
-stateFile="state.txt"
-PBFFile="osm.pbf"
-limitFile="limitFile.geojson"
+
+STATEFILE="state.txt"
+PBFFILE="osm.pbf"
+LIMITFILE="limitFile.geojson"
 
 # directories to keep the imposm's cache for updating the db
-workDir=/mnt/data
-cachedir=$workDir/cachedir
-mkdir -p $cachedir
-diffdir=$workDir/diff
-mkdir -p $diffdir
-imposm3_expire_dir=$workDir/imposm3_expire_dir
-mkdir -p $imposm3_expire_dir
-# imposm3_expire_state_dir=$workDir/imposm3_expire_state
-# mkdir -p $imposm3_expire_state_dir
-# Setting directory
-settingDir=/osm
+WORKDIR=/mnt/data
+CACHE_DIR=$WORKDIR/cachedir
+DIFF_DIR=$WORKDIR/diff
+IMPOSM3_EXPIRE_DIR=$WORKDIR/imposm3_expire_dir
+
+# # Setting directory
+# settingDir=/osm
 
 # Folder to store the imposm expider files in s3 or gs
 BUCKET_IMPOSM_FOLDER=imposm
 INIT_FILE=/mnt/data/init_done
 
-# Create config file to set variable for imposm
-echo "{" > $workDir/config.json
-echo "\"cachedir\": \"$cachedir\"," >> $workDir/config.json
-echo "\"diffdir\": \"$diffdir\"," >> $workDir/config.json
-echo "\"connection\": \"postgis://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB\"," >> $workDir/config.json
-echo "\"mapping\": \"config/imposm3.json\"," >> $workDir/config.json
-echo "\"replication_url\": \"$REPLICATION_URL\"" >> $workDir/config.json
-echo "}" >> $workDir/config.json
-
-function getData () {
-    # Import from pubic url, usualy it come from osm
-    if [ $TILER_IMPORT_FROM == "osm" ]; then
-        wget $TILER_IMPORT_PBF_URL -O $PBFFile
-    fi
-    if [ $TILER_IMPORT_FROM == "osmseed" ]; then
-        if [ $CLOUDPROVIDER == "aws" ]; then
+mkdir -p "$CACHE_DIR" "$DIFF_DIR" "$IMPOSM3_EXPIRE_DIR"
+
+# Create config file to set variables for imposm
+{
+    echo "{"
+    echo "\"cachedir\": \"$CACHE_DIR\","
+    echo "\"diffdir\": \"$DIFF_DIR\","
+    echo "\"connection\": \"postgis://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB\","
+    echo "\"mapping\": \"config/imposm3.json\","
+    echo "\"replication_url\": \"$REPLICATION_URL\""
+    echo "}"
+} >"$WORKDIR/config.json"
+
+function getData() {
+    ### Get the PBF file from the cloud provider or public URL
+    if [ "$TILER_IMPORT_FROM" == "osm" ]; then
+        wget "$TILER_IMPORT_PBF_URL" -O "$PBFFILE"
+    elif [ "$TILER_IMPORT_FROM" == "osmseed" ]; then
+        if [ "$CLOUDPROVIDER" == "aws" ]; then
             # Get the state.txt file from S3
-            aws s3 cp $AWS_S3_BUCKET/planet/full-history/$stateFile .
-            PBFCloudPath=$(tail -n +1 $stateFile)
-            aws s3 cp $PBFCloudPath $PBFFile
-        fi
-        # Google storage
-        if [ $CLOUDPROVIDER == "gcp" ]; then
+            aws s3 cp "$AWS_S3_BUCKET/planet/full-history/$STATEFILE" .
+            PBFCloudPath=$(tail -n +1 "$STATEFILE")
+            aws s3 cp "$PBFCloudPath" "$PBFFILE"
+        elif [ "$CLOUDPROVIDER" == "gcp" ]; then
             # Get the state.txt file from GS
-            gsutil cp $GCP_STORAGE_BUCKET/planet/full-history/$stateFile .
-            PBFCloudPath=$(tail -n +1 $stateFile)
-            gsutil cp $PBFCloudPath $PBFFile
+            gsutil cp "$GCP_STORAGE_BUCKET/planet/full-history/$STATEFILE" .
+            PBFCloudPath=$(tail -n +1 "$STATEFILE")
+            gsutil cp "$PBFCloudPath" "$PBFFILE"
+        fi
+    fi
+}
+
+getFormattedDate() {
+    local file_path="$1"
+    if command -v stat >/dev/null 2>&1; then
+        local modification_date
+        if modification_date=$(stat -c %Y "$file_path"); then
+            local formatted_date=$(date -d "@$modification_date" "+%Y-%m-%d:%H:%M:%S")
+            echo "Created/Updated date of $file_path: $formatted_date"
+        else
+            echo "Error: Unable to get file modification date for file ${file_path}"
         fi
+    else
+        echo "Error: 'stat' command not found. Unable to get file modification date, for file ${file_path}"
     fi
 }
 
-function uploadExpiredFiles(){
-    # create statte file
-    # dateStr=$(date '+%y%m%d%H%M%S')
-    # stateFile=$imposm3_expire_state_dir/expired_${dateStr}.txt
-    # bucketStateFile=${stateFile#*"$workDir"}
-
-    for file in $(find $imposm3_expire_dir -type f -cmin -1); do
-        bucketFile=${file#*"$workDir"}
-        echo $(date +%F_%H:%M:%S)": New file..." $file
-        # echo $file >> $stateFile
+function uploadExpiredFiles() {
+    # Upload the expired files to the cloud provider
+    for file in $(find "$IMPOSM3_EXPIRE_DIR" -type f -cmin -1); do
+        bucketFile=${file#*"$WORKDIR"}
+        getFormattedDate "$file"
+        # UPLOAD_EXPIRED_FILES=true to upload the expired to cloud provider
+        if [ "$UPLOAD_EXPIRED_FILES" == "true" ]; then
+            echo "Uploading expired file ${file} to cloud provider: ${CLOUDPROVIDER}"
+            # AWS
             if [ "$CLOUDPROVIDER" == "aws" ]; then
-                aws s3 cp $file ${AWS_S3_BUCKET}/${BUCKET_IMPOSM_FOLDER}${bucketFile} --acl public-read
+                aws s3 cp "$file" "${AWS_S3_BUCKET}/${BUCKET_IMPOSM_FOLDER}${bucketFile}" --acl public-read
             fi
+            # Google Storage
             if [ "$CLOUDPROVIDER" == "gcp" ]; then
-                gsutil cp -a public-read $file ${GCP_STORAGE_BUCKET}${BUCKET_IMPOSM_FOLDER}${bucketFile}
+                gsutil cp -a public-read "$file" "${GCP_STORAGE_BUCKET}${BUCKET_IMPOSM_FOLDER}${bucketFile}"
             fi
-    done
-    # Upload state File
-    # if [[ -f "$stateFile" ]]; then
-    #     # AWS
-    #     if [ "$CLOUDPROVIDER" == "aws" ]; then
-    #         aws s3 cp $stateFile ${AWS_S3_BUCKET}/${BUCKET_IMPOSM_FOLDER}${bucketStateFile} --acl public-read
-    #     fi
-    #     # Google Storage
-    #     if [ "$CLOUDPROVIDER" == "gcp" ]; then
-    #         gsutil cp -a public-read $stateFile ${GCP_STORAGE_BUCKET}${BUCKET_IMPOSM_FOLDER}${bucketStateFile}
-    #     fi
-    # fi
+        else
+            echo "Expired files were not uploaded because UPLOAD_EXPIRED_FILES=${UPLOAD_EXPIRED_FILES}"
+        fi
+    done
 }
 
-function updateData(){
+function updateData() {
+    ### Update the DB with the new data from minute replication
     if [ "$OVERWRITE_STATE" = "true" ]; then
-        rm $diffdir/last.state.txt
+        rm -f "$DIFF_DIR/last.state.txt"
     fi
-    # Verify if last.state.txt exist
-    if [ -f "$diffdir/last.state.txt" ]; then
-        echo "Exist... $diffdir/last.state.txt"
-    else
-        # OverWrite the last.state.txt file with REPLICATION_URL and sequenceNumber=0
+
+    # Check if last.state.txt exists
+    if [ -f "$DIFF_DIR/last.state.txt" ]; then
+        echo "Exist... $DIFF_DIR/last.state.txt"
+    else
+        # Create last.state.txt file with REPLICATION_URL and SEQUENCE_NUMBER from env vars
         echo "timestamp=0001-01-01T00\:00\:00Z
         sequenceNumber=$SEQUENCE_NUMBER
-        replicationUrl=$REPLICATION_URL" > $diffdir/last.state.txt
+        replicationUrl=$REPLICATION_URL" >$DIFF_DIR/last.state.txt
     fi
 
+    # Check if the limit file exists
     if [ -z "$TILER_IMPORT_LIMIT" ]; then
-        imposm run -config $workDir/config.json -expiretiles-dir $imposm3_expire_dir &
-        while true
-        do
-            echo "Updating...$(date +%F_%H-%M-%S)"
-            uploadExpiredFiles
-            sleep 1m
-        done
+        imposm run -config "$WORKDIR/config.json" -expiretiles-dir "$IMPOSM3_EXPIRE_DIR" &
     else
-        imposm run -config $workDir/config.json -limitto $workDir/$limitFile -expiretiles-dir $imposm3_expire_dir &
-        while true
-        do
-            echo "Updating...$(date +%F_%H-%M-%S)"
-            uploadExpiredFiles
-            sleep 1m
-        done
+        imposm run -config "$WORKDIR/config.json" -limitto "$WORKDIR/$LIMITFILE" -expiretiles-dir "$IMPOSM3_EXPIRE_DIR" &
     fi
+
+    while true; do
+        echo "Upload expired files... $(date +%F_%H-%M-%S)"
+        uploadExpiredFiles
+        sleep 1m
+    done
 }
 
-function importData () {
+function importData() {
+    ### Import the PBF and Natural Earth files to the DB
     echo "Execute the missing functions"
     psql "postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB" -a -f config/postgis_helpers.sql
-    echo "Import Natural Earth"
+
+    echo "Import Natural Earth..."
     ./scripts/natural_earth.sh
-    echo "Import OSM Land"
+
+    echo "Import OSM Land..."
     ./scripts/osm_land.sh
-    echo "Import PBF file"
+
+    echo "Import PBF file..."
     if [ -z "$TILER_IMPORT_LIMIT" ]; then
         imposm import \
-        -config $workDir/config.json \
-        -read $PBFFile \
-        -write \
-        -diff -cachedir $cachedir -diffdir $diffdir
+            -config $WORKDIR/config.json \
+            -read $PBFFILE \
+            -write \
+            -diff -cachedir $CACHE_DIR -diffdir $DIFF_DIR
     else
-        wget $TILER_IMPORT_LIMIT -O $workDir/$limitFile
+        wget $TILER_IMPORT_LIMIT -O $WORKDIR/$LIMITFILE
         imposm import \
-        -config $workDir/config.json \
-        -read $PBFFile \
-        -write \
-        -diff -cachedir $cachedir -diffdir $diffdir \
-        -limitto $workDir/$limitFile
+            -config $WORKDIR/config.json \
+            -read $PBFFILE \
+            -write \
+            -diff -cachedir $CACHE_DIR -diffdir $DIFF_DIR \
+            -limitto $WORKDIR/$LIMITFILE
     fi
 
     imposm import \
-    -config $workDir/config.json \
-    -deployproduction
-    # -diff -cachedir $cachedir -diffdir $diffdir
+        -config $WORKDIR/config.json \
+        -deployproduction
 
     # These index will help speed up tegola tile generation
     psql "postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB" -a -f config/postgis_index.sql
 
     touch $INIT_FILE
+
     # Update the DB
     updateData
 }
 
 
-
 echo "Connecting to $POSTGRES_HOST DB"
 flag=true
 while "$flag" = true; do
     pg_isready -h $POSTGRES_HOST -p 5432 >/dev/null 2>&2 || continue
-        # Change flag to false to stop ping the DB
-        flag=false
-        echo "Check if $INIT_FILE exists"
-        if ([[ -f $INIT_FILE ]]); then
-            echo "Update the DB with osm data"
-            updateData
-        else
-            echo "Import PBF data to DB"
-            getData
-            if [ -f $PBFFile ]; then
-                echo "Start importing the data"
-                importData
-            fi
+    # Change flag to false to stop ping the DB
+    flag=false
+    echo "Check if $INIT_FILE exists"
+    if ([[ -f $INIT_FILE ]]); then
+        echo "Update the DB with osm data"
+        updateData
+    else
+        echo "Import PBF data to DB"
+        getData
+        if [ -f $PBFFILE ]; then
+            echo "Start importing the data"
+            importData
         fi
+    fi
 done
diff --git a/osm-seed/templates/tiler-imposm/tiler-imposm-statefulset.yaml b/osm-seed/templates/tiler-imposm/tiler-imposm-statefulset.yaml
index f25ff266..ab344bab 100644
--- a/osm-seed/templates/tiler-imposm/tiler-imposm-statefulset.yaml
+++ b/osm-seed/templates/tiler-imposm/tiler-imposm-statefulset.yaml
@@ -24,6 +24,16 @@ spec:
         - name: {{ .Release.Name }}-tiler-imposm-statefulset
           image: {{ .Values.tilerImposm.image.name }}:{{ .Values.tilerImposm.image.tag }}
           command: ['./start.sh']
+          livenessProbe:
+            exec:
+              command:
+              - /bin/bash
+              - -c
+              - ./liveness.sh
+            initialDelaySeconds: 7200
+            timeoutSeconds: 5
+            periodSeconds: 10
+            failureThreshold: 3
           {{- if .Values.tilerImposm.resources.enabled }}
           resources:
             requests:
@@ -37,23 +47,25 @@ spec:
             - name: POSTGRES_HOST
               value: {{ .Release.Name }}-tiler-db
             - name: POSTGRES_DB
-              value: {{ .Values.tilerDb.env.POSTGRES_DB }}
+              value: {{ .Values.tilerDb.env.POSTGRES_DB | quote }}
             - name: POSTGRES_PASSWORD
-              value: {{ quote .Values.tilerDb.env.POSTGRES_PASSWORD }}
+              value: {{ .Values.tilerDb.env.POSTGRES_PASSWORD | quote }}
             - name: POSTGRES_USER
-              value: {{ .Values.tilerDb.env.POSTGRES_USER }}
+              value: {{ .Values.tilerDb.env.POSTGRES_USER | quote }}
             - name: POSTGRES_PORT
               value: {{ .Values.tilerDb.env.POSTGRES_PORT | quote }}
             - name: TILER_IMPORT_FROM
-              value: {{ .Values.tilerImposm.env.TILER_IMPORT_FROM }}
+              value: {{ .Values.tilerImposm.env.TILER_IMPORT_FROM | quote }}
             - name: TILER_IMPORT_PBF_URL
-              value: {{ .Values.tilerImposm.env.TILER_IMPORT_PBF_URL }}
+              value: {{ .Values.tilerImposm.env.TILER_IMPORT_PBF_URL | quote }}
             - name: REPLICATION_URL
-              value: {{ .Values.tilerImposm.env.REPLICATION_URL }}
+              value: {{ .Values.tilerImposm.env.REPLICATION_URL | quote }}
             - name: SEQUENCE_NUMBER
-              value: {{ .Values.tilerImposm.env.SEQUENCE_NUMBER | quote}}
+              value: {{ .Values.tilerImposm.env.SEQUENCE_NUMBER | quote }}
             - name: OVERWRITE_STATE
-              value: {{ quote .Values.tilerImposm.env.OVERWRITE_STATE }}
+              value: {{ .Values.tilerImposm.env.OVERWRITE_STATE | quote }}
+            - name: UPLOAD_EXPIRED_FILES
+              value: {{ ne (toString .Values.tilerImposm.env.UPLOAD_EXPIRED_FILES) "false" | quote }}
             - name: CLOUDPROVIDER
               value: {{ .Values.cloudProvider }}
             # In case cloudProvider=aws
diff --git a/osm-seed/values.yaml b/osm-seed/values.yaml
index 4f4c5e79..b9bfc19d 100644
--- a/osm-seed/values.yaml
+++ b/osm-seed/values.yaml
@@ -392,6 +392,7 @@ tilerImposm:
     TILER_IMPORT_FROM: osm
     TILER_IMPORT_PBF_URL: http://download.geofabrik.de/europe/monaco-latest.osm.pbf
     TILER_IMPORT_LIMIT: https://gist.githubusercontent.com/Rub21/96bdcac5eb11f0b36ba8d0352ac537f4/raw/2606f2e207d4a0d895897a83efa1efacefd36eb4/monaco.geojson
+    UPLOAD_EXPIRED_FILES: true
   persistenceDisk:
     enabled: false
     accessMode: ReadWriteOnce