Skip to content

Commit

Permalink
Update tag info start script
Browse files Browse the repository at this point in the history
  • Loading branch information
Rub21 committed Apr 10, 2024
1 parent 3909c89 commit a4c962e
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 88 deletions.
29 changes: 16 additions & 13 deletions images/taginfo/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ruby:3.3 as builder
FROM ruby:3.0 as builder
ARG workdir=/usr/src/app
WORKDIR $workdir
RUN apt-get update && apt-get install -y \
Expand All @@ -17,9 +17,11 @@ RUN apt-get update && apt-get install -y \
sqlite3-pcre \
passenger \
libapache2-mod-passenger \
libreadline-dev \
tcl \
git \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Clone and setup taginfo-tools
RUN git clone https://github.com/taginfo/taginfo-tools.git $workdir/taginfo-tools && \
Expand All @@ -28,8 +30,13 @@ RUN git clone https://github.com/taginfo/taginfo-tools.git $workdir/taginfo-tool
mkdir build && cd build && \
cmake .. && make

RUN apt-get update && apt-get install -y passenger libapache2-mod-passenger git jq python3-pip
RUN apt-get clean && rm -rf /var/lib/apt/lists/*
RUN apt-get update && apt-get install -y \
passenger \
libapache2-mod-passenger \
git \
jq \
python3-pip \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

RUN gem install json rack-contrib puma
RUN gem install sinatra -v '< 3'
Expand All @@ -38,18 +45,14 @@ RUN gem install sinatra-r18n -v '5.0.2'
# Install AWS CLI
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
unzip awscliv2.zip && \
./aws/install
./aws/install && \
rm awscliv2.zip

# Clone and setup taginfo
RUN git clone https://github.com/taginfo/taginfo.git $workdir/taginfo && \
cd $workdir/taginfo && \
bundle install

COPY config/taginfo-config.json $workdir/

COPY overwrite_config.py $workdir/scripts/
COPY start.sh $workdir/scripts/

# COPY overwrite_config.py .
# COPY start.sh .
# sqlite3 sqlite3-pcre
COPY overwrite_config.py .
COPY start.sh .
74 changes: 74 additions & 0 deletions images/taginfo/config/taginfo-config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
{
"instance": {
"url": "http://localhost:4567",
"name": "OpenStreetMap Taginfo",
"description": "This is a <b>taginfo test instance</b>. Change this text in your <tt>taginfo-config.json</tt>.",
"about": "<p>This site is maintained by osm-seed</p>",
"icon": "/img/logo/test.png",
"contact": "[email protected]",
"area": "World",
"access_control_allow_origin": "*",
"sections": ["download", "taginfo"]
},
"geodistribution": {
"left": -180,
"bottom": -90,
"right": 180,
"top": 90,
"width": 360,
"height": 180,
"scale_image": 2,
"scale_compare_image" : 1,
"background_image": "/img/mapbg/world.png",
"image_attribution": "osm-seed"
},
"paths": {
"data_dir": "/usr/src/app/data",
"download_dir": "/usr/src/app/taginfo/web/public/download",
"bin_dir": "/usr/src/app/taginfo-tools/build/src",
"sqlite3_pcre_extension": "/usr/lib/sqlite3/pcre.so"
},
"xapi": {
"max_results": 1000,
"url_prefix": "https://overpass-api.de/api/xapi_meta?"
},
"turbo": {
"max_auto": 1000,
"url_prefix": "https://overpass-turbo.eu/?",
"wizard_area": "global"
},
"level0": {
"max_results": 50,
"overpass_url_prefix": "https://overpass-api.de/api/interpreter?",
"level0_url_prefix": "http://level0.osmz.ru/?"
},
"opensearch": {
"shortname": "Taginfo Test Instance",
"contact": "[email protected]",
"description": "Find metadata about OpenStreetMap tags",
"tags": "osm openstreetmap tag tags taginfo"
},
"sources": {
"download": "languages projects wiki ",
"create": "db chronology",
"db": {
"planetfile": "/osm/planet/var/current-planet.osm.pbf"
},
"chronology": {
"osm_history_file": "/osm/planet/var/current-history-planet.osh.pbf"
},
"master": {
"min_count_tags": 10000,
"min_tag_combination_count": 1000,
"min_count_for_map": 1000,
"min_count_relations_per_type": 100
}
},
"logging": {
"directory": "/usr/src/app/logs",
"min_duration": 0.1
},
"tagstats": {
"geodistribution": "FlexMem"
}
}
94 changes: 19 additions & 75 deletions images/taginfo/start.sh
Original file line number Diff line number Diff line change
@@ -1,50 +1,8 @@
#!/usr/bin/env bash

WORKDIR=/apps
DATA_DIR=$WORKDIR/data
UPDATE_DIR=$DATA_DIR/update
DOWNLOAD_DIR=$DATA_DIR/download

#######################################
# Generate $WORKDIR/taginfo-config.json from taginfo's example config.
# Drops the //-comment lines of the example file, rewrites its directory
# and planet-file settings to paths derived from WORKDIR, UPDATE_DIR and
# DATA_DIR, optionally overrides the list of sources to download, and
# finally runs overwrite_config.py on the generated file.
# Globals:
#   WORKDIR, UPDATE_DIR, DATA_DIR (read)
#   DOWNLOAD_DB (read, optional) - new value for .sources.download
#   OVERWRITE_CONFIG_URL (read)  - passed to overwrite_config.py via -u
# Arguments: none
# Outputs: progress message on stdout; writes $WORKDIR/taginfo-config.json
# Returns: exit status of overwrite_config.py
#######################################
set_taginfo_config() {
  local config="$WORKDIR/taginfo-config.json"

  echo "Setting up...$WORKDIR/taginfo-config.json"

  # Update dir values in taginfo-config.json.
  # Values are handed to jq with --arg instead of being spliced into the
  # program text, so a path containing quotes or spaces cannot break it.
  grep -v '^ *//' "$WORKDIR/taginfo/taginfo-config-example.json" |
    jq \
      --arg update_dir "$UPDATE_DIR" \
      --arg work_dir "$WORKDIR" \
      --arg data_dir "$DATA_DIR" \
      '.logging.directory = ($update_dir + "/log")
       | .paths.download_dir = ($update_dir + "/download")
       | .paths.bin_dir = ($work_dir + "/taginfo-tools/build/src")
       | .sources.db.planetfile = ($update_dir + "/planet/planet.osm.pbf")
       | .sources.chronology.osm_history_file = ($update_dir + "/planet/history-planet.osh.pbf")
       | .sources.db.bindir = ($update_dir + "/build/src")
       | .paths.data_dir = $data_dir' \
      >"$config"

  # languages wiki databases will be downloaded from OSM.
  # Only override .sources.download when DOWNLOAD_DB is actually set; the
  # previous test was a mangled ${DOWNLOAD_DB+z} that was always true and
  # replaced the example's default with an empty string.
  if [[ -n "${DOWNLOAD_DB+z}" ]]; then
    jq --arg a "${DOWNLOAD_DB}" '.sources.download = $a' "$config" >"${config}.tmp" &&
      mv -- "${config}.tmp" "$config"
  fi

  # Update instance values in taginfo-config.json
  python3 overwrite_config.py -u "${OVERWRITE_CONFIG_URL:-}" -f "$config"
}

#######################################
# Set .sources.create in $WORKDIR/taginfo-config.json.
# Globals:   WORKDIR (read)
# Arguments: $1 - value to store in .sources.create; when no argument is
#                 passed at all the config file is left untouched
# Returns:   0 when nothing had to be done or the file was rewritten,
#            non-zero when jq or the final rename failed
#######################################
updates_create_db() {
  # The previous guard was a mangled ${CREATE_DB+z} that was always true,
  # so a call without an argument blanked .sources.create.
  [[ -n "${1+z}" ]] || return 0

  local create_db="$1"
  local config="$WORKDIR/taginfo-config.json"
  # Keep the scratch file next to the config instead of in the current
  # directory so the rename does not depend on where the caller stands.
  local scratch="${config}.tmp"

  if jq --arg a "${create_db}" '.sources.create = $a' "$config" >"$scratch"; then
    mv -- "$scratch" "$config"
  else
    rm -f -- "$scratch"
    return 1
  fi
}

#######################################
# Patch the taginfo update scripts in place under $WORKDIR/taginfo/sources.
#  - sources/projects/update.sh: replace the default taginfo-projects
#    repository URL with TAGINFO_PROJECT_REPO (only when that is non-empty).
#  - sources/db/update.sh and sources/projects/update.sh: call `ruby`
#    instead of `run_ruby` for selected scripts.
# Globals:   WORKDIR (read), TAGINFO_PROJECT_REPO (read, optional)
# Arguments: none
# Outputs:   progress message on stdout; edits the two update.sh files
#######################################
updates_source_code() {
  local projects_update="$WORKDIR/taginfo/sources/projects/update.sh"
  local db_update="$WORKDIR/taginfo/sources/db/update.sh"
  local default_repo='https://github.com/taginfo/taginfo-projects.git'
  local repo_escaped script

  echo "Update...Procesor source code"

  # Replace the projects repo used to fetch the projects information.
  # Skipped when TAGINFO_PROJECT_REPO is empty: substituting an empty
  # string would erase the default URL from update.sh.
  if [[ -n "${TAGINFO_PROJECT_REPO:-}" ]]; then
    # Escape the characters that are special in a sed replacement
    # (backslash, '&', and the '|' delimiter used below). The escaped copy
    # is local, so the caller's TAGINFO_PROJECT_REPO is no longer modified.
    repo_escaped=$(printf '%s' "$TAGINFO_PROJECT_REPO" | sed -e 's/[\\&|]/\\&/g')
    sed -i -e "s|${default_repo}|${repo_escaped}|g" "$projects_update"
  fi

  # The following substitutions are required to avoid sqlite3 issues.
  sed -i -e 's|run_ruby "$SRCDIR/update_characters.rb"|ruby "$SRCDIR/update_characters.rb"|g' "$db_update"
  for script in import parse get_icons; do
    sed -i -e 's|run_ruby "$SRCDIR/'"$script"'.rb"|ruby "$SRCDIR/'"$script"'.rb"|g' "$projects_update"
  done
}
mkdir -p /osm/planet/var/
sed -i 's/"env -/"/g' /usr/src/app/taginfo/sources/util.sh

download_planet_files() {
mkdir -p $UPDATE_DIR/planet/
# Check if URL_PLANET_FILE_STATE exist and set URL_PLANET_FILE
if [[ ${URL_PLANET_FILE_STATE} && ${URL_PLANET_FILE_STATE-x} ]]; then
wget -q -O state.planet.txt --no-check-certificate - $URL_PLANET_FILE_STATE
Expand All @@ -56,49 +14,35 @@ download_planet_files() {
URL_HISTORY_PLANET_FILE=$(cat state.history.txt)
fi
# Download pbf files
echo "Downloading...$URL_PLANET_FILE"
wget -q -O $UPDATE_DIR/planet/planet.osm.pbf --no-check-certificate - $URL_PLANET_FILE
echo "Downloading...$URL_HISTORY_PLANET_FILE"
wget -q -O $UPDATE_DIR/planet/history-planet.osh.pbf --no-check-certificate - $URL_HISTORY_PLANET_FILE
rm state.planet.txt
rm state.history.txt
wget -O /osm/planet/var/current-planet.osm.pbf $URL_PLANET_FILE
wget -O /osm/planet/var/current-history-planet.osh.pbf $URL_HISTORY_PLANET_FILE
}

update() {
echo "Update...sqlite databases at $(date +%Y-%m-%d:%H-%M)"
# Download OSM planet replication and full-history files
process_data() {
download_planet_files
# In order to make it work we need to pass first one by one the creation and then all of them "db projects chronology"
for db in $CREATE_DB; do
echo "Update...taginfo-$db.db"
updates_create_db $db
$WORKDIR/taginfo/sources/update_all.sh $UPDATE_DIR
done
echo "Update...$CREATE_DB"
updates_create_db $CREATE_DB
$WORKDIR/taginfo/sources/update_all.sh $UPDATE_DIR
# Copy db files into data folder
cp $UPDATE_DIR/*/taginfo-*.db $DATA_DIR/
cp $UPDATE_DIR/taginfo-*.db $DATA_DIR/
# Link to download db zip files
chmod a=r $UPDATE_DIR/download
ln -sf $UPDATE_DIR/download $WORKDIR/taginfo/web/public/download
# Upload db to s3
aws s3 sync $DATA_DIR s3://$AWS_S3_BUCKET/$ENVIRONMENT --exclude "*" --include "*.db"
cd /usr/src/app/taginfo/sources/ && ./update_all.sh /usr/src/app/data
mv /usr/src/app/data/taginfo-*.db /usr/src/app/data/
mv /usr/src/app/data/*/taginfo-*.db /usr/src/app/data/
# if BUCKET_NAME is set upload data
if ! aws s3 ls "s3://$BUCKET_NAME/$ENVIRONMENT" 2>&1 | grep -q 'An error occurred'; then
aws s3 sync /usr/src/app/data/ s3://$AWS_S3_BUCKET/$ENVIRONMENT/ --exclude "*" --include "*.db"
fi
}


start_web() {
echo "Start...Taginfo web service"
aws s3 sync s3://$AWS_S3_BUCKET/$ENVIRONMENT/ $DATA_DIR/
cd $WORKDIR/taginfo/web && bundle exec rackup --host 0.0.0.0 -p 80
# if BUCKET_NAME is set download data
if ! aws s3 ls "s3://$BUCKET_NAME/$ENVIRONMENT" 2>&1 | grep -q 'An error occurred'; then
aws s3 sync s3://$AWS_S3_BUCKET/$ENVIRONMENT/ /usr/src/app/data/
fi
cd /usr/src/app/taginfo/web && ./taginfo.rb
}

ACTION=$1
set_taginfo_config
updates_source_code
mkdir -p $DATA_DIR/update/log/
if [ "$ACTION" = "web" ]; then
start_web
elif [ "$ACTION" = "data" ]; then
update
process_data
fi

0 comments on commit a4c962e

Please sign in to comment.