Links
-IATI Dataset Index - Minimal (JSON)
- - -diff --git a/.gitignore b/.gitignore index a8aa899..258bbca 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,8 @@ __pycache__ /.env +/azure-provision/default-github-config.env + /azure-deployment/azure-resource-manager-deployment-manifest.yml /azure-deployment/manual-azure-deploy-secrets.env /azure-deployment/manual-azure-deploy-variables.env diff --git a/azure-deployment/manual-azure-deploy-from-local.sh b/azure-deployment/manual-azure-deploy-from-local.sh index 481e605..6fafe59 100755 --- a/azure-deployment/manual-azure-deploy-from-local.sh +++ b/azure-deployment/manual-azure-deploy-from-local.sh @@ -13,22 +13,19 @@ if [ ! -d ".git" ]; then exit 1 fi -git remote -v | grep "IATI/bulk-data-service.git" > /dev/null +(git remote -v 2> /dev/null | grep "IATI/bulk-data-service.git" > /dev/null) || (echo "$0: script must be run from the root of the bulk-data-service repository"; exit 1) -if [ "$?" != 0 ]; then - echo "$0: script must be run from the root of the bulk-data-service repository" - exit 1 -fi - -. ./manual-azure-deploy-secrets.env +. ./azure-deployment/manual-azure-deploy-secrets.env TARGET_ENVIRONMENT=$1 APP_NAME=bulk-data-service -RESOURCE_GROUP_NAME=rg-${APP_NAME}-${TARGET_ENVIRONMENT} +RESOURCE_GROUP_NAME="rg-${APP_NAME}-${TARGET_ENVIRONMENT}" + +CONTAINER_GROUP_INSTANCE_NAME="aci-${APP_NAME}-${TARGET_ENVIRONMENT}" -CONTAINER_GROUP_INSTANCE_NAME=aci-${APP_NAME}-${TARGET_ENVIRONMENT} +DOCKER_IMAGE_TAG=$(git log -n1 --format=format:"%H") LOCAL_DEPLOY=true @@ -36,21 +33,21 @@ echo "Generating Azure ARM deployment manifest from template" . ./azure-deployment/generate-manifest-from-template.sh # build the docker image for the Bulk Data Service -docker build . -t criati.azurecr.io/bulk-data-service-$TARGET_ENVIRONMENT +docker build . -t "criati.azurecr.io/bulk-data-service-$TARGET_ENVIRONMENT:$DOCKER_IMAGE_TAG" # push Bulk Data Service image to Azure -docker push criati.azurecr.io/bulk-data-service-$TARGET_ENVIRONMENT +docker push "criati.azurecr.io/bulk-data-service-$TARGET_ENVIRONMENT:$DOCKER_IMAGE_TAG" # now configure, build and push the docker image for the nginx reverse proxy # create password file -htpasswd -c -b ./azure-deployment/nginx-reverse-proxy/htpasswd prom $PROM_NGINX_REVERSE_PROXY_PASSWORD +htpasswd -c -b ./azure-deployment/nginx-reverse-proxy/htpasswd prom "$PROM_NGINX_REVERSE_PROXY_PASSWORD" # make the image for the nginx reverse proxy (for putting HTTP basic auth on the # prom client) -docker build ./azure-deployment/nginx-reverse-proxy -t criati.azurecr.io/bds-prom-nginx-reverse-proxy-$TARGET_ENVIRONMENT +docker build ./azure-deployment/nginx-reverse-proxy -t "criati.azurecr.io/bds-prom-nginx-reverse-proxy-$TARGET_ENVIRONMENT:$DOCKER_IMAGE_TAG" -docker push criati.azurecr.io/bds-prom-nginx-reverse-proxy-$TARGET_ENVIRONMENT +docker push "criati.azurecr.io/bds-prom-nginx-reverse-proxy-$TARGET_ENVIRONMENT:$DOCKER_IMAGE_TAG" echo az container delete \ diff --git a/azure-provision/add-default-config-to-github-variables.sh b/azure-provision/add-default-config-to-github-variables.sh new file mode 100755 index 0000000..ce17d50 --- /dev/null +++ b/azure-provision/add-default-config-to-github-variables.sh @@ -0,0 +1,36 @@ +#!/usr/bin/bash + +set -o errexit # abort on nonzero exitstatus +set -o nounset # abort on unbound variable +set -o pipefail # don't hide errors within pipes + +if [[ ! -v "1" ]]; then + echo "usage: $0 TARGET_ENVIRONMENT" + echo " TARGET_ENVIRONMENT should likely be 'test', 'dev', or 'prod'" + exit 1 +fi + +if [[ ! -d ".git" ]]; then + echo "$0: script must be run from the root of the bulk-data-service repository" + exit 1 +fi + +(git remote -v 2> /dev/null | grep "IATI/bulk-data-service.git" > /dev/null) || (echo "$0: script must be run from the root of the bulk-data-service repository"; exit 1) + +if [[ "$1" == "" ]]; then + echo "TARGET_ENVIRONMENT cannot be empty" + exit 2 +fi + +if [[ $(which gh > /dev/null) ]]; then + echo "This script requires the Github command line client to be installed" + exit 3 +fi + +TARGET_ENVIRONMENT="$1" + +cp -f azure-provision/default-github-config-template.env azure-provision/default-github-config.env + +sed -i "s/^/${TARGET_ENVIRONMENT^^}/g" azure-provision/default-github-config.env + +gh variable set --env-file ./azure-provision/default-github-config.env diff --git a/azure-provision/azure-create-resources.sh b/azure-provision/azure-create-resources.sh index 80514b6..1713657 100755 --- a/azure-provision/azure-create-resources.sh +++ b/azure-provision/azure-create-resources.sh @@ -4,48 +4,63 @@ # installation of the Bulk Data Service app: # resource group, log analytics workspace, storage account, postgres database. -set -euo pipefail +set -o errexit # abort on nonzero exitstatus +set -o nounset # abort on unbound variable +set -o pipefail # don't hide errors within pipes -if [ ! -v "1" ]; then +if [[ ! -v "1" ]]; then echo "usage: $0 TARGET_ENVIRONMENT" echo " TARGET_ENVIRONMENT should likely be 'test', 'dev', or 'prod'" exit 1 fi -if [ "$1" == "" ]; then +if [[ ! -d ".git" ]]; then + echo "$0: script must be run from the root of the bulk-data-service repository but the current directory doesn't look like a git repo" + exit 1 +fi + +(git remote -v 2> /dev/null | grep "IATI/bulk-data-service.git" > /dev/null) || (echo "$0: script must be run from the root of the bulk-data-service repository"; exit 1) + +if [[ "$1" == "" ]]; then echo "TARGET_ENVIRONMENT cannot be empty" exit 2 fi -if [ ! -v "BDS_DB_ADMIN_PASSWORD" ] || [ "$BDS_DB_ADMIN_PASSWORD" == "" ]; then +if [[ ! -v "BDS_DB_ADMIN_PASSWORD" ]] || [[ "$BDS_DB_ADMIN_PASSWORD" == "" ]]; then echo "The environment variable BDS_DB_ADMIN_PASSWORD must be set" exit 2 fi -if [ $(which jq > /dev/null) > 0 ]; then +if [[ $(which jq > /dev/null) ]]; then echo "This script requires the tool 'jq' to be installed" exit 3 fi -TARGET_ENVIRONMENT_ENV_VAR=$(echo "$1" | tr '[:lower:]' '[:upper:]') +TARGET_ENVIRONMENT="$1" + +TARGET_ENVIRONMENT_UPPER=$(echo "$TARGET_ENVIRONMENT" | tr '[:lower:]' '[:upper:]') SUBSCRIPTION_ID=$(az account list | jq -r '.[0].id') APP_NAME=bulk-data-service -APP_NAME_NO_HYPHENS=$(echo $APP_NAME | sed -e 's/-//g') +APP_NAME_NO_HYPHENS="${APP_NAME//-/}" -RESOURCE_GROUP_NAME=rg-${APP_NAME}-$1 +RESOURCE_GROUP_NAME="rg-${APP_NAME}-$TARGET_ENVIRONMENT" -LOG_ANALYTICS_NAME=log-${APP_NAME}-$1 +LOG_ANALYTICS_NAME="log-${APP_NAME}-$TARGET_ENVIRONMENT" -STORAGE_ACCOUNT_NAME=sa${APP_NAME_NO_HYPHENS}$1 +STORAGE_ACCOUNT_NAME="sa${APP_NAME_NO_HYPHENS}$TARGET_ENVIRONMENT" -POSTGRES_SERVER_NAME=${APP_NAME}-db-$1 +POSTGRES_SERVER_NAME="${APP_NAME}-db-$TARGET_ENVIRONMENT" -SERVICE_PRINCIPAL_NAME=sp-${APP_NAME}-$1 +SERVICE_PRINCIPAL_NAME="sp-${APP_NAME}-$TARGET_ENVIRONMENT" -LOCATION=uksouth +LOCATION="uksouth" + +WEB_BASE_URL_PREFIX=$([[ "$TARGET_ENVIRONMENT" == "prod" ]] && echo "" || echo "${TARGET_ENVIRONMENT}-") + +WEB_BASE_URL="https://${WEB_BASE_URL_PREFIX}bulk-data.iatistandard.org" echo echo "Proceeding will create Azure services with the following names:" @@ -56,6 +71,7 @@ echo "Log analytics workspace name : $LOG_ANALYTICS_NAME" echo "Storage account name : $STORAGE_ACCOUNT_NAME" echo "Postgres server name : $POSTGRES_SERVER_NAME" echo "Service principal name : $SERVICE_PRINCIPAL_NAME" +echo "Public-facing access URL : $WEB_BASE_URL" echo echo echo "(Using subscription: $SUBSCRIPTION_ID)" @@ -72,16 +88,16 @@ then fi # Create Resource Group -echo az group create --name $RESOURCE_GROUP_NAME --location $LOCATION -az group create --name $RESOURCE_GROUP_NAME --location $LOCATION +echo az group create --name "$RESOURCE_GROUP_NAME" --location "$LOCATION" +az group create --name "$RESOURCE_GROUP_NAME" --location "$LOCATION" echo # Create Log Analytics Workspace -echo az monitor log-analytics workspace create --resource-group $RESOURCE_GROUP_NAME \ - --workspace-name $LOG_ANALYTICS_NAME -LOG_ANALYTICS_CREATE_OUTPUT=$(az monitor log-analytics workspace create --resource-group $RESOURCE_GROUP_NAME \ - --workspace-name $LOG_ANALYTICS_NAME) +echo az monitor log-analytics workspace create --resource-group "$RESOURCE_GROUP_NAME" \ + --workspace-name "$LOG_ANALYTICS_NAME" +LOG_ANALYTICS_CREATE_OUTPUT=$(az monitor log-analytics workspace create --resource-group "$RESOURCE_GROUP_NAME" \ + --workspace-name "$LOG_ANALYTICS_NAME") echo "LOG_ANALYTICS_WORKSPACE_ID=echo ${LOG_ANALYTICS_CREATE_OUTPUT//[$'\t\r\n ']} | jq -r '.customerId'" @@ -90,21 +106,21 @@ LOG_ANALYTICS_WORKSPACE_ID=$(echo "${LOG_ANALYTICS_CREATE_OUTPUT//[$'\t\r\n ']}" echo Workspace ID is: $LOG_ANALYTICS_WORKSPACE_ID echo az monitor log-analytics workspace get-shared-keys \ - -g $RESOURCE_GROUP_NAME \ - -n $LOG_ANALYTICS_NAME \| jq -r '.primarySharedKey' + -g "$RESOURCE_GROUP_NAME" \ + -n "$LOG_ANALYTICS_NAME" \| jq -r '.primarySharedKey' -LOG_ANALYTICS_WORKSPACE_KEY=$(az monitor log-analytics workspace get-shared-keys -g $RESOURCE_GROUP_NAME -n $LOG_ANALYTICS_NAME | jq -r '.primarySharedKey') +LOG_ANALYTICS_WORKSPACE_KEY=$(az monitor log-analytics workspace get-shared-keys -g "$RESOURCE_GROUP_NAME" -n "$LOG_ANALYTICS_NAME" | jq -r '.primarySharedKey') echo Workspace key is: $LOG_ANALYTICS_WORKSPACE_KEY # Create storage account -echo az storage account create --resource-group $RESOURCE_GROUP_NAME \ +echo az storage account create --resource-group "$RESOURCE_GROUP_NAME" \ --name $STORAGE_ACCOUNT_NAME \ --location $LOCATION \ --sku Standard_LRS \ --enable-hierarchical-namespace true \ --kind StorageV2 -az storage account create --resource-group $RESOURCE_GROUP_NAME \ +az storage account create --resource-group "$RESOURCE_GROUP_NAME" \ --name $STORAGE_ACCOUNT_NAME \ --location $LOCATION \ --sku Standard_LRS \ @@ -131,54 +147,54 @@ az storage blob service-properties update --account-name $STORAGE_ACCOUNT_NAME \ --index-document index.html echo az storage account show-connection-string --name $STORAGE_ACCOUNT_NAME \ - --resource-group $RESOURCE_GROUP_NAME \ + --resource-group "$RESOURCE_GROUP_NAME" \ \| jq -r '.connectionString' -STORAGE_ACCOUNT_CONNECTION_STRING=$(az storage account show-connection-string --name $STORAGE_ACCOUNT_NAME --resource-group $RESOURCE_GROUP_NAME | jq -r '.connectionString') - +STORAGE_ACCOUNT_CONNECTION_STRING=$(az storage account show-connection-string --name $STORAGE_ACCOUNT_NAME --resource-group "$RESOURCE_GROUP_NAME" | jq -r '.connectionString') -WEB_BASE_URL="https://$STORAGE_ACCOUNT_NAME.blob.core.windows.net" -# $(az storage account show -n $STORAGE_ACCOUNT_NAME -g $RESOURCE_GROUP_NAME --query "primaryEndpoints.web" --output tsv) +# Shown to user, as may be needed for Cloudflare setup on very first run +AZURE_BASE_URL=$(az storage account show -n "$STORAGE_ACCOUNT_NAME" -g "$RESOURCE_GROUP_NAME" --query "primaryEndpoints.web" --output tsv) +# Calculated above from TARGET_ENVIRONMENT, bearing in mind 'prod' doesn' thave prefix sed -e "s#{{WEB_BASE_URL}}#$WEB_BASE_URL#" web/index-template.html > web/index.html az storage blob upload-batch -s web -d '$web' --account-name $STORAGE_ACCOUNT_NAME --overwrite # Provision Postgres Server -echo az postgres flexible-server create -y -g $RESOURCE_GROUP_NAME \ - -n $POSTGRES_SERVER_NAME --location $LOCATION \ - --admin-user bds --admin-password $BDS_DB_ADMIN_PASSWORD \ - --sku-name Standard_B1ms --tier Burstable --storage-size 32 -az postgres flexible-server create -y -g $RESOURCE_GROUP_NAME \ - -n $POSTGRES_SERVER_NAME --location $LOCATION \ - --admin-user bds --admin-password $BDS_DB_ADMIN_PASSWORD \ - --sku-name Standard_B1ms --tier Burstable --storage-size 32 +echo az postgres flexible-server create -y -g "$RESOURCE_GROUP_NAME" \ + -n "$POSTGRES_SERVER_NAME" --location "$LOCATION" \ + --admin-user bds --admin-password "$BDS_DB_ADMIN_PASSWORD" \ + --sku-name Standard_B1ms --tier Burstable --storage-size 32 --version 16 +az postgres flexible-server create -y -g "$RESOURCE_GROUP_NAME" \ + -n "$POSTGRES_SERVER_NAME" --location "$LOCATION" \ + --admin-user bds --admin-password "$BDS_DB_ADMIN_PASSWORD" \ + --sku-name Standard_B1ms --tier Burstable --storage-size 32 --version 16 # Create Postgres database -echo az postgres flexible-server db create --resource-group $RESOURCE_GROUP_NAME \ +echo az postgres flexible-server db create --resource-group "$RESOURCE_GROUP_NAME" \ --server-name $POSTGRES_SERVER_NAME \ --database-name bulk_data_service_db -az postgres flexible-server db create --resource-group $RESOURCE_GROUP_NAME \ +az postgres flexible-server db create --resource-group "$RESOURCE_GROUP_NAME" \ --server-name $POSTGRES_SERVER_NAME \ --database-name bulk_data_service_db # Add firewall rule to let other Azure resources access the database -echo az postgres flexible-server firewall-rule create --resource-group $RESOURCE_GROUP_NAME \ +echo az postgres flexible-server firewall-rule create --resource-group "$RESOURCE_GROUP_NAME" \ --name $POSTGRES_SERVER_NAME \ --rule-name allowazureservices \ --start-ip-address 0.0.0.0 -az postgres flexible-server firewall-rule create --resource-group $RESOURCE_GROUP_NAME \ +az postgres flexible-server firewall-rule create --resource-group "$RESOURCE_GROUP_NAME" \ --name $POSTGRES_SERVER_NAME \ --rule-name allowazureservices \ --start-ip-address 0.0.0.0 # Increase the maximum number of connections -echo az postgres flexible-server parameter set --resource-group $RESOURCE_GROUP_NAME \ +echo az postgres flexible-server parameter set --resource-group "$RESOURCE_GROUP_NAME" \ --server-name $POSTGRES_SERVER_NAME \ --name "max_connections" \ --value 85 -az postgres flexible-server parameter set --resource-group $RESOURCE_GROUP_NAME \ +az postgres flexible-server parameter set --resource-group "$RESOURCE_GROUP_NAME" \ --server-name $POSTGRES_SERVER_NAME \ --name "max_connections" \ --value 85 @@ -190,11 +206,22 @@ RESOURCE_GROUP_ID_STRING=$(az group list --query "[?name=='$RESOURCE_GROUP_NAME' echo az ad sp create-for-rbac --name $SERVICE_PRINCIPAL_NAME \ --role contributor \ --scopes $RESOURCE_GROUP_ID_STRING -SP_DETAILS=$(az ad sp create-for-rbac --name $SERVICE_PRINCIPAL_NAME \ +SP_DETAILS=$(az ad sp create-for-rbac --name "$SERVICE_PRINCIPAL_NAME" \ --role contributor \ - --scopes $RESOURCE_GROUP_ID_STRING) + --scopes "$RESOURCE_GROUP_ID_STRING") + +CREDS=$(echo "$SP_DETAILS" | jq "with_entries(if .key == \"appId\" then .key = \"clientId\" elif .key == \"tenant\" then .key = \"tenantId\" elif .key == \"password\" then .key = \"clientSecret\" else . end) | . += { \"subscriptionId\" : \"$SUBSCRIPTION_ID\" } | del(.displayName)") + +echo +echo +echo "--------------------------------------------------" + +echo "Configuration settings you will need:" + +echo -CREDS=$(echo $SP_DETAILS | jq "with_entries(if .key == \"appId\" then .key = \"clientId\" elif .key == \"tenant\" then .key = \"tenantId\" elif .key == \"password\" then .key = \"clientSecret\" else . end) | . += { \"subscriptionId\" : \"$SUBSCRIPTION_ID\" } | del(.displayName)") +echo "Base URL for Azure Storage Account: ${AZURE_BASE_URL}" +echo "(You may need to put this into the Cloudflare DNS setup if recreating dev/production)" echo echo @@ -202,38 +229,59 @@ echo "--------------------------------------------------" echo "Credentials to put into the Github repo's secrets:" echo -echo "JSON credentials for Azure: (Secret name: ${TARGET_ENVIRONMENT_ENV_VAR}_AZURE_CREDENTIALS)" +echo "JSON credentials for Azure: (Secret name: ${TARGET_ENVIRONMENT_UPPER}_AZURE_CREDENTIALS)" -echo $CREDS +echo "$CREDS" -echo "Azure storage connection string: (Secret name ${TARGET_ENVIRONMENT_ENV_VAR}_AZURE_STORAGE_CONNECTION_STRING)" +echo "Azure storage connection string: (Secret name ${TARGET_ENVIRONMENT_UPPER}_AZURE_STORAGE_CONNECTION_STRING)" -echo $STORAGE_ACCOUNT_CONNECTION_STRING +echo "$STORAGE_ACCOUNT_CONNECTION_STRING" -echo "Database host: (Secret name: ${TARGET_ENVIRONMENT_ENV_VAR}_DB_HOST)" +echo "Database host: (Secret name: ${TARGET_ENVIRONMENT_UPPER}_DB_HOST)" -echo $POSTGRES_SERVER_NAME +echo "$POSTGRES_SERVER_NAME" -echo "Database name: (Secret name: ${TARGET_ENVIRONMENT_ENV_VAR}_DB_PORT)" +echo "Database name: (Secret name: ${TARGET_ENVIRONMENT_UPPER}_DB_PORT)" echo 5432 -echo "Database name: (Secret name: ${TARGET_ENVIRONMENT_ENV_VAR}_DB_NAME)" +echo "Database timeout: (Secret name: ${TARGET_ENVIRONMENT_UPPER}_DB_CONNECTION_TIMEOUT)" + +echo 30 + +echo "Database SSL mode: (Secret name: ${TARGET_ENVIRONMENT_UPPER}_DB_SSL_MODE)" + +echo "require" + +echo "Database name: (Secret name: ${TARGET_ENVIRONMENT_UPPER}_DB_NAME)" echo bulk_data_service_db -echo "Database name: (Secret name: ${TARGET_ENVIRONMENT_ENV_VAR}_DB_USER)" +echo "Database name: (Secret name: ${TARGET_ENVIRONMENT_UPPER}_DB_USER)" echo bds -echo "Database name: (Secret name: ${TARGET_ENVIRONMENT_ENV_VAR}_DB_PASS)" +echo "Database name: (Secret name: ${TARGET_ENVIRONMENT_UPPER}_DB_PASS)" + +echo "$BDS_DB_ADMIN_PASSWORD" + +echo "Log analytics workspace ID: (Secret name: ${TARGET_ENVIRONMENT_UPPER}_LOG_WORKSPACE_ID)" -echo $BDS_DB_ADMIN_PASSWORD +echo "$LOG_ANALYTICS_WORKSPACE_ID" -echo "Log analytics workspace ID: (Secret name: ${TARGET_ENVIRONMENT_ENV_VAR}_LOG_WORKSPACE_ID)" +echo "Log analytics workspace key: (Secret name: ${TARGET_ENVIRONMENT_UPPER}_LOG_WORKSPACE_KEY)" + +echo "$LOG_ANALYTICS_WORKSPACE_KEY" + +echo + +echo "You also need to ensure the repository has the following secrets setup, which are not specific to the target environment:" + +echo "ACR_LOGIN_SERVER, ACR_USERNAME, ACR_PASSWORD, DOCKER_HUB_USERNAME, DOCKER_HUB_TOKEN" + +echo -echo $LOG_ANALYTICS_WORKSPACE_ID +echo "If you want to add the default configuration setup to Github variables, you can now run:" -echo "Log analytics workspace key: (Secret name: ${TARGET_ENVIRONMENT_ENV_VAR}_LOG_WORKSPACE_KEY)" +echo "./azure-provision/add-default-config-to-github-variables.sh ${TARGET_ENVIRONMENT}" -echo $LOG_ANALYTICS_WORKSPACE_KEY diff --git a/azure-provision/default-github-config-template.env b/azure-provision/default-github-config-template.env new file mode 100644 index 0000000..dcb919e --- /dev/null +++ b/azure-provision/default-github-config-template.env @@ -0,0 +1,9 @@ +_AZURE_STORAGE_BLOB_CONTAINER_NAME_IATI_XML=iati-xml +_AZURE_STORAGE_BLOB_CONTAINER_NAME_IATI_ZIP=iati-zip +_DATA_REGISTRATION=ckan-registry +_DATA_REGISTRY_BASE_URL=https://iatiregistry.org/api/3/action/package_search +_FORCE_REDOWNLOAD_AFTER_HOURS=24 +_LOGFILE=log +_NUMBER_DOWNLOADER_THREADS=25 +_REMOVE_LAST_GOOD_DOWNLOAD_AFTER_FAILING_HOURS=72 +_ZIP_WORKING_DIR=/tmp/bulk-data-service-zip \ No newline at end of file diff --git a/web/index-template.html b/web/index-template.html index 35fc39d..d4c5906 100644 --- a/web/index-template.html +++ b/web/index-template.html @@ -1,60 +1,63 @@ +
-IATI Dataset Index - Minimal (JSON)
- - -IATI Dataset Index - Minimal (JSON)
+ + +