Skip to content

Commit

Permalink
Merge pull request #176 from WildMeOrg/sql/migration-script
Browse files Browse the repository at this point in the history
Connecting logic for Postgres URI and a migration script from SQLite to Postgres
  • Loading branch information
Jason Parham authored Jan 28, 2021
2 parents a0a327c + 82c84e3 commit db88631
Show file tree
Hide file tree
Showing 21 changed files with 1,090 additions and 473 deletions.
2 changes: 2 additions & 0 deletions .dockerfiles/docker-entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ set -e
if [ -d /code ]; then
echo "*** $0 --- Uninstalling wildbook-ia"
pip uninstall -y wildbook-ia
echo "*** $0 --- Uninstalling sentry_sdk (in development)"
pip uninstall -y sentry_sdk
echo "*** $0 --- Installing development version of wildbook-ia at /code"
pushd /code && pip install -e ".[tests,postgres]" && popd
fi
Expand Down
2 changes: 1 addition & 1 deletion .dockerfiles/init-db.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -e

psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL
CREATE USER $DB_USER WITH PASSWORD '$DB_PASSWORD';
CREATE USER $DB_USER WITH SUPERUSER PASSWORD '$DB_PASSWORD';
CREATE DATABASE $DB_NAME;
GRANT ALL PRIVILEGES ON DATABASE $DB_NAME TO $DB_USER;
EOSQL
9 changes: 4 additions & 5 deletions .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
os: [ubuntu-latest] # Disable "macos-latest" for now
# For speed, we choose one version and that should be the lowest common denominator
python-version: [3.6, 3.7, 3.8]
postgres: ['', postgres]
postgres-uri: ['', 'postgresql://postgres:wbia@localhost:5432/postgres']

services:
db:
Expand All @@ -71,7 +71,7 @@ jobs:
# Checkout and env setup
- uses: actions/checkout@v2
- name: Install pgloader
if: matrix.postgres
if: matrix.postgres-uri
run: sudo apt-get install pgloader
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
Expand All @@ -92,10 +92,9 @@ jobs:
run: |
mkdir -p data/work
python -m wbia --set-workdir data/work --preload-exit
pytest --slow --web-tests
pytest --slow --web-tests --with-postgres-uri=$POSTGRES_URI
env:
WBIA_BASE_DB_URI: postgresql://postgres:wbia@localhost:5432
POSTGRES: ${{ matrix.postgres }}
POSTGRES_URI: ${{ matrix.postgres-uri }}

on-failure:
# This is not in the 'test' job itself because it would otherwise notify once per matrix combination.
Expand Down
7 changes: 6 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,12 @@ EXPOSE 5000
# Move to the workdir
WORKDIR /data

# Set the "workdir"
RUN python3 -m wbia --set-workdir /data --preload-exit

COPY .dockerfiles/docker-entrypoint.sh /docker-entrypoint.sh

ENV WBIA_DB_DIR="/data/db"

ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["python3", "-m", "wbia.dev", "--dbdir", "/data/db", "--logdir", "/data/logs/", "--web", "--port", "5000", "--web-deterministic-ports", "--containerized", "--cpudark", "--production"]
CMD ["python3", "-m", "wbia.dev", "--dbdir", "$WBIA_DB_DIR", "--logdir", "/data/logs/", "--web", "--port", "5000", "--web-deterministic-ports", "--containerized", "--cpudark", "--production"]
8 changes: 8 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,11 @@ def pytest_addoption(parser):
"instead it will reuse the previous test run's db"
),
)
parser.addoption(
'--with-postgres-uri',
dest='postgres_uri',
help=(
'used to enable tests to run against a Postgres database '
'(note, the uri should use a superuser role)'
),
)
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ services:
env_file: ./.dockerfiles/docker-compose.env
ports:
- "5000:5000"
# Development mounting of the code
volumes:
- ./:/code
- ./.dockerfiles/docker-entrypoint.sh:/docker-entrypoint.sh

pgadmin:
image: dpage/pgadmin4
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ def gen_packages_items():
[console_scripts]
wbia-init-testdbs = wbia.cli.testdbs:main
wbia-convert-hsdb = wbia.cli.convert_hsdb:main
wbia-migrate-sqlite-to-postgres = wbia.cli.migrate_sqlite_to_postgres:main
wbia-compare-databases = wbia.cli.compare_databases:main
""",
)

Expand Down
95 changes: 95 additions & 0 deletions wbia/cli/compare_databases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
import logging
import sys
import click

from wbia.dtool.copy_sqlite_to_postgres import (
SqliteDatabaseInfo,
PostgresDatabaseInfo,
compare_databases,
DEFAULT_CHECK_PC,
DEFAULT_CHECK_MIN,
DEFAULT_CHECK_MAX,
)


# Shared 'wbia' logger; main() below sets its level and attaches a stream handler.
logger = logging.getLogger('wbia')


@click.command()
@click.option('--db-dir', multiple=True, help='SQLite databases location')
@click.option(
    '--sqlite-uri',
    multiple=True,
    help='SQLite database URI (e.g. sqlite:////path.sqlite3)',
)
@click.option(
    '--pg-uri',
    multiple=True,
    help='Postgres connection URI (e.g. postgresql://user:pass@host)',
)
@click.option(
    '--check-pc',
    type=float,
    default=DEFAULT_CHECK_PC,
    help=f'Percentage of table to check, default {DEFAULT_CHECK_PC} ({int(DEFAULT_CHECK_PC * 100)}% of the table)',
)
@click.option(
    '--check-max',
    type=int,
    default=DEFAULT_CHECK_MAX,
    help=f'Maximum number of rows to check, default {DEFAULT_CHECK_MAX} (0 for no limit)',
)
@click.option(
    '--check-min',
    type=int,
    default=DEFAULT_CHECK_MIN,
    help=f'Minimum number of rows to check, default {DEFAULT_CHECK_MIN}',
)
@click.option('-v', '--verbose', is_flag=True, default=False, help='Show debug messages')
def main(db_dir, sqlite_uri, pg_uri, check_pc, check_max, check_min, verbose):
    # Command-line entry point: compare exactly two databases and report
    # whether they differ.
    #
    # Each of --db-dir, --sqlite-uri and --pg-uri may be repeated, but the
    # total number of databases named across all three must be exactly two.
    # The process exits with status 1 when differences are found.
    logger.setLevel(logging.DEBUG if verbose else logging.INFO)
    logger.addHandler(logging.StreamHandler())

    # Directories and SQLite URIs are both wrapped by SqliteDatabaseInfo;
    # keep the original ordering (directories first, then URIs).
    sqlite_sources = [*db_dir, *sqlite_uri]
    if len(sqlite_sources) + len(pg_uri) != 2:
        raise click.BadParameter('exactly 2 db_dir or sqlite_uri or pg_uri must be given')

    db_infos = [SqliteDatabaseInfo(source) for source in sqlite_sources]
    db_infos.extend(PostgresDatabaseInfo(uri) for uri in pg_uri)

    # An exact comparison is requested unless both a --sqlite-uri and a
    # --pg-uri were supplied.
    exact = (not sqlite_uri) or (not pg_uri)

    differences = compare_databases(
        *db_infos,
        exact=exact,
        check_pc=check_pc,
        check_max=check_max,
        check_min=check_min,
    )

    if not differences:
        click.echo(f'Databases {db_infos[0]} and {db_infos[1]} are the same')
        return

    click.echo(f'Databases {db_infos[0]} and {db_infos[1]} are different:')
    for difference in differences:
        click.echo(difference)
    sys.exit(1)


# Run the click command when this module is executed as a script.
if __name__ == '__main__':
    main()
143 changes: 143 additions & 0 deletions wbia/cli/migrate_sqlite_to_postgres.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# -*- coding: utf-8 -*-
import logging
import re
import subprocess
import sys
from pathlib import Path

import click
import sqlalchemy

from wbia.dtool.copy_sqlite_to_postgres import (
copy_sqlite_to_postgres,
SqliteDatabaseInfo,
PostgresDatabaseInfo,
compare_databases,
)


# Shared 'wbia' logger; main() below sets its level and attaches a stream handler.
logger = logging.getLogger('wbia')


def _quote_identifier(name):
    """Return ``name`` as a double-quoted SQL identifier.

    Embedded double quotes are doubled, so the identifier keeps its exact
    spelling (case, dashes, ...) instead of being folded or mis-parsed.
    """
    return '"{}"'.format(name.replace('"', '""'))


@click.command()
@click.option(
    '--db-dir', required=True, type=click.Path(exists=True), help='database location'
)
@click.option(
    '--db-uri',
    required=True,
    help='Postgres connection URI (e.g. postgresql://user:pass@host)',
)
@click.option(
    '--force',
    is_flag=True,
    default=False,
    help='Delete all tables in the public schema in postgres',
)
@click.option(
    '-v',
    '--verbose',
    is_flag=True,
    default=False,
    help='Show debug messages',
)
def main(db_dir, db_uri, force, verbose):
    """Migrate the SQLite databases in --db-dir to the Postgres database at --db-uri.

    The Postgres database is created if it does not exist yet. Nothing is
    copied when the two databases already match. Exits with status 1 when
    the public schema contains tables and --force was not given, or when the
    databases still differ after the migration.
    """
    # Set up logging
    logger.setLevel(logging.DEBUG if verbose else logging.INFO)
    logger.addHandler(logging.StreamHandler())

    logger.info(f'using {db_dir} ...')

    # Create the database if it doesn't exist
    engine = sqlalchemy.create_engine(db_uri)
    try:
        # Only probing for connectivity: close the connection right away.
        engine.connect().close()
    except sqlalchemy.exc.OperationalError as e:
        m = re.search(r'database "([^"]*)" does not exist', str(e))
        if m is None:
            raise
        dbname = m.group(1)
        # Connect without the trailing database name to issue CREATE DATABASE.
        admin_engine = sqlalchemy.create_engine(db_uri.rsplit('/', 1)[0])
        try:
            logger.info(f'Creating "{dbname}"...')
            # CREATE DATABASE cannot run inside a transaction block, hence
            # AUTOCOMMIT. The name is quoted so that the database created is
            # spelled exactly like the one the URI tried to connect to.
            admin_engine.execution_options(isolation_level='AUTOCOMMIT').execute(
                f'CREATE DATABASE {_quote_identifier(dbname)}'
            )
        finally:
            admin_engine.dispose()
    finally:
        engine.dispose()

    # Check that the database hasn't already been migrated.
    db_infos = [
        SqliteDatabaseInfo(Path(db_dir)),
        PostgresDatabaseInfo(db_uri),
    ]
    differences = compare_databases(*db_infos)

    if not differences:
        logger.info('Database already migrated')
        sys.exit(0)

    # Make sure there are no tables in the public schema in postgresql
    # because we're using it as the workspace for the migration
    if 'public' in db_infos[1].get_schema():
        table_names = [
            t for schema, t in db_infos[1].get_table_names() if schema == 'public'
        ]
        if not force:
            click.echo(
                f'Tables in public schema in postgres database: {", ".join(table_names)}'
            )
            click.echo('Use --force to remove the tables in public schema')
            sys.exit(1)

        click.echo(f'Dropping all tables in public schema: {", ".join(table_names)}')
        for table_name in table_names:
            # Schema-qualify and quote: only public tables were selected
            # above, and their names must be matched exactly.
            db_infos[1].engine.execute(
                f'DROP TABLE public.{_quote_identifier(table_name)} CASCADE'
            )

    # Migrate
    problems = {}
    with click.progressbar(length=100000, show_eta=True) as bar:
        for path, completed_future, db_size, total_size in copy_sqlite_to_postgres(
            Path(db_dir), db_uri
        ):
            try:
                completed_future.result()
            except Exception as exc:
                # Keep going with the remaining databases; failures are
                # collected and reported together below.
                logger.info(
                    f'\nfailed while processing {str(path)}\n{completed_future.exception()}'
                )
                problems[path] = exc
            else:
                logger.info(f'\nfinished processing {str(path)}')
            finally:
                # Advance the bar by this database's share of the total size.
                if total_size:
                    bar.update(int(db_size / total_size * bar.length))

    # Report problems
    for path, exc in problems.items():
        logger.info('*' * 60)
        logger.info(f'There was a problem migrating {str(path)}')
        # We are no longer inside the ``except`` block, so the traceback has
        # to be passed explicitly; logger.exception() would log "NoneType: None".
        logger.error(exc, exc_info=exc)
        if isinstance(exc, subprocess.CalledProcessError) and exc.stdout:
            output = exc.stdout
            if isinstance(output, bytes):
                output = output.decode(errors='replace')
            logger.info('-' * 30)
            logger.info(output)

    # Verify the migration
    differences = compare_databases(*db_infos)

    if differences:
        logger.info(f'Databases {db_infos[0]} and {db_infos[1]} are different:')
        for line in differences:
            logger.info(line)
        sys.exit(1)

    logger.info(f'Database {db_infos[0]} successfully migrated to {db_infos[1]}')
    sys.exit(0)


# Run the click command when this module is executed as a script.
if __name__ == '__main__':
    main()
Loading

0 comments on commit db88631

Please sign in to comment.