Skip to content

Commit

Permalink
split database in two
Browse files Browse the repository at this point in the history
  • Loading branch information
drkane committed Feb 3, 2023
1 parent b59a19c commit 5778fcf
Show file tree
Hide file tree
Showing 11 changed files with 112 additions and 26 deletions.
20 changes: 18 additions & 2 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,11 +1,27 @@
SECRET_KEY=blahblah
DJANGO_SETTINGS_MODULE=findthatcharity.settings

DATABASE_URL=postgres://postgres:postgres@localhost/ftc_dj
CACHE_URL=dbcache://findthatcharity_cache
DATABASE_ADMIN_URL=postgres://postgres:postgres@localhost/ftc_admin
DATABASE_DASHBOARD_URL=postgres://postgres:postgres@localhost/ftc_dj
DATASTORE_360GIVING_URL=postgres://postgres:postgres@localhost/360givingdatastore

ES_URL=localhost:9200
ALLOWED_HOSTS='.ftc.dkane.net;.findthatcharity.uk'
DEBUG=False

EMAIL_HOST=smtp.example.com
EMAIL_PORT=465
EMAIL_USE_SSL=True
EMAIL_HOST_USER=[email protected]
EMAIL_HOST_PASSWORD=blahblah
ADMIN_EMAIL=[email protected]
DEFAULT_FROM_EMAIL=[email protected]

TWITTER_ACCESS_TOKEN=blahblah
TWITTER_ACCESS_TOKEN_SECRET=blahblah
TWITTER_CONSUMER_KEY=blahblah
TWITTER_CONSUMER_SECRET=blahblah
TWITTER_CONSUMER_SECRET=blahblah

SENTRY_DSN=https://<id>@<id>.ingest.sentry.io/<code>
LOGGING_DB=logs/logs_{year}_{month:02}.db
2 changes: 1 addition & 1 deletion Procfile
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
web: gunicorn findthatcharity.wsgi:application --timeout 120
release: python manage.py migrate --noinput
release: sh ./release.sh
6 changes: 4 additions & 2 deletions companies/tests/test_import_companies.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@
import re

import requests_mock
from django.test import TestCase
from django.test import TransactionTestCase
from requests import Response
from requests_html import HTMLSession

from companies.management.commands.import_companies import Command


class TestImportCompanies(TestCase):
class TestImportCompanies(TransactionTestCase):
databases = {"data", "admin"}

def mock_csv_downloads(self, m):
dirname = os.path.dirname(__file__)
with open(os.path.join(dirname, "data", "CompaniesHomePage.html")) as a:
Expand Down
49 changes: 49 additions & 0 deletions findthatcharity/db_router.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
class DBRouter:
    """
    A router to decide whether the data or admin database should be used.

    Models belonging to an app listed in ``route_app_labels`` are routed to
    ``data_db``; models from every other app are routed to ``admin_db``.
    """

    # Labels of the apps whose models are stored in the data database.
    route_app_labels = {
        "addtocsv",
        "api",
        "charity",
        "companies",
        "ftcbot",
        "ftc",
        "geo",
        "other_data",
        "reconcile",
    }
    admin_db = "admin"
    data_db = "data"

    def _db_for_app_label(self, app_label):
        """
        Return the alias of the database that holds ``app_label``'s models.

        This is the single routing rule shared by reads, writes and
        migrations, so the three can never disagree with each other.
        """
        if app_label in self.route_app_labels:
            return self.data_db
        return self.admin_db

    def db_for_read(self, model, **hints):
        """
        Reads of models from the routed apps go to data_db, all other
        reads go to admin_db.
        """
        return self._db_for_app_label(model._meta.app_label)

    def db_for_write(self, model, **hints):
        """
        Writes of models from the routed apps go to data_db, all other
        writes go to admin_db.
        """
        return self._db_for_app_label(model._meta.app_label)

    def allow_relation(self, obj1, obj2, **hints):
        """
        Express no opinion about relations between obj1 and obj2.

        Returning None leaves the decision to Django, which (when no router
        has an opinion) only allows relations within the same database.
        """
        return None

    def allow_migrate(self, db, app_label, model_name=None, **hints):
        """
        Make sure each app is only migrated in the database it is routed
        to: the routed apps in data_db and every other app in admin_db.
        """
        return db == self._db_for_app_label(app_label)
13 changes: 8 additions & 5 deletions findthatcharity/jinja2.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from django.conf import settings
from django.contrib.humanize.templatetags.humanize import naturalday, naturaltime
from django.core.cache import cache
from django.db import connection
from django.db import connections
from django.db.models import Count, F, Func
from django.templatetags.static import static
from django.urls import reverse
Expand Down Expand Up @@ -32,7 +32,10 @@ def get_orgtypes():
value = cache.get(cache_key)
if value:
return value
if OrganisationType._meta.db_table in connection.introspection.table_names():
if (
OrganisationType._meta.db_table
in connections["data"].introspection.table_names()
):
by_orgtype = {
ot["orgtype"]: ot["records"]
for ot in Organisation.objects.annotate(
Expand Down Expand Up @@ -60,7 +63,7 @@ def get_sources():
value = cache.get(cache_key)
if value:
return value
if Source._meta.db_table in connection.introspection.table_names():
if Source._meta.db_table in connections["data"].introspection.table_names():
value = {
s.id: s
for s in Source.objects.all()
Expand All @@ -78,7 +81,7 @@ def get_orgidschemes():
value = cache.get(cache_key)
if value:
return value
if OrgidScheme._meta.db_table in connection.introspection.table_names():
if OrgidScheme._meta.db_table in connections["data"].introspection.table_names():
value = {s.code: s for s in OrgidScheme.objects.all()}
cache.set(cache_key, value, 60 * 60)
else:
Expand All @@ -91,7 +94,7 @@ def get_locations(areatypes=settings.DEFAULT_AREA_TYPES):
value = cache.get(cache_key)
if value:
return value
if GeoLookup._meta.db_table in connection.introspection.table_names():
if GeoLookup._meta.db_table in connections["data"].introspection.table_names():
value = {}
for s in GeoLookup.objects.filter(geoCodeType__in=areatypes):
if s.geoCodeType not in value:
Expand Down
6 changes: 5 additions & 1 deletion findthatcharity/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,12 @@
# https://docs.djangoproject.com/en/3.0/ref/settings/#databases

DATABASES = {
"default": dj_database_url.config(env="DATABASE_URL"),
"default": {},
"admin": dj_database_url.config(env="DATABASE_ADMIN_URL"),
"data": dj_database_url.config(env="DATABASE_URL"),
"dashboard": dj_database_url.config(env="DATABASE_DASHBOARD_URL"),
}
DATABASE_ROUTERS = ["findthatcharity.db_router.DBRouter"]
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"

# Caching in database
Expand Down Expand Up @@ -291,3 +294,4 @@
EMAIL_HOST_PASSWORD = os.environ.get("EMAIL_HOST_PASSWORD")
EMAIL_PORT = os.environ.get("EMAIL_PORT")
EMAIL_USE_SSL = os.environ.get("EMAIL_USE_SSL") == "True"
EMAIL_USE_SSL = os.environ.get("EMAIL_USE_SSL") == "True"
6 changes: 3 additions & 3 deletions ftc/management/commands/_base_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import requests_cache
import validators
from django.core.management.base import BaseCommand
from django.db import connection, transaction
from django.db import connections, transaction
from django.utils.text import slugify

from ftc.management.commands._db_logger import ScrapeHandler
Expand Down Expand Up @@ -70,7 +70,7 @@ def __init__(self, *args, **kwargs):
self.logger.addHandler(self.scrape_logger)

self.post_sql = {}
self.cursor = connection.cursor()
self.cursor = connections["data"].cursor()

def add_arguments(self, parser):
parser.add_argument(
Expand All @@ -86,7 +86,7 @@ def set_session(self, install_cache=False):
self.session = requests.Session()

def handle(self, *args, **options):
with transaction.atomic():
with transaction.atomic("data"):
try:
self.run_scraper(*args, **options)
except Exception as err:
Expand Down
2 changes: 2 additions & 0 deletions ftc/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@


class TestCase(django.test.TestCase):
databases = {"data", "admin"}

def setUp(self):
# setup elasticsearch patcher
self.es_patcher = patch("ftc.documents.get_connection")
Expand Down
4 changes: 4 additions & 0 deletions ftc/tests/test_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@


class BaseScraperTests(TestCase):
databases = {"data", "admin"}

def test_parse_company_number(self):
BaseScraper.name = "test"
scraper = BaseScraper()
Expand Down Expand Up @@ -60,6 +62,8 @@ def test_org_id(self):


class ScraperTests(TestCase):
databases = {"data", "admin"}

def mock_csv_downloads(self, m):
dirname = os.path.dirname(__file__)
for url, filename in MOCK_FILES:
Expand Down
28 changes: 16 additions & 12 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@ Installation
4. Install requirements (`pip install -r requirements.txt`)
5. [Install postgres](https://www.postgresql.org/download/)
6. Start postgres
7. [Install elasticsearch 7](https://www.elastic.co/guide/en/elasticsearch/reference/current/_installation.html) - you may need to increase available memory (see below)
8. Start elasticsearch
9. Create `.env` file in root directory. Contents based on `.env.example`.
10. Create the database tables (`python ./manage.py migrate && python ./manage.py createcachetable`)
11. Import data on charities (`python ./manage.py import_charities`)
12. Import data on nonprofit companies (`python ./manage.py import_companies`)
13. Import data on other non-profit organisations (`python ./manage.py import_all`)
14. Add organisations to elasticsearch index (`python ./manage.py es_index`) - (Don't use the default `search_index` command as this won't setup aliases correctly)
7. Create 2 postgres databases - one for admin (eg `ftc_admin`) and one for data (eg `ftc_data`)
8. [Install elasticsearch 7](https://www.elastic.co/guide/en/elasticsearch/reference/current/_installation.html) - you may need to increase available memory (see below)
9. Start elasticsearch
10. Create `.env` file in root directory. Contents based on `.env.example`.
11. Create the database tables (`python ./manage.py migrate --database=data && python ./manage.py migrate --database=admin && python ./manage.py createcachetable --database=admin`)
12. Import data on charities (`python ./manage.py import_charities`)
13. Import data on nonprofit companies (`python ./manage.py import_companies`)
14. Import data on other non-profit organisations (`python ./manage.py import_all`)
15. Add organisations to elasticsearch index (`python ./manage.py es_index`) - (Don't use the default `search_index` command as this won't set up aliases correctly)

Dokku Installation
------------------
Expand All @@ -43,8 +44,10 @@ dokku apps:create ftc

# postgres
sudo dokku plugin:install https://github.com/dokku/dokku-postgres.git postgres
dokku postgres:create ftc-db
dokku postgres:link ftc-db ftc
dokku postgres:create ftc-db-data
dokku postgres:link ftc-db-data ftc --alias "DATABASE_URL"
dokku postgres:create ftc-db-admin
dokku postgres:link ftc-db-admin ftc --alias "DATABASE_ADMIN_URL"

# elasticsearch
sudo dokku plugin:install https://github.com/dokku/dokku-elasticsearch.git elasticsearch
Expand Down Expand Up @@ -84,8 +87,9 @@ On Dokku server run:

```bash
# setup
dokku run ftc python ./manage.py migrate
dokku run ftc python ./manage.py createcachetable
dokku run ftc python ./manage.py migrate --database=data
dokku run ftc python ./manage.py migrate --database=admin
dokku run ftc python ./manage.py createcachetable --database=admin

# run import
dokku run ftc python ./manage.py charity_setup
Expand Down
2 changes: 2 additions & 0 deletions release.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/sh
# Release script: apply pending Django migrations to the data database and
# then to the admin database.
#
# Abort on the first failing command so that a failed migration fails the
# whole release, instead of being masked by the exit status of a later
# command that happens to succeed.
set -e

python manage.py migrate --database=data --noinput
python manage.py migrate --database=admin --noinput

0 comments on commit 5778fcf

Please sign in to comment.