From 588ebba0877acdc28aa2c1c7742b8484c27e43ad Mon Sep 17 00:00:00 2001 From: Tom Brown Date: Wed, 16 Aug 2023 00:28:00 -1000 Subject: [PATCH] access version history from memory Read all transactions into memory to display version history. This is a bit ugly, not tested very well but speeds up generating the render cache from about a minute to a few seconds. It is a work-around for https://github.com/kvesteri/sqlalchemy-continuum/issues/332 . --- tourist/continuumutils.py | 98 ++++++++++++++++++++++++++++++++++++ tourist/render_factory.py | 30 ++++++----- tourist/scripts/batchtool.py | 16 ++---- 3 files changed, 121 insertions(+), 23 deletions(-) create mode 100644 tourist/continuumutils.py diff --git a/tourist/continuumutils.py b/tourist/continuumutils.py new file mode 100644 index 0000000..46532e0 --- /dev/null +++ b/tourist/continuumutils.py @@ -0,0 +1,98 @@ +from collections import defaultdict +from typing import Any +from typing import Dict +from typing import List +from typing import Set +from typing import Type + +import attrs +import sqlalchemy +import sqlalchemy_continuum +from more_itertools import last + +from tourist.models import tstore + +PoolVersion = sqlalchemy_continuum.version_class(tstore.Pool) +PlaceVersion = sqlalchemy_continuum.version_class(tstore.Place) +ClubVersion = sqlalchemy_continuum.version_class(tstore.Club) +Transaction = sqlalchemy_continuum.transaction_class(tstore.Club) + + +type_to_version_cls = { + 'place': PlaceVersion, + 'pool': PoolVersion, + 'club': ClubVersion, +} + + + +@attrs.frozen() +class VersionTable: + """In-memory copy of one Version table, built up while replaying transactions + """ + entity_types: Set[str] + version_cls: Type + versions: Dict[int, List] = attrs.field(factory=lambda: defaultdict(list)) + + def add_version_object(self, transaction: Transaction, new_version_obj): + prev_version = last(self.versions[new_version_obj.id], None) + if prev_version: + assert prev_version.end_transaction_id == transaction.id 
+        self.versions[new_version_obj.id].append(new_version_obj)
+
+
+@attrs.frozen()
+class VersionTables:
+    """In-memory dump of continuum versions and transactions, created to make iterating through
+    them run about 60 times faster. There is similar code in `batchtool`.
+
+    TODO(TomGoBravo): add some tests for this
+    """
+    version_tables: Dict[Type, VersionTable]
+    transaction_user_email: Dict[int, str] = attrs.field(factory=dict)
+    transaction_issued_at: Dict[int, Any] = attrs.field(factory=dict)
+
+    @staticmethod
+    def make() -> 'VersionTables':  # one empty VersionTable per entry in type_to_version_cls
+        version_tables = {version_cls: VersionTable(entity_types={type_str}, version_cls=version_cls)
+                          for type_str, version_cls in type_to_version_cls.items()}
+        return VersionTables(version_tables=version_tables)
+
+    def populate(self):
+        existing_transactions = Transaction.query.order_by(Transaction.id).all()
+        for transaction in existing_transactions:  # replayed in ascending transaction id order
+            if transaction.user:  # transactions without a user get no email entry
+                self.transaction_user_email[transaction.id] = transaction.user.email
+            self.transaction_issued_at[transaction.id] = transaction.issued_at
+            for version_cls, cls_changed_entities in transaction.changed_entities.items():
+                for version_obj in cls_changed_entities:
+                    self.version_tables[version_cls].add_version_object(transaction, version_obj)
+
+    def get_object_history(self, obj):
+        obj_version_type = sqlalchemy_continuum.version_class(obj.__class__)
+        version_table = self.version_tables[obj_version_type]
+        return version_table.versions[obj.id]  # versions recorded for obj.id, in replay order
+
+
+def changeset(current_version, previous_version):
+    """
+    Return a dictionary of changed fields in this version with keys as
+    field names and values as lists with first value as the old field value
+    and second list value as the new value.
+
+    This is a very ugly copy of sqlalchemy_continuum.version.VersionClassBase.changeset which I
+    created because accessing the previous version is super slow.
+ """ + data = {} + + for key in sqlalchemy.inspect(current_version.__class__).columns.keys(): + if sqlalchemy_continuum.utils.is_internal_column(current_version, key): + continue + if not previous_version: + old = None + else: + old = getattr(previous_version, key) + new = getattr(current_version, key) + if old != new: + data[key] = [old, new] + return data diff --git a/tourist/render_factory.py b/tourist/render_factory.py index dcc6ed7..ecabf58 100644 --- a/tourist/render_factory.py +++ b/tourist/render_factory.py @@ -4,6 +4,7 @@ import io import itertools from typing import List, Mapping +from typing import Type from typing import Union from sqlalchemy.util import IdentitySet @@ -13,6 +14,7 @@ import geojson from geoalchemy2.shape import to_shape +from tourist import continuumutils from tourist.models import render from tourist.models import tstore @@ -61,21 +63,23 @@ def _build_render_pool(orm_pool: tstore.Pool) -> render.Pool: ) -def _build_changes(orm_entity: Union[tstore.Place, tstore.Club, tstore.Pool]) -> ( - render.PlaceEntityChanges): +def _build_changes(orm_entity: Union[tstore.Place, tstore.Club, tstore.Pool], versions: + continuumutils.VersionTables) -> (render.PlaceEntityChanges): changes = render.PlaceEntityChanges(entity_name=orm_entity.name) - for v in orm_entity.versions: - user_email = None - if v.transaction.user: - user_email = v.transaction.user.email + prev_v = None + for v in versions.get_object_history(orm_entity): + issued_at = versions.transaction_issued_at[v.transaction_id] + user_email = versions.transaction_user_email.get(v.transaction_id, None) changes.changes.append(render.PlaceEntityChanges.Change( - timestamp=v.transaction.issued_at, user=user_email, - change=str(v.changeset))) + timestamp=issued_at, user=user_email, + change=str(continuumutils.changeset(v, prev_v)))) + prev_v = v return changes -def _build_render_place(orm_place: tstore.Place, source_by_short_name: Mapping[str, render.ClubSource]) -> render.Place: +def 
_build_render_place(orm_place: tstore.Place, source_by_short_name: Mapping[str, + render.ClubSource], versions: continuumutils.VersionTables) -> (render.Place): children_geojson = orm_place.children_geojson_features if children_geojson: geojson_children_collection = geojson.FeatureCollection(children_geojson) @@ -125,10 +129,10 @@ def _build_render_place(orm_place: tstore.Place, source_by_short_name: Mapping[s entity_changes = None else: recently_updated = None - entity_changes = [_build_changes(orm_place)] + entity_changes = [_build_changes(orm_place, versions)] for child in itertools.chain(orm_place.child_places, orm_place.child_pools, orm_place.child_clubs): - entity_changes.append(_build_changes(child)) + entity_changes.append(_build_changes(child, versions)) return render.Place( @@ -230,9 +234,11 @@ def get_all(cls): all_pools: List[tstore.Pool] = get_all(tstore.Pool) all_sources: List[tstore.Source] = get_all(tstore.Source) source_by_short_name = {s.source_short_name: _build_render_club_source(s) for s in all_sources} + version_tables = continuumutils.VersionTables.make() + version_tables.populate() for place in all_places: - render_place = _build_render_place(place, source_by_short_name) + render_place = _build_render_place(place, source_by_short_name, version_tables) yield tstore.RenderCache(name=RenderName.PLACE_PREFIX.value + place.short_name, value_dict=cattrs.unstructure(render_place)) if place.is_world: diff --git a/tourist/scripts/batchtool.py b/tourist/scripts/batchtool.py index 01b112c..ae6ab9c 100644 --- a/tourist/scripts/batchtool.py +++ b/tourist/scripts/batchtool.py @@ -18,6 +18,11 @@ import tourist from tourist import render_factory +from tourist.continuumutils import ClubVersion +from tourist.continuumutils import PlaceVersion +from tourist.continuumutils import PoolVersion +from tourist.continuumutils import Transaction +from tourist.continuumutils import type_to_version_cls from tourist.models import attrib from tourist.models import tstore 
from tourist.models.tstore import PAGE_LINK_RE @@ -167,10 +172,6 @@ def incr_column(cls, column_name: str): click.echo('Run with --write to commit changes') -PoolVersion = sqlalchemy_continuum.version_class(tstore.Pool) -PlaceVersion = sqlalchemy_continuum.version_class(tstore.Place) -ClubVersion = sqlalchemy_continuum.version_class(tstore.Club) -Transaction = sqlalchemy_continuum.transaction_class(tstore.Club) operation_type_column_name = sqlalchemy_continuum.utils.option(tstore.Club, 'operation_type_column_name') @@ -266,13 +267,6 @@ def live_versions(self): yield version_obj -type_to_version_cls = { - 'place': PlaceVersion, - 'pool': PoolVersion, - 'club': ClubVersion, -} - - @attr.s(auto_attribs=True) class VersionSyncer: """Creates version history