From ce2e930e36383feb5c237a5b412fdcea7c7cb519 Mon Sep 17 00:00:00 2001 From: Jordan Cook Date: Tue, 2 Jul 2024 17:56:54 -0500 Subject: [PATCH] Use separate ORM model with single JSON column instead of combining with full model object --- naturtag/app/app.py | 9 +- .../controllers/observation_controller.py | 4 +- naturtag/controllers/taxon_controller.py | 7 +- naturtag/controllers/taxon_view.py | 4 +- naturtag/storage/__init__.py | 2 +- naturtag/storage/app_state.py | 157 ++++++++++++++++++ naturtag/storage/settings.py | 24 +-- naturtag/storage/setup.py | 27 +-- naturtag/storage/user_taxa.py | 120 ------------- naturtag/widgets/taxon_images.py | 4 +- 10 files changed, 189 insertions(+), 169 deletions(-) create mode 100644 naturtag/storage/app_state.py delete mode 100644 naturtag/storage/user_taxa.py diff --git a/naturtag/app/app.py b/naturtag/app/app.py index aa91c16b..c4df7dcc 100755 --- a/naturtag/app/app.py +++ b/naturtag/app/app.py @@ -47,9 +47,9 @@ def __init__(self, *args, **kwargs): self.setApplicationVersion(pkg_version('naturtag')) self.setOrganizationName('pyinat') self.setWindowIcon(QIcon(QPixmap(str(APP_ICON)))) - self.settings = Settings.read() def post_init(self): + self.settings = Settings.read() self.log_handler = init_handler( self.settings.log_level, root_level=self.settings.log_level_external, @@ -57,8 +57,7 @@ def post_init(self): ) # Run initial/post-update setup steps, if needed - self.settings.check_version_change() - setup(self.settings) + self.state = setup(self.settings.db_path) # Globally available application objects self.client = iNatDbClient(self.settings.db_path) @@ -71,7 +70,7 @@ class MainWindow(QMainWindow): def __init__(self, app: NaturtagApp): super().__init__() self.setWindowTitle('Naturtag') - self.resize(*app.settings.window_size) + self.resize(*app.state.window_size) self.app = app # Controllers @@ -211,7 +210,7 @@ def check_username(self): def closeEvent(self, _): """Save settings before closing the app""" self.app.settings.write() - self.taxon_controller.user_taxa.write() + self.app.state.write() def info(self, message: str): """Show a message both in the status bar and in the logs""" diff --git a/naturtag/controllers/observation_controller.py b/naturtag/controllers/observation_controller.py index 607150df..9f0df5ce 100644 --- a/naturtag/controllers/observation_controller.py +++ b/naturtag/controllers/observation_controller.py @@ -170,9 +170,9 @@ def get_user_observations(self) -> list[Observation]: observations = self.app.client.observations.get_user_observations( username=self.app.settings.username, - updated_since=self.app.settings.last_obs_check, + updated_since=self.app.state.last_obs_check, limit=DEFAULT_PAGE_SIZE, page=self.page, ) - self.app.settings.set_obs_checkpoint() + self.app.state.set_obs_checkpoint() return observations diff --git a/naturtag/controllers/taxon_controller.py b/naturtag/controllers/taxon_controller.py index 57e1f4ba..03dcfc2b 100644 --- a/naturtag/controllers/taxon_controller.py +++ b/naturtag/controllers/taxon_controller.py @@ -14,7 +14,7 @@ TaxonSearch, get_app, ) -from naturtag.storage import UserTaxa +from naturtag.storage import AppState from naturtag.widgets import HorizontalLayout, TaxonInfoCard, TaxonList, VerticalLayout logger = getLogger(__name__) @@ -27,7 +27,7 @@ class TaxonController(BaseController): def __init__(self): super().__init__() - self.user_taxa = UserTaxa.read(self.app.settings.db_path) + self.user_taxa = self.app.state self.root = HorizontalLayout(self) self.root.setAlignment(Qt.AlignLeft) @@ -118,7 +118,7 @@ class TaxonTabs(QTabWidget): def __init__( self, - user_taxa: UserTaxa, + user_taxa: AppState, parent: Optional[QWidget] = None, ): super().__init__(parent) @@ -222,6 +222,7 @@ def display_observed(self, taxon_counts: TaxonCounts): """After fetching observation taxon counts for the user, add info cards for them""" self.observed.set_taxa(list(taxon_counts)[:MAX_DISPLAY_OBSERVED]) self.user_taxa.update_observed(taxon_counts) + self.user_taxa.write() self.on_load.emit(list(self.observed.cards)) @Slot(Taxon) diff --git a/naturtag/controllers/taxon_view.py b/naturtag/controllers/taxon_view.py index a314a4ee..c7d7b556 100644 --- a/naturtag/controllers/taxon_view.py +++ b/naturtag/controllers/taxon_view.py @@ -11,7 +11,7 @@ from naturtag.app.style import fa_icon from naturtag.constants import SIZE_SM -from naturtag.storage import UserTaxa +from naturtag.storage import AppState from naturtag.widgets import ( GridLayout, HorizontalLayout, @@ -195,7 +195,7 @@ def _update_buttons(self): class TaxonomySection(HorizontalLayout): """Section to display ancestors and children of selected taxon""" - def __init__(self, user_taxa: UserTaxa): + def __init__(self, user_taxa: AppState): super().__init__() self.ancestors_group = self.add_group( diff --git a/naturtag/storage/__init__.py b/naturtag/storage/__init__.py index 3fe97e68..31153d13 100644 --- a/naturtag/storage/__init__.py +++ b/naturtag/storage/__init__.py @@ -1,5 +1,5 @@ # flake8: noqa: F401 +from naturtag.storage.app_state import AppState from naturtag.storage.client import ImageSession, iNatDbClient from naturtag.storage.settings import Settings from naturtag.storage.setup import setup -from naturtag.storage.user_taxa import UserTaxa diff --git a/naturtag/storage/app_state.py b/naturtag/storage/app_state.py new file mode 100644 index 00000000..1b7ec599 --- /dev/null +++ b/naturtag/storage/app_state.py @@ -0,0 +1,157 @@ +from collections import Counter, OrderedDict +from datetime import datetime, timezone +from importlib.metadata import version as pkg_version +from itertools import chain +from logging import getLogger +from pathlib import Path +from typing import Iterable, Optional + +from attr import define, field +from cattrs.preconf import json +from pyinaturalist import TaxonCounts +from pyinaturalist_convert._models import Base +from pyinaturalist_convert.db import create_table, get_session +from sqlalchemy import Column, Integer, delete, select, types +from sqlalchemy.exc import OperationalError + +from naturtag.constants import ( + DB_PATH, + DEFAULT_WINDOW_SIZE, + MAX_DISPLAY_HISTORY, + MAX_DISPLAY_OBSERVED, +) + +JsonConverter = json.make_converter() +logger = getLogger(__name__) + + +@define(auto_attribs=False, slots=False) +class AppState: + """Application state container. This includes values that don't need to be + human-readable/editable; so, they don't need to be stored in settings.yml, and are persisted in + SQLite instead. + """ + + db_path: Path = None # type: ignore + + # Taxonomy browser data + history: list[int] = field(factory=list) + starred: list[int] = field(factory=list) + observed: dict[int, int] = field(factory=dict) + frequent: Counter[int] = None # type: ignore + + # Misc state info + setup_complete: bool = field(default=False) + last_obs_check: Optional[datetime] = field(default=None) + last_version: str = field(default='N/A') + window_size: tuple[int, int] = field(default=DEFAULT_WINDOW_SIZE) + + def __attrs_post_init__(self): + self.frequent = Counter(self.history) + + @property + def display_ids(self) -> set[int]: + """Return top history, frequent, observed, and starred taxa combined. + Returns only unique IDs, since a given taxon may appear in more than one list. + """ + top_ids = [self.top_history, self.top_frequent, self.top_observed, self.starred] + return set(chain.from_iterable(top_ids)) + + @property + def top_history(self) -> list[int]: + """Get the most recently viewed unique taxa""" + return _top_unique_ids(self.history[::-1]) + + @property + def top_frequent(self) -> list[int]: + """Get the most frequently viewed taxa""" + return [t[0] for t in self.frequent.most_common(MAX_DISPLAY_HISTORY)] + + @property + def top_observed(self) -> list[int]: + """Get the most commonly observed taxa""" + return _top_unique_ids(self.observed.keys(), MAX_DISPLAY_OBSERVED) + + def check_version_change(self): + """Check if the app version has changed since the last run""" + current_version = pkg_version('naturtag') + if self.last_version != current_version: + logger.info(f'Updated from {self.last_version} to {current_version}') + self.last_version = current_version + self.setup_complete = False + + def frequent_idx(self, taxon_id: int) -> Optional[int]: + """Return the position of a taxon in the frequent list, if it's in the top + ``MAX_DISPLAY_HISTORY`` taxa. + """ + try: + return self.top_frequent.index(taxon_id) + except ValueError: + return None + + def set_obs_checkpoint(self): + self.last_obs_check = datetime.now(timezone.utc).replace(microsecond=0) + self.write() + + def update_history(self, taxon_id: int): + """Update history and frequent with a new or existing taxon ID""" + self.history.append(taxon_id) + self.frequent.update([taxon_id]) + + def update_observed(self, taxon_counts: TaxonCounts): + self.observed = {t.id: t.count for t in taxon_counts} + + def view_count(self, taxon_id: int) -> int: + """Return the number of times this taxon has been viewed""" + return self.frequent.get(taxon_id, 0) + + def __str__(self): + sizes = [ + f'History: {len(self.history)}', + f'Starred: {len(self.starred)}', + f'Frequent: {len(self.frequent)}', + f'Observed: {len(self.observed)}', + ] + return '\n'.join(sizes) + + @classmethod + def read(cls, db_path: Path = DB_PATH) -> 'AppState': + """Read app state from SQLite database, or return a new instance if no state is found""" + logger.info(f'Reading app state from {db_path}') + + try: + with get_session(db_path) as session: + state_json = session.execute(select(DbAppState)).first()[0].content + except (TypeError, OperationalError): + new_state = AppState() + new_state.db_path = db_path + return new_state + + obj = JsonConverter.structure(state_json, cl=cls) + obj.db_path = db_path + return obj + + def write(self): + """Write app state to SQLite database. Table will be created if it doesn't exist.""" + logger.info(f'Writing app state to {self.db_path}') + create_table(DbAppState, self.db_path) + state_json = JsonConverter.unstructure(self) + with get_session(self.db_path) as session: + session.execute(delete(DbAppState)) + session.add(DbAppState(content=state_json)) + session.commit() + + +@Base.mapped +class DbAppState: + """Application state persisted in SQLite, stored in a single JSON field""" + + __tablename__ = 'app_state' + + id = Column(Integer, default=0, primary_key=True) + content = Column(types.JSON) + + +def _top_unique_ids(ids: Iterable[int], n: int = MAX_DISPLAY_HISTORY) -> list[int]: + """Get the top unique IDs from a list, preserving order""" + return list(OrderedDict.fromkeys(ids))[:n] diff --git a/naturtag/storage/settings.py b/naturtag/storage/settings.py index 851f8e3f..c5b5cc14 100644 --- a/naturtag/storage/settings.py +++ b/naturtag/storage/settings.py @@ -2,17 +2,16 @@ # TODO: Finish and document portable mode / storing config and data in a user-specified path from datetime import datetime -from importlib.metadata import version as pkg_version from logging import getLogger from pathlib import Path from typing import Optional import yaml from attr import define, field -from cattr import Converter -from cattr.preconf import pyyaml +from cattrs import Converter +from cattrs.preconf import pyyaml -from naturtag.constants import APP_DIR, CONFIG_PATH, DEFAULT_WINDOW_SIZE, MAX_DIR_HISTORY, PathOrStr +from naturtag.constants import APP_DIR, CONFIG_PATH, MAX_DIR_HISTORY, PathOrStr logger = getLogger().getChild(__name__) @@ -45,7 +44,6 @@ def doc_field(doc: str = '', **kwargs): class Settings: # Display settings dark_mode: bool = field(default=False) - window_size: tuple[int, int] = field(default=DEFAULT_WINDOW_SIZE) # Logging settings log_level: str = doc_field(default='INFO', doc='Logging level') @@ -92,9 +90,6 @@ class Settings: # Internal debug: bool = field(default=False) - setup_complete: bool = field(default=False) - last_obs_check: Optional[datetime] = field(default=None) - last_version: str = field(default='') n_worker_threads: int = field(default=1) # Shortcuts for application files within the user data dir @@ -155,15 +150,6 @@ def write(self): with open(self.path, 'w') as f: yaml.safe_dump(attrs_dict, f) - def check_version_change(self): - """Check if the app version has changed since the last run""" - current_version = pkg_version('naturtag') - if self.last_version != current_version: - logger.info(f'Updated from {self.last_version} to {current_version}') - self.last_version = current_version - self.setup_complete = False - self.write() - def add_favorite_dir(self, image_dir: Path): if image_dir not in self.favorite_image_dirs: self.favorite_image_dirs.append(image_dir) @@ -188,7 +174,3 @@ def reset_defaults(self): """Reset all settings to defaults""" self.__class__(path=self.path).write() self = self.__class__.read(path=self.path) - - def set_obs_checkpoint(self): - self.last_obs_check = datetime.utcnow().replace(microsecond=0) - self.write() diff --git a/naturtag/storage/setup.py b/naturtag/storage/setup.py index b1e2b177..4f806a13 100644 --- a/naturtag/storage/setup.py +++ b/naturtag/storage/setup.py @@ -5,7 +5,6 @@ from pathlib import Path from tarfile import TarFile from tempfile import TemporaryDirectory -from typing import Optional import requests from pyinaturalist_convert import create_tables, load_table @@ -15,17 +14,17 @@ vacuum_analyze, ) -from naturtag.constants import PACKAGED_TAXON_DB, TAXON_DB_URL -from naturtag.storage import Settings +from naturtag.constants import DB_PATH, PACKAGED_TAXON_DB, TAXON_DB_URL +from naturtag.storage import AppState logger = getLogger().getChild(__name__) def setup( - settings: Optional[Settings] = None, + db_path: Path = DB_PATH, overwrite: bool = False, download: bool = False, -): +) -> AppState: """Run any first-time setup steps, if needed: * Create database tables * Extract packaged taxonomy data and load into SQLite @@ -35,15 +34,16 @@ def setup( Use `download=True` to fetch the missing data. Args: - settings: Existing settings object + db_path: SQLite database path overwrite: Overwrite an existing taxon database, if it already exists download: Download taxon data (full text search + basic taxon details) """ - settings = settings or Settings.read() - db_path = settings.db_path - if settings.setup_complete and not overwrite: + # Check if setup is needed + app_state = AppState.read(db_path) + app_state.check_version_change() + if app_state.setup_complete and not overwrite: logger.debug('Database setup already done') - return + return app_state logger.info('Running database setup') if overwrite: @@ -64,10 +64,11 @@ def setup( create_observation_fts_table(db_path) _load_taxon_db(db_path, download) + app_state.setup_complete = True + app_state.last_obs_check = None + app_state.write() logger.info('Setup complete') - settings.setup_complete = True - settings.last_obs_check = None - settings.write() + return app_state # TODO: Currently this isn't exposed through the UI; requires calling `setup(download=True)` or diff --git a/naturtag/storage/user_taxa.py b/naturtag/storage/user_taxa.py deleted file mode 100644 index f98e7164..00000000 --- a/naturtag/storage/user_taxa.py +++ /dev/null @@ -1,120 +0,0 @@ -from collections import Counter, OrderedDict -from dataclasses import dataclass -from itertools import chain -from logging import getLogger -from pathlib import Path -from typing import Iterable, Optional - -from pyinaturalist import TaxonCounts -from pyinaturalist_convert._models import Base, sa_field -from pyinaturalist_convert.db import get_session -from sqlalchemy import Integer, select, types -from sqlalchemy.orm import reconstructor - -from naturtag.constants import DB_PATH, MAX_DISPLAY_HISTORY, MAX_DISPLAY_OBSERVED - -logger = getLogger(__name__) - - -def _top_unique_ids(ids: Iterable[int], n: int = MAX_DISPLAY_HISTORY) -> list[int]: - """Get the top unique IDs from a list, preserving order""" - return list(OrderedDict.fromkeys(ids))[:n] - - -@Base.mapped -@dataclass -class UserTaxa: - """Relevant taxon IDs stored for the current user, mainly used by taxonomy browser""" - - __tablename__ = 'user_taxa' - __sa_dataclass_metadata_key__ = 'sa' - - id: int = sa_field(Integer, primary_key=True) - history: list[int] = sa_field(types.JSON, default=None) - starred: list[int] = sa_field(types.JSON, default=None) - observed: dict[int, int] = sa_field(types.JSON, default=None) - frequent: Counter[int] = None # type: ignore - - # @property - # def frequent(self) -> Counter[int]: - # if not self._frequent: - # self.frequent = Counter(self.history) - # return self._frequent - - @property - def display_ids(self) -> set[int]: - """Return top history, frequent, observed, and starred taxa combined. - Returns only unique IDs, since a given taxon may appear in more than one list. - """ - top_ids = [self.top_history, self.top_frequent, self.top_observed, self.starred] - return set(chain.from_iterable(top_ids)) - - @reconstructor - def post_init(self): - self.history = self.history or [] - self.starred = self.starred or [] - self.observed = self.observed or {} - self.frequent = Counter(self.history) - - @property - def top_history(self) -> list[int]: - """Get the most recently viewed unique taxa""" - return _top_unique_ids(self.history[::-1]) - - @property - def top_frequent(self) -> list[int]: - """Get the most frequently viewed taxa""" - return [t[0] for t in self.frequent.most_common(MAX_DISPLAY_HISTORY)] - - @property - def top_observed(self) -> list[int]: - """Get the most commonly observed taxa""" - return _top_unique_ids(self.observed.keys(), MAX_DISPLAY_OBSERVED) - - def frequent_idx(self, taxon_id: int) -> Optional[int]: - """Return the position of a taxon in the frequent list, if it's in the top - ``MAX_DISPLAY_HISTORY`` taxa. - """ - try: - return self.top_frequent.index(taxon_id) - except ValueError: - return None - - def view_count(self, taxon_id: int) -> int: - """Return the number of times this taxon has been viewed""" - return self.frequent.get(taxon_id, 0) - - def update_history(self, taxon_id: int): - """Update history and frequent with a new or existing taxon ID""" - self.history.append(taxon_id) - self.frequent.update([taxon_id]) - - def update_observed(self, taxon_counts: TaxonCounts): - self.observed = {t.id: t.count for t in taxon_counts} - self.write() - - def __str__(self): - sizes = [ - f'History: {len(self.history)}', - f'Starred: {len(self.starred)}', - f'Frequent: {len(self.frequent)}', - f'Observed: {len(self.observed)}', - ] - return '\n'.join(sizes) - - # Unconventional for a SQLAlchemy model, but convenient for consistency with Settings class - @classmethod - def read(cls, db_path: Path = DB_PATH) -> 'UserTaxa': - logger.info(f'Reading user taxa from {db_path}') - with get_session(db_path) as session: - user_taxa = session.execute(select(UserTaxa)).first() - if not user_taxa: - user_taxa = UserTaxa(id=0) - user_taxa.post_init() - return user_taxa - - def write(self, db_path: Path = DB_PATH): - logger.info(f'Writing user taxa to {db_path}') - with get_session(db_path) as session: - session.add(self) - session.commit() diff --git a/naturtag/widgets/taxon_images.py b/naturtag/widgets/taxon_images.py index 67cbd0ea..bef55bfc 100644 --- a/naturtag/widgets/taxon_images.py +++ b/naturtag/widgets/taxon_images.py @@ -21,7 +21,7 @@ ) if TYPE_CHECKING: - from naturtag.storage import UserTaxa + from naturtag.storage import AppState ATTRIBUTION_STRIP_PATTERN = re.compile(r',?\s+uploaded by.*') @@ -65,7 +65,7 @@ def __init__(self, taxon: Taxon, user_observations_count: int = 0, delayed_load: class TaxonList(InfoCardList): """A scrollable list of TaxonInfoCards""" - def __init__(self, user_taxa: 'UserTaxa', **kwargs): + def __init__(self, user_taxa: 'AppState', **kwargs): super().__init__(**kwargs) self.user_taxa = user_taxa