From 995a092a9690b6c59ee55fcd851d3ad08ba9ad6b Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Thu, 3 Oct 2024 14:35:02 -0500 Subject: [PATCH 001/124] From core branch, rename 'interfaces' to 'connectors'. Also, add documentation to IDMode, from core branch. --- .../{interfaces => connectors}/__init__.py | 0 .../interfaces/BQFirebaseInterface.py | 0 .../interfaces/BigQueryCodingInterface.py | 0 .../interfaces/BigQueryInterface.py | 0 .../{ => connectors}/interfaces/CSVInterface.py | 0 .../interfaces/CodingInterface.py | 0 .../interfaces/EventInterface.py | 0 .../{ => connectors}/interfaces/Interface.py | 0 .../interfaces/MySQLInterface.py | 0 .../outerfaces/DataOuterface.py | 0 .../outerfaces/DebugOuterface.py | 0 .../outerfaces/DictionaryOuterface.py | 0 .../outerfaces/TSVOuterface.py | 0 .../outerfaces/__init__.py | 0 src/ogd/common/models/enums/IDMode.py | 16 ++++++++++++++++ 15 files changed, 16 insertions(+) rename src/ogd/common/{interfaces => connectors}/__init__.py (100%) rename src/ogd/common/{ => connectors}/interfaces/BQFirebaseInterface.py (100%) rename src/ogd/common/{ => connectors}/interfaces/BigQueryCodingInterface.py (100%) rename src/ogd/common/{ => connectors}/interfaces/BigQueryInterface.py (100%) rename src/ogd/common/{ => connectors}/interfaces/CSVInterface.py (100%) rename src/ogd/common/{ => connectors}/interfaces/CodingInterface.py (100%) rename src/ogd/common/{ => connectors}/interfaces/EventInterface.py (100%) rename src/ogd/common/{ => connectors}/interfaces/Interface.py (100%) rename src/ogd/common/{ => connectors}/interfaces/MySQLInterface.py (100%) rename src/ogd/common/{interfaces => connectors}/outerfaces/DataOuterface.py (100%) rename src/ogd/common/{interfaces => connectors}/outerfaces/DebugOuterface.py (100%) rename src/ogd/common/{interfaces => connectors}/outerfaces/DictionaryOuterface.py (100%) rename src/ogd/common/{interfaces => connectors}/outerfaces/TSVOuterface.py (100%) rename src/ogd/common/{interfaces => 
connectors}/outerfaces/__init__.py (100%) diff --git a/src/ogd/common/interfaces/__init__.py b/src/ogd/common/connectors/__init__.py similarity index 100% rename from src/ogd/common/interfaces/__init__.py rename to src/ogd/common/connectors/__init__.py diff --git a/src/ogd/common/interfaces/BQFirebaseInterface.py b/src/ogd/common/connectors/interfaces/BQFirebaseInterface.py similarity index 100% rename from src/ogd/common/interfaces/BQFirebaseInterface.py rename to src/ogd/common/connectors/interfaces/BQFirebaseInterface.py diff --git a/src/ogd/common/interfaces/BigQueryCodingInterface.py b/src/ogd/common/connectors/interfaces/BigQueryCodingInterface.py similarity index 100% rename from src/ogd/common/interfaces/BigQueryCodingInterface.py rename to src/ogd/common/connectors/interfaces/BigQueryCodingInterface.py diff --git a/src/ogd/common/interfaces/BigQueryInterface.py b/src/ogd/common/connectors/interfaces/BigQueryInterface.py similarity index 100% rename from src/ogd/common/interfaces/BigQueryInterface.py rename to src/ogd/common/connectors/interfaces/BigQueryInterface.py diff --git a/src/ogd/common/interfaces/CSVInterface.py b/src/ogd/common/connectors/interfaces/CSVInterface.py similarity index 100% rename from src/ogd/common/interfaces/CSVInterface.py rename to src/ogd/common/connectors/interfaces/CSVInterface.py diff --git a/src/ogd/common/interfaces/CodingInterface.py b/src/ogd/common/connectors/interfaces/CodingInterface.py similarity index 100% rename from src/ogd/common/interfaces/CodingInterface.py rename to src/ogd/common/connectors/interfaces/CodingInterface.py diff --git a/src/ogd/common/interfaces/EventInterface.py b/src/ogd/common/connectors/interfaces/EventInterface.py similarity index 100% rename from src/ogd/common/interfaces/EventInterface.py rename to src/ogd/common/connectors/interfaces/EventInterface.py diff --git a/src/ogd/common/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py similarity index 100% rename from 
src/ogd/common/interfaces/Interface.py rename to src/ogd/common/connectors/interfaces/Interface.py diff --git a/src/ogd/common/interfaces/MySQLInterface.py b/src/ogd/common/connectors/interfaces/MySQLInterface.py similarity index 100% rename from src/ogd/common/interfaces/MySQLInterface.py rename to src/ogd/common/connectors/interfaces/MySQLInterface.py diff --git a/src/ogd/common/interfaces/outerfaces/DataOuterface.py b/src/ogd/common/connectors/outerfaces/DataOuterface.py similarity index 100% rename from src/ogd/common/interfaces/outerfaces/DataOuterface.py rename to src/ogd/common/connectors/outerfaces/DataOuterface.py diff --git a/src/ogd/common/interfaces/outerfaces/DebugOuterface.py b/src/ogd/common/connectors/outerfaces/DebugOuterface.py similarity index 100% rename from src/ogd/common/interfaces/outerfaces/DebugOuterface.py rename to src/ogd/common/connectors/outerfaces/DebugOuterface.py diff --git a/src/ogd/common/interfaces/outerfaces/DictionaryOuterface.py b/src/ogd/common/connectors/outerfaces/DictionaryOuterface.py similarity index 100% rename from src/ogd/common/interfaces/outerfaces/DictionaryOuterface.py rename to src/ogd/common/connectors/outerfaces/DictionaryOuterface.py diff --git a/src/ogd/common/interfaces/outerfaces/TSVOuterface.py b/src/ogd/common/connectors/outerfaces/TSVOuterface.py similarity index 100% rename from src/ogd/common/interfaces/outerfaces/TSVOuterface.py rename to src/ogd/common/connectors/outerfaces/TSVOuterface.py diff --git a/src/ogd/common/interfaces/outerfaces/__init__.py b/src/ogd/common/connectors/outerfaces/__init__.py similarity index 100% rename from src/ogd/common/interfaces/outerfaces/__init__.py rename to src/ogd/common/connectors/outerfaces/__init__.py diff --git a/src/ogd/common/models/enums/IDMode.py b/src/ogd/common/models/enums/IDMode.py index 92a5fa5..5b0b724 100644 --- a/src/ogd/common/models/enums/IDMode.py +++ b/src/ogd/common/models/enums/IDMode.py @@ -1,7 +1,23 @@ +"""IDMode Module +""" + # import 
standard libraries from enum import IntEnum class IDMode(IntEnum): + """Enum representing the different kinds of IDs in OpenGameData. + + Namely: + + * Session IDs + * User IDs (or Player IDs) + * App IDs (or Game IDs) + + :param IntEnum: _description_ + :type IntEnum: _type_ + :return: _description_ + :rtype: _type_ + """ SESSION = 1 USER = 2 GAME = 3 From 45e385cef8f3c73ccef631bb9ab647e412138976 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Thu, 3 Oct 2024 14:37:39 -0500 Subject: [PATCH 002/124] Add some documentation strings to Schema base class. --- src/ogd/common/schemas/Schema.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/ogd/common/schemas/Schema.py b/src/ogd/common/schemas/Schema.py index 61b9137..cb94762 100644 --- a/src/ogd/common/schemas/Schema.py +++ b/src/ogd/common/schemas/Schema.py @@ -25,18 +25,38 @@ def __repr__(self): @property @abc.abstractmethod def AsMarkdown(self) -> str: + """Gets a markdown-formatted representation of the schema. + + :return: A markdown-formatted representation of the schema. + :rtype: str + """ pass @property def Name(self) -> str: + """Gets the name of the specific schema represented by the class instance. + + :return: The name of the specific schema represented by the class instance. + :rtype: str + """ return self._name @property def NonStandardElements(self) -> Dict[str, Any]: + """Gets a sub-dictionary of any non-standard schema elements found in the source dictionary for the given schema instance. + + :return: A dictionary of any non-standard schema elements found in the source dictionary for the given schema instance. + :rtype: Dict[str, Any] + """ return self._other_elements @property def NonStandardElementNames(self) -> List[str]: + """Gets a list of names of non-standard schema elements found in the source dictionary for the given schema instance. + + :return: A list of names of non-standard schema elements found in the source dictionary for the given schema instance. 
+ :rtype: List[str] + """ return list(self._other_elements.keys()) @staticmethod From 51fc8d4e92cb809fe0784208b1fd3f9061531905 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Thu, 3 Oct 2024 22:33:04 -0500 Subject: [PATCH 003/124] Add StorageConnector base class, from core branch. --- src/ogd/common/connectors/StorageConnector.py | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 src/ogd/common/connectors/StorageConnector.py diff --git a/src/ogd/common/connectors/StorageConnector.py b/src/ogd/common/connectors/StorageConnector.py new file mode 100644 index 0000000..55639ad --- /dev/null +++ b/src/ogd/common/connectors/StorageConnector.py @@ -0,0 +1,105 @@ +"""StorageConnector Module +""" + +# import standard libraries +import abc +import logging + +# import local files +from ogd.core.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.core.utils.Logger import Logger + +class StorageConnector(abc.ABC): + """Base class for all interfaces and outerfaces. + Ensures each inter/outerface can be opened and closed, like most system resources. + + All subclasses must implement the `_open` and `_close` functions. + """ + + # *** ABSTRACTS *** + + @abc.abstractmethod + def _open(self) -> bool: + """Private implementation of the logic for opening a connection to a storage resource + + :return: True if the connection was successful, otherwise False. + :rtype: bool + """ + pass + + @abc.abstractmethod + def _close(self) -> bool: + """Private implementation of the logic for closing a connection to a storage resource + + :return: True if the connection was closed successfully, otherwise False. 
+ :rtype: bool + """ + pass + + # *** BUILT-INS & PROPERTIES *** + + def __init__(self, schema:GameSourceSchema): + self._source_schema : GameSourceSchema = schema + self._is_open : bool = False + + def __del__(self): + self.Close() + + @property + def IsOpen(self) -> bool: + """Property to indicate whether a connection with the storage resource is open or not. + + :return: True if there is an open connection to the storage resource, otherwise false. + :rtype: bool + """ + return True if self._is_open else False + + # *** PUBLIC STATICS *** + + # *** PUBLIC METHODS *** + + def Open(self, force_reopen:bool = False) -> bool: + """Function to open the connection to a storage resource. + + If the resource was already open, this function (by default) does nothing. + The type of resource is determined by the implementation of a given interface/outerface class. + + :param force_reopen: Force a re-open of the storage resource, if it was already open. Defaults to False + :type force_reopen: bool, optional + :return: True if the resource was successfully opened (or was already open), otherwise False. + :rtype: bool + """ + if not self.IsOpen: + self._is_open = self._open() + elif force_reopen: + self.Close() + self._is_open = self._open() + Logger.Log(f"Successfully force-reopened {self.__class__}", logging.INFO) + return self.IsOpen + + def Close(self, force_close:bool = False) -> bool: + """Function to close the connection to a storage resource. + + If there was no open connection, this function (by default) does nothing. + + :param force_close: Force an attempt to close the resource, even if there is not a known open connection. Defaults to False + :type force_close: bool, optional + :return: True if the resource was successfully closed (or was not open to begin with), otherwise False. 
+ :rtype: bool + """ + ret_val = True + if self.IsOpen: + ret_val = self._close() + elif force_close: + try: + self._close() + except Exception as err: + Logger.Log(f"Encountered an error while force-closing {self.__class__}:\n{err}", logging.WARNING) + else: + Logger.Log(f"Successfully force-closed {self.__class__}", logging.INFO) + + return ret_val + + # *** PRIVATE STATICS *** + + # *** PRIVATE METHODS *** From 02c68e777beea81029b66d32860f37c6ff56d49b Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Thu, 3 Oct 2024 22:34:10 -0500 Subject: [PATCH 004/124] Add changes to Interface class from core branch. --- .../common/connectors/interfaces/Interface.py | 135 +++++++++++++++--- 1 file changed, 116 insertions(+), 19 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index 0a2683f..90cb3c8 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -1,27 +1,72 @@ +"""DataInterface Module +""" ## import standard libraries import abc -from typing import Any, Dict +import logging +from datetime import datetime +from typing import Dict, List, Optional, Union # import local files from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema -class Interface(abc.ABC): +class Interface(StorageConnector): + """Base class for all connectors that serve as an interface to some IO resource. + + All subclasses must implement the `_availableIDs`, `_availableDates`, `_IDsFromDates`, and `_datesFromIDs` functions. + """ # *** ABSTRACTS *** @abc.abstractmethod - def _open(self) -> bool: + def _availableIDs(self, mode:IDMode=IDMode.SESSION) -> List[str]: + """Private implementation of the logic to retrieve all IDs of given mode from the connected storage. + + :param mode: The type of ID to be listed. + :type mode: IDMode + :return: A list of IDs with given mode available through the connected storage. 
+ :rtype: List[str] + """ + pass + + @abc.abstractmethod + def _availableDates(self) -> Dict[str,datetime]: + """Private implementation of the logic to retrieve the full range of dates/times from the connected storage. + + :return: A dict mapping `min` and `max` to the minimum and maximum datetimes + :rtype: Dict[str,datetime] + """ pass @abc.abstractmethod - def _close(self) -> bool: + def _IDsFromDates(self, min:datetime, max:datetime, mode:IDMode=IDMode.SESSION) -> List[str]: + """Private implementation of logic to list IDs of given mode that have data within a range of dates. + + :param min: Earliest date in the range + :type min: datetime + :param max: Latest date in the range + :type max: datetime + :return: A list of IDs of given mode with data falling within the given date range. + :rtype: Optional[List[str]] + """ + pass + + @abc.abstractmethod + def _datesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION) -> Dict[str,datetime]: + """Private implementation of the logic to get a range of dates covering all data for given list of IDs (with given mode). + + :param id_list: The list of IDs, for whose data we want a date range. 
+ :type id_list: List[str] + :param id_mode: The kind of ID to use when interpreting the `id_list`, defaults to IDMode.SESSION + :type id_mode: IDMode, optional + :return: A dictionary mapping `min` and `max` to the range of dates covering all data for the given IDs + :rtype: Union[Dict[str,datetime], Dict[str,None]] + """ pass # *** BUILT-INS & PROPERTIES *** - def __init__(self, config:GameSourceSchema): - self._config : GameSourceSchema = config - self._is_open : bool = False + def __init__(self, schema:GameSourceSchema): + super().__init__(schema=schema) def __del__(self): self.Close() @@ -30,19 +75,71 @@ def __del__(self): # *** PUBLIC METHODS *** - def Open(self, force_reopen:bool = False) -> bool: - if (not self._is_open) or force_reopen: - self._is_open = self._open() - return self._is_open - - def IsOpen(self) -> bool: - return True if self._is_open else False - - def Close(self) -> bool: - if self.IsOpen(): - return self._close() + def AvailableIDs(self, mode:IDMode=IDMode.SESSION) -> Optional[List[str]]: + """Retrieve all IDs of given mode from the connected storage. + + :param mode: The type of ID to be listed. + :type mode: IDMode + :return: A list of IDs with given mode available through the connected storage. + :rtype: List[str] + """ + ret_val = None + if self.IsOpen: + ret_val = self._availableIDs(mode=mode) + else: + Logger.Log(f"Can't retrieve list of all {mode} IDs, the storage connection is not open!", logging.WARNING, depth=3) + return ret_val + + def AvailableDates(self) -> Union[Dict[str,datetime], Dict[str,None]]: + """Retrieve the full range of dates/times covered by data in the connected storage. 
+ + :return: A dictionary mapping `min` and `max` to the min and max datetimes, or to None (if unavailable) + :rtype: Union[Dict[str,datetime], Dict[str,None]] + """ + ret_val = {'min':None, 'max':None} + if self.IsOpen: + ret_val = self._availableDates() else: - return True + Logger.Log("Could not get full date range, the storage connection is not open!", logging.WARNING, depth=3) + return ret_val + + def IDsFromDates(self, min:datetime, max:datetime, mode:IDMode=IDMode.SESSION) -> Optional[List[str]]: + """Get a list of IDs of given mode that have data within a range of dates. + + :param min: Earliest date in the range + :type min: datetime + :param max: Latest date in the range + :type max: datetime + :return: A list of IDs of given mode with data falling within the given date range. + :rtype: Optional[List[str]] + """ + ret_val = None + if not self.IsOpen: + str_min, str_max = min.strftime("%Y%m%d"), max.strftime("%Y%m%d") + Logger.Log(f"Could not retrieve IDs for {str_min}-{str_max}, the source interface is not open!", logging.WARNING, depth=3) + else: + ret_val = self._IDsFromDates(min=min, max=max, mode=mode) + return ret_val + + def DatesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION) -> Union[Dict[str,datetime], Dict[str,None]]: + """Get a range of dates covering all data for given list of IDs (with given mode). + + :param id_list: The list of IDs, for whose data we want a date range. 
+ :type id_list: List[str] + :param id_mode: The kind of ID to use when interpreting the `id_list`, defaults to IDMode.SESSION + :type id_mode: IDMode, optional + :return: A dictionary mapping `min` and `max` to the range of dates covering all data for the given IDs + :rtype: Union[Dict[str,datetime], Dict[str,None]] + """ + ret_val = {'min':None, 'max':None} + if not self.IsOpen: + Logger.Log(f"Could not retrieve date range {len(id_list)} session IDs, the source interface is not open!", logging.WARNING, depth=3) + else: + Logger.Log(f"Retrieving date range from IDs with {id_mode.name} ID mode.", logging.DEBUG, depth=3) + ret_val = self._datesFromIDs(id_list=id_list, id_mode=id_mode) + return ret_val + + # *** PROPERTIES *** # *** PRIVATE STATICS *** From e0272d10b646766824a6a0b4e80f8bc24c61084c Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Thu, 3 Oct 2024 22:34:56 -0500 Subject: [PATCH 005/124] Update imports. --- src/ogd/common/connectors/interfaces/Interface.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index 90cb3c8..5c94848 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -7,7 +7,10 @@ from typing import Dict, List, Optional, Union # import local files +from ogd.common.connectors.StorageConnector import StorageConnector +from ogd.common.models.enums.IDMode import IDMode from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.utils.Logger import Logger class Interface(StorageConnector): """Base class for all connectors that serve as an interface to some IO resource. From 6ed35358f84136665ec1cfa4267800d17b529f17 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Thu, 3 Oct 2024 22:48:05 -0500 Subject: [PATCH 006/124] Schema reorganization and new schemas from core branch. 
--- .../BigQuerySourceSchema.py | 0 .../schemas/storage/CredentialSchema.py | 16 + .../DataSourceSchema.py | 0 .../FileSourceSchema.py | 0 .../MySQLSourceSchema.py | 0 .../common/schemas/tables/EventTableSchema.py | 316 ++++++++++++++++++ 6 files changed, 332 insertions(+) rename src/ogd/common/schemas/{configs/data_sources => storage}/BigQuerySourceSchema.py (100%) create mode 100644 src/ogd/common/schemas/storage/CredentialSchema.py rename src/ogd/common/schemas/{configs/data_sources => storage}/DataSourceSchema.py (100%) rename src/ogd/common/schemas/{configs/data_sources => storage}/FileSourceSchema.py (100%) rename src/ogd/common/schemas/{configs/data_sources => storage}/MySQLSourceSchema.py (100%) create mode 100644 src/ogd/common/schemas/tables/EventTableSchema.py diff --git a/src/ogd/common/schemas/configs/data_sources/BigQuerySourceSchema.py b/src/ogd/common/schemas/storage/BigQuerySourceSchema.py similarity index 100% rename from src/ogd/common/schemas/configs/data_sources/BigQuerySourceSchema.py rename to src/ogd/common/schemas/storage/BigQuerySourceSchema.py diff --git a/src/ogd/common/schemas/storage/CredentialSchema.py b/src/ogd/common/schemas/storage/CredentialSchema.py new file mode 100644 index 0000000..f2076b3 --- /dev/null +++ b/src/ogd/common/schemas/storage/CredentialSchema.py @@ -0,0 +1,16 @@ +# import standard libraries +from typing import Any, Dict # , overload +# import local files +from ogd.core.schemas.Schema import Schema + + +class CredentialSchema(Schema): + """Dumb struct to contain data pertaining to credentials for accessing a data source. + + In general, a credential can have a key, or a user-password combination + """ + # @overload + # def __init__(self, name:str, other_elements:Dict[str, Any]): ... 
+ + def __init__(self, name:str, unparsed_elements:Dict[str, Any] | Any): + super().__init__(name=name, other_elements=unparsed_elements) diff --git a/src/ogd/common/schemas/configs/data_sources/DataSourceSchema.py b/src/ogd/common/schemas/storage/DataSourceSchema.py similarity index 100% rename from src/ogd/common/schemas/configs/data_sources/DataSourceSchema.py rename to src/ogd/common/schemas/storage/DataSourceSchema.py diff --git a/src/ogd/common/schemas/configs/data_sources/FileSourceSchema.py b/src/ogd/common/schemas/storage/FileSourceSchema.py similarity index 100% rename from src/ogd/common/schemas/configs/data_sources/FileSourceSchema.py rename to src/ogd/common/schemas/storage/FileSourceSchema.py diff --git a/src/ogd/common/schemas/configs/data_sources/MySQLSourceSchema.py b/src/ogd/common/schemas/storage/MySQLSourceSchema.py similarity index 100% rename from src/ogd/common/schemas/configs/data_sources/MySQLSourceSchema.py rename to src/ogd/common/schemas/storage/MySQLSourceSchema.py diff --git a/src/ogd/common/schemas/tables/EventTableSchema.py b/src/ogd/common/schemas/tables/EventTableSchema.py new file mode 100644 index 0000000..85a0797 --- /dev/null +++ b/src/ogd/common/schemas/tables/EventTableSchema.py @@ -0,0 +1,316 @@ +"""EventTableSchema Module""" +# import standard libraries +import logging +import re +from datetime import datetime, timedelta, timezone +from json.decoder import JSONDecodeError +from pathlib import Path +from typing import Any, Dict, Tuple, Optional + +# import 3rd-party libraries +from dateutil import parser + +# import local files +from ogd.core import schemas +from ogd.core.schemas.tables.TableSchema import TableSchema +from ogd.core.models.Event import Event, EventSource +from ogd.core.utils import utils +from ogd.core.utils.Logger import Logger +from ogd.core.utils.typing import Map + +## @class TableSchema +# Dumb struct to hold useful info about the structure of database data +# for a particular game. 
+# This includes the indices of several important database columns, the names +# of the database columns, the max and min levels in the game, and a list of +# IDs for the game sessions in the given requested date range. +class EventTableSchema(TableSchema): + + # *** BUILT-INS & PROPERTIES *** + + def __init__(self, schema_name:str, schema_path:Path = Path(schemas.__file__).parent / "table_schemas/"): + """Constructor for the TableSchema class. + Given a database connection and a game data request, + this retrieves a bit of information from the database to fill in the + class variables. + + :param schema_name: The filename for the table schema JSON. + :type schema_name: str + :param schema_path: Path to find the given table schema file, defaults to "./schemas/table_schemas/" + :type schema_path: str, optional + :param is_legacy: [description], defaults to False + :type is_legacy: bool, optional + """ + super().__init__(schema_name=schema_name, schema_path=schema_path) + + @property + def AsMarkdown(self) -> str: + ret_val = "\n\n".join([ + "## Database Columns", + "The individual columns recorded in the database for this game.", + "\n".join([item.AsMarkdown for item in self.Columns]), + "## Event Object Elements", + "The elements (member variables) of each Event object, available to programmers when writing feature extractors. 
The right-hand side shows which database column(s) are mapped to a given element.", + self._column_map.AsMarkdown, + ""]) + return ret_val + + @property + def SessionIDColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.SessionID, int): + ret_val = self.ColumnNames[self._column_map.SessionID] + elif isinstance(self._column_map.SessionID, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.SessionID]) + return ret_val + + @property + def AppIDColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.AppID, int): + ret_val = self.ColumnNames[self._column_map.AppID] + elif isinstance(self._column_map.AppID, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppID]) + return ret_val + + @property + def TimestampColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.Timestamp, int): + ret_val = self.ColumnNames[self._column_map.Timestamp] + elif isinstance(self._column_map.Timestamp, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.Timestamp]) + return ret_val + + @property + def EventNameColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.EventName, int): + ret_val = self.ColumnNames[self._column_map.EventName] + elif isinstance(self._column_map.EventName, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventName]) + return ret_val + + @property + def EventDataColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.EventData, int): + ret_val = self.ColumnNames[self._column_map.EventData] + elif isinstance(self._column_map.EventData, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventData]) + return ret_val + + @property + def EventSourceColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.EventSource, int): + ret_val = 
self.ColumnNames[self._column_map.EventSource] + elif isinstance(self._column_map.EventSource, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventSource]) + return ret_val + + @property + def AppVersionColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.AppVersion, int): + ret_val = self.ColumnNames[self._column_map.AppVersion] + elif isinstance(self._column_map.AppVersion, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppVersion]) + return ret_val + + @property + def AppBranchColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.AppBranch, int): + ret_val = self.ColumnNames[self._column_map.AppBranch] + elif isinstance(self._column_map.AppBranch, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppBranch]) + return ret_val + + @property + def LogVersionColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.LogVersion, int): + ret_val = self.ColumnNames[self._column_map.LogVersion] + elif isinstance(self._column_map.LogVersion, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.LogVersion]) + return ret_val + + @property + def TimeOffsetColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.TimeOffset, int): + ret_val = self.ColumnNames[self._column_map.TimeOffset] + elif isinstance(self._column_map.TimeOffset, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.TimeOffset]) + return ret_val + + @property + def UserIDColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.UserID, int): + ret_val = self.ColumnNames[self._column_map.UserID] + elif isinstance(self._column_map.UserID, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.UserID]) + return ret_val + + @property + def UserDataColumn(self) -> Optional[str]: + ret_val = None + if 
isinstance(self._column_map.UserData, int): + ret_val = self.ColumnNames[self._column_map.UserData] + elif isinstance(self._column_map.UserData, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.UserData]) + return ret_val + + @property + def GameStateColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.GameState, int): + ret_val = self.ColumnNames[self._column_map.GameState] + elif isinstance(self._column_map.GameState, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.GameState]) + return ret_val + + @property + def EventSequenceIndexColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.EventSequenceIndex, int): + ret_val = self.ColumnNames[self._column_map.EventSequenceIndex] + elif isinstance(self._column_map.EventSequenceIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventSequenceIndex]) + return ret_val + + # *** IMPLEMENT ABSTRACT FUNCTIONS *** + + # *** PUBLIC STATICS *** + + # *** PUBLIC METHODS *** + + _conversion_warnings = [] + def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:utils.map={}): + """Function to convert a row to an Event, based on the loaded schema. + In general, columns specified in the schema's column_map are mapped to corresponding elements of the Event. + If the column_map gave a list, rather than a single column name, the values from each column are concatenated in order with '.' character separators. + Finally, the concatenated values (or single value) are parsed according to the type required by Event. + One exception: For event_data, we expect to create a Dict object, so each column in the list will have its value parsed according to the type in 'columns', + and placed into a dict mapping the original column name to the parsed value (unless the parsed value is a dict, then it is merged into the top-level dict). + + :param row: The raw row data for an event. 
Generally assumed to be a tuple, though in principle a list would be fine too. + :type row: Tuple[str] + :param concatenator: A string to use as a separator when concatenating multiple columns into a single Event element. + :type concatenator: str + :return: [description] + :rtype: [type] + """ + # define vars to be passed as params + sess_id : str + app_id : str + tstamp : datetime + ename : str + edata : Map + app_ver : str + app_br : str + log_ver : str + offset : Optional[timezone] + uid : Optional[str] + udata : Optional[Map] + state : Optional[Map] + index : Optional[int] + + # 2) Handle event_data parameter, a special case. + # For this case we've got to parse the json, and then fold in whatever other columns were desired. + # 3) Assign vals to our arg vars and pass to Event ctor. + sess_id = self._getValueFromRow(row=row, indices=self._column_map.SessionID, concatenator=concatenator, fallback=fallbacks.get('session_id')) + if not isinstance(sess_id, str): + if "sess_id" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set session_id as {type(sess_id)}, but session_id should be a string", logging.WARN) + EventTableSchema._conversion_warnings.append("sess_id") + sess_id = str(sess_id) + + app_id = self._getValueFromRow(row=row, indices=self._column_map.AppID, concatenator=concatenator, fallback=fallbacks.get('app_id')) + if not isinstance(app_id, str): + if "app_id" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set app_id as {type(app_id)}, but app_id should be a string", logging.WARN) + EventTableSchema._conversion_warnings.append("app_id") + app_id = str(app_id) + + tstamp = self._getValueFromRow(row=row, indices=self._column_map.Timestamp, concatenator=concatenator, fallback=fallbacks.get('timestamp')) + if not isinstance(tstamp, datetime): + if "timestamp" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table 
schema parsed timestamp as {type(tstamp)}, but timestamp should be a datetime", logging.WARN) + EventTableSchema._conversion_warnings.append("timestamp") + tstamp = TableSchema._convertDateTime(tstamp) + + ename = self._getValueFromRow(row=row, indices=self._column_map.EventName, concatenator=concatenator, fallback=fallbacks.get('event_name')) + if not isinstance(ename, str): + if "ename" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set event_name as {type(ename)}, but event_name should be a string", logging.WARN) + EventTableSchema._conversion_warnings.append("ename") + ename = str(ename) + + datas : Dict[str, Any] = self._getValueFromRow(row=row, indices=self._column_map.EventData, concatenator=concatenator, fallback=fallbacks.get('event_data')) + + # TODO: go bac to isostring function; need 0-padding on ms first, though + edata = dict(sorted(datas.items())) # Sort keys alphabetically + + esrc = self._getValueFromRow(row=row, indices=self._column_map.EventSource, concatenator=concatenator, fallback=fallbacks.get('event_source', EventSource.GAME)) + if not isinstance(esrc, EventSource): + if "esrc" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set event_source as {type(esrc)}, but event_source should be an EventSource", logging.WARN) + EventTableSchema._conversion_warnings.append("esrc") + esrc = EventSource.GENERATED if esrc == "GENERATED" else EventSource.GAME + + app_ver = self._getValueFromRow(row=row, indices=self._column_map.AppVersion, concatenator=concatenator, fallback=fallbacks.get('app_version', "0")) + if not isinstance(app_ver, str): + if "app_ver" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set app_version as {type(app_ver)}, but app_version should be a string", logging.WARN) + EventTableSchema._conversion_warnings.append("app_ver") + app_ver = str(app_ver) + + app_br = 
self._getValueFromRow(row=row, indices=self._column_map.AppBranch, concatenator=concatenator, fallback=fallbacks.get('app_branch')) + if not isinstance(app_br, str): + if "app_br" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set app_branch as {type(app_br)}, but app_branch should be a string", logging.WARN) + EventTableSchema._conversion_warnings.append("app_br") + app_br = str(app_br) + + log_ver = self._getValueFromRow(row=row, indices=self._column_map.LogVersion, concatenator=concatenator, fallback=fallbacks.get('log_version', "0")) + if not isinstance(log_ver, str): + if "log_ver" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set log_version as {type(log_ver)}, but log_version should be a string", logging.WARN) + EventTableSchema._conversion_warnings.append("log_ver") + log_ver = str(log_ver) + + offset = self._getValueFromRow(row=row, indices=self._column_map.TimeOffset, concatenator=concatenator, fallback=fallbacks.get('time_offset')) + if isinstance(offset, timedelta): + if "offset" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set offset as {type(offset)}, but offset should be a timezone", logging.WARN) + EventTableSchema._conversion_warnings.append("offset") + offset = timezone(offset) + + uid = self._getValueFromRow(row=row, indices=self._column_map.UserID, concatenator=concatenator, fallback=fallbacks.get('user_id')) + if uid is not None and not isinstance(uid, str): + if "uid" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set user_id as {type(uid)}, but user_id should be a string", logging.WARN) + EventTableSchema._conversion_warnings.append("uid") + uid = str(uid) + + udata = self._getValueFromRow(row=row, indices=self._column_map.UserData, concatenator=concatenator, fallback=fallbacks.get('user_data')) + + state = 
self._getValueFromRow(row=row, indices=self._column_map.GameState, concatenator=concatenator, fallback=fallbacks.get('game_state')) + + index = self._getValueFromRow(row=row, indices=self._column_map.EventSequenceIndex, concatenator=concatenator, fallback=fallbacks.get('event_sequence_index')) + if index is not None and not isinstance(index, int): + if "index" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set event_sequence_index as {type(index)}, but event_sequence_index should be an int", logging.WARN) + EventTableSchema._conversion_warnings.append("index") + index = int(index) + + return Event(session_id=sess_id, app_id=app_id, timestamp=tstamp, + event_name=ename, event_data=edata, event_source=esrc, + app_version=app_ver, app_branch=app_br, log_version=log_ver, + time_offset=offset, user_id=uid, user_data=udata, + game_state=state, event_sequence_index=index) + + # *** PRIVATE STATICS *** From dda3605e973a8fe392beab108893a42d7843a769 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Thu, 3 Oct 2024 22:51:56 -0500 Subject: [PATCH 007/124] Add most of the remaining changes from core branch. 
--- .../schemas/configs/GameSourceSchema.py | 8 +- .../schemas/storage/DataSourceSchema.py | 61 ++-- src/ogd/common/schemas/tables/TableSchema.py | 265 ------------------ src/ogd/common/schemas/tables/__init__.py | 4 +- 4 files changed, 49 insertions(+), 289 deletions(-) diff --git a/src/ogd/common/schemas/configs/GameSourceSchema.py b/src/ogd/common/schemas/configs/GameSourceSchema.py index 0019ffe..3f68d59 100644 --- a/src/ogd/common/schemas/configs/GameSourceSchema.py +++ b/src/ogd/common/schemas/configs/GameSourceSchema.py @@ -34,14 +34,12 @@ def __init__(self, name:str, all_elements:Dict[str, Any], data_sources:Dict[str, Logger.Log(f"For {name} Game Source config, all_elements was not a dict, defaulting to empty dict", logging.WARN) if "source" in all_elements.keys(): self._source_name = GameSourceSchema._parseSource(all_elements["source"]) + if self._source_name in data_sources.keys(): + self._source_schema = data_sources[self._source_name] else: self._source_name = "UNKNOWN" - Logger.Log(f"{name} config does not have a 'source' element; defaulting to source_name={self._source_name}", logging.WARN) - if self._source_name in data_sources.keys(): - self._source_schema = data_sources[self._source_name] - else: self._source_schema = None - Logger.Log(f"{name} config's 'source' name ({self._source_name}) was not found in available source schemas; defaulting to source_schema={self._source_schema}", logging.WARN) + Logger.Log(f"{name} config does not have a 'source' element; defaulting to source_name={self._source_name}, source_schema={self._source_schema}", logging.WARN) if "database" in all_elements.keys(): self._db_name = GameSourceSchema._parseDBName(all_elements["database"]) else: diff --git a/src/ogd/common/schemas/storage/DataSourceSchema.py b/src/ogd/common/schemas/storage/DataSourceSchema.py index f6a9051..9c95d51 100644 --- a/src/ogd/common/schemas/storage/DataSourceSchema.py +++ b/src/ogd/common/schemas/storage/DataSourceSchema.py @@ -1,26 +1,41 @@ # 
import standard libraries import abc import logging -from typing import Any, Dict +from pathlib import Path +from typing import Any, Dict # , overload # import local files from ogd.common.schemas.Schema import Schema from ogd.common.utils.Logger import Logger class DataSourceSchema(Schema): - def __init__(self, name:str, other_elements:Dict[str, Any]): - self._db_type : str - if not isinstance(other_elements, dict): - other_elements = {} + """Dumb struct to contain data pertaining to a data source, which a StorageConnector can connect to. + + Every source has: + - A named "type" to inform what StorageConnector should be instantiated + - A config "name" for use within ogd software for identifying a particular data source config + - A resource "location" for use by the StorageConnector (such as a filename, cloud project name, or database host) + """ + # @overload + # def __init__(self, name:str, other_elements:Dict[str, Any]): ... + + def __init__(self, name:str, unparsed_elements:Dict[str, Any] | Any): + self._source_type : str + # 1. Ensure we've actually got a dict to parse from + if not isinstance(unparsed_elements, dict): + unparsed_elements = {} Logger.Log(f"For {name} Data Source config, other_elements was not a dict, defaulting to empty dict", logging.WARN) - # Parse DB info - if "DB_TYPE" in other_elements.keys(): - self._db_type = DataSourceSchema._parseDBType(other_elements["DB_TYPE"]) + # 2. Parse standard elements, with legacy elements nested under "else" case. 
+ if "SOURCE_TYPE" in unparsed_elements.keys(): + self._source_type = DataSourceSchema._parseSourceType(unparsed_elements["SOURCE_TYPE"]) else: - self._db_type = "UNKNOWN" - Logger.Log(f"{name} config does not have a 'DB_TYPE' element; defaulting to db_host={self._db_type}", logging.WARN) + if "DB_TYPE" in unparsed_elements.keys(): + self._source_type = DataSourceSchema._parseSourceType(unparsed_elements["DB_TYPE"]) + else: + self._source_type = "UNKNOWN" + Logger.Log(f"{name} config does not have a 'SOURCE_TYPE' element; defaulting to db_name={self._source_type}", logging.WARN) - _used = {"DB_TYPE"} - _leftovers = { key : val for key,val in other_elements.items() if key not in _used } + _used = {"SOURCE_TYPE", "DB_TYPE"} + _leftovers = { key : val for key,val in unparsed_elements.items() if key not in _used } super().__init__(name=name, other_elements=_leftovers) @property @@ -32,7 +47,17 @@ def Type(self) -> str: :return: A string describing the type of the data source :rtype: str """ - return self._db_type + return self._source_type + + @property + @abc.abstractmethod + def Location(self) -> str | Path: + pass + + @property + @abc.abstractmethod + def Credential(self) -> CredentialSchema: + pass @property @abc.abstractmethod @@ -40,11 +65,11 @@ def AsConnectionInfo(self) -> str: pass @staticmethod - def _parseDBType(db_type) -> str: + def _parseSourceType(source_type) -> str: ret_val : str - if isinstance(db_type, str): - ret_val = db_type + if isinstance(source_type, str): + ret_val = source_type else: - ret_val = str(db_type) - Logger.Log(f"Data Source DB type was unexpected type {type(db_type)}, defaulting to str(db_type)={ret_val}.", logging.WARN) + ret_val = str(source_type) + Logger.Log(f"Data Source typename was unexpected type {type(source_type)}, defaulting to str(source_type)={ret_val}.", logging.WARN) return ret_val diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index fdeadff..2a6bb14 100644 
--- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -69,277 +69,12 @@ def ColumnNames(self) -> List[str]: def Columns(self) -> List[ColumnSchema]: return self._columns - @property - def AsMarkdown(self) -> str: - ret_val = "\n\n".join([ - "## Database Columns", - "The individual columns recorded in the database for this game.", - "\n".join([item.AsMarkdown for item in self.Columns]), - "## Event Object Elements", - "The elements (member variables) of each Event object, available to programmers when writing feature extractors. The right-hand side shows which database column(s) are mapped to a given element.", - self._column_map.AsMarkdown, - ""]) - return ret_val - - @property - def SessionIDColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.SessionID, int): - ret_val = self.ColumnNames[self._column_map.SessionID] - elif isinstance(self._column_map.SessionID, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.SessionID]) - return ret_val - - @property - def AppIDColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.AppID, int): - ret_val = self.ColumnNames[self._column_map.AppID] - elif isinstance(self._column_map.AppID, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppID]) - return ret_val - - @property - def TimestampColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.Timestamp, int): - ret_val = self.ColumnNames[self._column_map.Timestamp] - elif isinstance(self._column_map.Timestamp, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.Timestamp]) - return ret_val - - @property - def EventNameColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.EventName, int): - ret_val = self.ColumnNames[self._column_map.EventName] - elif isinstance(self._column_map.EventName, list): - ret_val = ", 
".join([self.ColumnNames[idx] for idx in self._column_map.EventName]) - return ret_val - - @property - def EventDataColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.EventData, int): - ret_val = self.ColumnNames[self._column_map.EventData] - elif isinstance(self._column_map.EventData, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventData]) - return ret_val - - @property - def EventSourceColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.EventSource, int): - ret_val = self.ColumnNames[self._column_map.EventSource] - elif isinstance(self._column_map.EventSource, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventSource]) - return ret_val - - @property - def AppVersionColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.AppVersion, int): - ret_val = self.ColumnNames[self._column_map.AppVersion] - elif isinstance(self._column_map.AppVersion, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppVersion]) - return ret_val - - @property - def AppBranchColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.AppBranch, int): - ret_val = self.ColumnNames[self._column_map.AppBranch] - elif isinstance(self._column_map.AppBranch, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppBranch]) - return ret_val - - @property - def LogVersionColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.LogVersion, int): - ret_val = self.ColumnNames[self._column_map.LogVersion] - elif isinstance(self._column_map.LogVersion, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.LogVersion]) - return ret_val - - @property - def TimeOffsetColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.TimeOffset, int): - ret_val = self.ColumnNames[self._column_map.TimeOffset] - elif 
isinstance(self._column_map.TimeOffset, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.TimeOffset]) - return ret_val - - @property - def UserIDColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.UserID, int): - ret_val = self.ColumnNames[self._column_map.UserID] - elif isinstance(self._column_map.UserID, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.UserID]) - return ret_val - - @property - def UserDataColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.UserData, int): - ret_val = self.ColumnNames[self._column_map.UserData] - elif isinstance(self._column_map.UserData, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.UserData]) - return ret_val - - @property - def GameStateColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.GameState, int): - ret_val = self.ColumnNames[self._column_map.GameState] - elif isinstance(self._column_map.GameState, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.GameState]) - return ret_val - - @property - def EventSequenceIndexColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.EventSequenceIndex, int): - ret_val = self.ColumnNames[self._column_map.EventSequenceIndex] - elif isinstance(self._column_map.EventSequenceIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventSequenceIndex]) - return ret_val - # *** IMPLEMENT ABSTRACT FUNCTIONS *** # *** PUBLIC STATICS *** # *** PUBLIC METHODS *** - _conversion_warnings = [] - def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}): - """Function to convert a row to an Event, based on the loaded schema. - In general, columns specified in the schema's column_map are mapped to corresponding elements of the Event. 
- If the column_map gave a list, rather than a single column name, the values from each column are concatenated in order with '.' character separators. - Finally, the concatenated values (or single value) are parsed according to the type required by Event. - One exception: For event_data, we expect to create a Dict object, so each column in the list will have its value parsed according to the type in 'columns', - and placed into a dict mapping the original column name to the parsed value (unless the parsed value is a dict, then it is merged into the top-level dict). - - :param row: The raw row data for an event. Generally assumed to be a tuple, though in principle a list would be fine too. - :type row: Tuple[str] - :param concatenator: A string to use as a separator when concatenating multiple columns into a single Event element. - :type concatenator: str - :return: [description] - :rtype: [type] - """ - # define vars to be passed as params - MAX_WARNINGS : Final[int] = 10 - sess_id : str - app_id : str - time : datetime - ename : str - edata : Map - app_ver : str - app_br : str - log_ver : str - offset : Optional[timezone] - uid : Optional[str] - udata : Optional[Map] - state : Optional[Map] - index : Optional[int] - - # 2) Handle event_data parameter, a special case. - # For this case we've got to parse the json, and then fold in whatever other columns were desired. - # 3) Assign vals to our arg vars and pass to Event ctor. 
- sess_id = self._getValueFromRow(row=row, indices=self._column_map.SessionID, concatenator=concatenator, fallback=fallbacks.get('session_id')) - if not isinstance(sess_id, str): - if "sess_id" not in TableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set session_id as {type(sess_id)}, but session_id should be a string", logging.WARN) - TableSchema._conversion_warnings.append("sess_id") - sess_id = str(sess_id) - - app_id = self._getValueFromRow(row=row, indices=self._column_map.AppID, concatenator=concatenator, fallback=fallbacks.get('app_id')) - if not isinstance(app_id, str): - if "app_id" not in TableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set app_id as {type(app_id)}, but app_id should be a string", logging.WARN) - TableSchema._conversion_warnings.append("app_id") - app_id = str(app_id) - - time = self._getValueFromRow(row=row, indices=self._column_map.Timestamp, concatenator=concatenator, fallback=fallbacks.get('timestamp')) - if not isinstance(time, datetime): - if "timestamp" not in TableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema parsed timestamp as {type(time)}, but timestamp should be a datetime", logging.WARN) - TableSchema._conversion_warnings.append("timestamp") - time = TableSchema._convertDateTime(time) - - ename = self._getValueFromRow(row=row, indices=self._column_map.EventName, concatenator=concatenator, fallback=fallbacks.get('event_name')) - if not isinstance(ename, str): - if "ename" not in TableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set event_name as {type(ename)}, but event_name should be a string", logging.WARN) - TableSchema._conversion_warnings.append("ename") - ename = str(ename) - - datas : Dict[str, Any] = self._getValueFromRow(row=row, indices=self._column_map.EventData, concatenator=concatenator, fallback=fallbacks.get('event_data')) - - # TODO: go bac to isostring 
function; need 0-padding on ms first, though - edata = dict(sorted(datas.items())) # Sort keys alphabetically - - esrc = self._getValueFromRow(row=row, indices=self._column_map.EventSource, concatenator=concatenator, fallback=fallbacks.get('event_source', EventSource.GAME)) - if not isinstance(esrc, EventSource): - if "esrc" not in TableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set event_source as {type(esrc)}, but event_source should be an EventSource", logging.WARN) - TableSchema._conversion_warnings.append("esrc") - esrc = EventSource.GENERATED if esrc == "GENERATED" else EventSource.GAME - - app_ver = self._getValueFromRow(row=row, indices=self._column_map.AppVersion, concatenator=concatenator, fallback=fallbacks.get('app_version', "0")) - if not isinstance(app_ver, str): - if "app_ver" not in TableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set app_version as {type(app_ver)}, but app_version should be a string", logging.WARN) - TableSchema._conversion_warnings.append("app_ver") - app_ver = str(app_ver) - - app_br = self._getValueFromRow(row=row, indices=self._column_map.AppBranch, concatenator=concatenator, fallback=fallbacks.get('app_branch')) - if not isinstance(app_br, str): - if "app_br" not in TableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set app_branch as {type(app_br)}, but app_branch should be a string", logging.WARN) - TableSchema._conversion_warnings.append("app_br") - app_br = str(app_br) - - log_ver = self._getValueFromRow(row=row, indices=self._column_map.LogVersion, concatenator=concatenator, fallback=fallbacks.get('log_version', "0")) - if not isinstance(log_ver, str): - if "log_ver" not in TableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set log_version as {type(log_ver)}, but log_version should be a string", logging.WARN) - TableSchema._conversion_warnings.append("log_ver") - log_ver = 
str(log_ver) - - offset = self._getValueFromRow(row=row, indices=self._column_map.TimeOffset, concatenator=concatenator, fallback=fallbacks.get('time_offset')) - if isinstance(offset, timedelta): - if "offset" not in TableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set offset as {type(offset)}, but offset should be a timezone", logging.WARN) - TableSchema._conversion_warnings.append("offset") - offset = timezone(offset) - - uid = self._getValueFromRow(row=row, indices=self._column_map.UserID, concatenator=concatenator, fallback=fallbacks.get('user_id')) - if uid is not None and not isinstance(uid, str): - if "uid" not in TableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set user_id as {type(uid)}, but user_id should be a string", logging.WARN) - TableSchema._conversion_warnings.append("uid") - uid = str(uid) - - udata = self._getValueFromRow(row=row, indices=self._column_map.UserData, concatenator=concatenator, fallback=fallbacks.get('user_data')) - - state = self._getValueFromRow(row=row, indices=self._column_map.GameState, concatenator=concatenator, fallback=fallbacks.get('game_state')) - - index = self._getValueFromRow(row=row, indices=self._column_map.EventSequenceIndex, concatenator=concatenator, fallback=fallbacks.get('event_sequence_index')) - if index is not None and not isinstance(index, int): - if "index" not in TableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set event_sequence_index as {type(index)}, but event_sequence_index should be an int", logging.WARN) - TableSchema._conversion_warnings.append("index") - index = int(index) - - return Event(session_id=sess_id, app_id=app_id, timestamp=time, - event_name=ename, event_data=edata, event_source=esrc, - app_version=app_ver, app_branch=app_br, log_version=log_ver, - time_offset=offset, user_id=uid, user_data=udata, - game_state=state, event_sequence_index=index) - # *** PRIVATE STATICS *** 
@staticmethod diff --git a/src/ogd/common/schemas/tables/__init__.py b/src/ogd/common/schemas/tables/__init__.py index e5ef7d4..5ba342e 100644 --- a/src/ogd/common/schemas/tables/__init__.py +++ b/src/ogd/common/schemas/tables/__init__.py @@ -1,5 +1,7 @@ __all__ = [ "TableSchema", + "EventTableSchema" ] -from . import TableSchema \ No newline at end of file +from . import TableSchema +from . import EventTableSchema \ No newline at end of file From 550f39552a309b350383cb1604347303bab1c8a8 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Thu, 3 Oct 2024 22:55:44 -0500 Subject: [PATCH 008/124] Sort out updates to imports. --- src/ogd/common/schemas/configs/GameSourceSchema.py | 4 ++-- src/ogd/common/schemas/storage/DataSourceSchema.py | 2 ++ src/ogd/common/schemas/tables/TableSchema.py | 2 +- tests/cases/t_interfaces/t_CSVInterface.py | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/ogd/common/schemas/configs/GameSourceSchema.py b/src/ogd/common/schemas/configs/GameSourceSchema.py index 3f68d59..1084eda 100644 --- a/src/ogd/common/schemas/configs/GameSourceSchema.py +++ b/src/ogd/common/schemas/configs/GameSourceSchema.py @@ -1,8 +1,8 @@ # import standard libraries import logging -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.configs.data_sources.DataSourceSchema import DataSourceSchema +from ogd.common.schemas.storage.DataSourceSchema import DataSourceSchema from ogd.common.schemas.Schema import Schema from ogd.common.utils.Logger import Logger diff --git a/src/ogd/common/schemas/storage/DataSourceSchema.py b/src/ogd/common/schemas/storage/DataSourceSchema.py index 9c95d51..03328ca 100644 --- a/src/ogd/common/schemas/storage/DataSourceSchema.py +++ b/src/ogd/common/schemas/storage/DataSourceSchema.py @@ -5,8 +5,10 @@ from typing import Any, Dict # , overload # import local files from ogd.common.schemas.Schema import Schema +from 
ogd.common.schemas.storage.CredentialSchema import CredentialSchema from ogd.common.utils.Logger import Logger + class DataSourceSchema(Schema): """Dumb struct to contain data pertaining to a data source, which a StorageConnector can connect to. diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index 2a6bb14..8a0c178 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -8,7 +8,7 @@ from pathlib import Path from typing import Any, Dict, Final, List, Tuple, Optional, Union ## import local files -from ogd.core import schemas +from ogd.common import schemas from ogd.common.models.Event import Event, EventSource from ogd.common.schemas.tables.ColumnMapSchema import ColumnMapSchema from ogd.common.schemas.tables.ColumnSchema import ColumnSchema diff --git a/tests/cases/t_interfaces/t_CSVInterface.py b/tests/cases/t_interfaces/t_CSVInterface.py index 925435b..f9886ec 100644 --- a/tests/cases/t_interfaces/t_CSVInterface.py +++ b/tests/cases/t_interfaces/t_CSVInterface.py @@ -5,7 +5,7 @@ from unittest import TestCase from zipfile import ZipFile # import locals -from ogd.common.interfaces.CSVInterface import CSVInterface +from ogd.common.connectors.interfaces.CSVInterface import CSVInterface from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema class t_CSVInterface(TestCase): From ae1c9ab263214c921b621a75ec181d6f85d37fe4 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Sat, 9 Nov 2024 17:20:24 -0500 Subject: [PATCH 009/124] Had a couple imports referring to core. 
--- src/ogd/common/connectors/StorageConnector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ogd/common/connectors/StorageConnector.py b/src/ogd/common/connectors/StorageConnector.py index 55639ad..624fd0c 100644 --- a/src/ogd/common/connectors/StorageConnector.py +++ b/src/ogd/common/connectors/StorageConnector.py @@ -6,8 +6,8 @@ import logging # import local files -from ogd.core.schemas.configs.GameSourceSchema import GameSourceSchema -from ogd.core.utils.Logger import Logger +from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.utils.Logger import Logger class StorageConnector(abc.ABC): """Base class for all interfaces and outerfaces. From 3388e485c04dafd88cc7abb4a7caba1d28131a00 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Sat, 9 Nov 2024 17:21:57 -0500 Subject: [PATCH 010/124] Update other places where we had imports pointing to core. --- src/ogd/common/schemas/storage/CredentialSchema.py | 2 +- src/ogd/common/schemas/tables/EventTableSchema.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ogd/common/schemas/storage/CredentialSchema.py b/src/ogd/common/schemas/storage/CredentialSchema.py index f2076b3..9ed4211 100644 --- a/src/ogd/common/schemas/storage/CredentialSchema.py +++ b/src/ogd/common/schemas/storage/CredentialSchema.py @@ -1,7 +1,7 @@ # import standard libraries from typing import Any, Dict # , overload # import local files -from ogd.core.schemas.Schema import Schema +from ogd.common.schemas.Schema import Schema class CredentialSchema(Schema): diff --git a/src/ogd/common/schemas/tables/EventTableSchema.py b/src/ogd/common/schemas/tables/EventTableSchema.py index 85a0797..77ed716 100644 --- a/src/ogd/common/schemas/tables/EventTableSchema.py +++ b/src/ogd/common/schemas/tables/EventTableSchema.py @@ -11,12 +11,12 @@ from dateutil import parser # import local files -from ogd.core import schemas -from ogd.core.schemas.tables.TableSchema import 
TableSchema -from ogd.core.models.Event import Event, EventSource -from ogd.core.utils import utils -from ogd.core.utils.Logger import Logger -from ogd.core.utils.typing import Map +from ogd.common import schemas +from ogd.common.schemas.tables.TableSchema import TableSchema +from ogd.common.models.Event import Event, EventSource +from ogd.common.utils import utils +from ogd.common.utils.Logger import Logger +from ogd.common.utils.typing import Map ## @class TableSchema # Dumb struct to hold useful info about the structure of database data From be70a620a8587f07bb4fce41d7472e2732fa04a9 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 20 Nov 2024 21:55:39 -0600 Subject: [PATCH 011/124] Work in progress for the handling of date and version filters when getting list of IDs. --- src/ogd/common/connectors/interfaces/Interface.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index 5c94848..2c62494 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -4,12 +4,13 @@ import abc import logging from datetime import datetime -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, Tuple, Union # import local files from ogd.common.connectors.StorageConnector import StorageConnector from ogd.common.models.enums.IDMode import IDMode from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.utils.SemanticVersion import SemanticVersion from ogd.common.utils.Logger import Logger class Interface(StorageConnector): @@ -21,7 +22,7 @@ class Interface(StorageConnector): # *** ABSTRACTS *** @abc.abstractmethod - def _availableIDs(self, mode:IDMode=IDMode.SESSION) -> List[str]: + def _availableIDs(self, mode:IDMode=IDMode.SESSION, date_range:Optional[Tuple[datetime, datetime]]=None, log_versions:Optional[List[int] | Tuple[int, 
int]]=None, app_versions:Optional[List[SemanticVersion] | Tuple[SemanticVersion, SemanticVersion]]=None) -> List[str]: """Private implementation of the logic to retrieve all IDs of given mode from the connected storage. :param mode: The type of ID to be listed. From 2ccb4a623560aca455a5e75227a358eed6a31954 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 15:10:32 -0600 Subject: [PATCH 012/124] Add an init file for the filter collections. --- .../common/connectors/filters/collections/__init__.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 src/ogd/common/connectors/filters/collections/__init__.py diff --git a/src/ogd/common/connectors/filters/collections/__init__.py b/src/ogd/common/connectors/filters/collections/__init__.py new file mode 100644 index 0000000..d6b673f --- /dev/null +++ b/src/ogd/common/connectors/filters/collections/__init__.py @@ -0,0 +1,11 @@ +__all__ = [ + "EventFilterCollection", + "IDFilterCollection", + "TimingFilterCollection", + "VersioningFilterCollection" +] + +from .EventFilterCollection import EventFilterCollection +from .IDFilterCollection import IDFilterCollection +from .TimingFilterCollection import TimingFilterCollection +from .VersioningFilterCollection import VersioningFilterCollection \ No newline at end of file From 59bdcc8a16e3f0d808d63977fac72f231aa5a19c Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 15:20:14 -0600 Subject: [PATCH 013/124] Add an enum for the types of versions that exist. 
--- src/ogd/common/models/enums/VersionType.py | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 src/ogd/common/models/enums/VersionType.py diff --git a/src/ogd/common/models/enums/VersionType.py b/src/ogd/common/models/enums/VersionType.py new file mode 100644 index 0000000..4002050 --- /dev/null +++ b/src/ogd/common/models/enums/VersionType.py @@ -0,0 +1,26 @@ +"""VersionType Module +""" + +# import standard libraries +from enum import IntEnum + +class VersionType(IntEnum): + """Enum representing the different kinds of versioning in OpenGameData. + + Namely: + + * Log Version + * App Version + * App Branch + + :param IntEnum: _description_ + :type IntEnum: _type_ + :return: _description_ + :rtype: _type_ + """ + LOG = 1 + APP = 2 + BRANCH = 3 + + def __str__(self): + return self.name From 14190fb8ee78b0a6a1e1499743f927431d0103b5 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 18:57:25 -0600 Subject: [PATCH 014/124] Add a ResourceName property, which for now at least is just the name attached to GameSourceSchema. --- src/ogd/common/connectors/StorageConnector.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ogd/common/connectors/StorageConnector.py b/src/ogd/common/connectors/StorageConnector.py index 624fd0c..3de2be3 100644 --- a/src/ogd/common/connectors/StorageConnector.py +++ b/src/ogd/common/connectors/StorageConnector.py @@ -54,6 +54,10 @@ def IsOpen(self) -> bool: """ return True if self._is_open else False + @property + def ResourceName(self) -> str: + return self._source_schema.Name + # *** PUBLIC STATICS *** # *** PUBLIC METHODS *** From dc7c0144ffcd0b0662ded87ddf871adb4d49e95b Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 18:58:54 -0600 Subject: [PATCH 015/124] Update the AvailableX functions in Interface base class. 
--- .../common/connectors/interfaces/Interface.py | 105 +++++++----------- 1 file changed, 42 insertions(+), 63 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index 2c62494..0095e15 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -7,8 +7,10 @@ from typing import Dict, List, Optional, Tuple, Union # import local files +from ogd.common.connectors.filters.collections import * from ogd.common.connectors.StorageConnector import StorageConnector from ogd.common.models.enums.IDMode import IDMode +from ogd.common.models.enums.VersionType import VersionType from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema from ogd.common.utils.SemanticVersion import SemanticVersion from ogd.common.utils.Logger import Logger @@ -22,7 +24,7 @@ class Interface(StorageConnector): # *** ABSTRACTS *** @abc.abstractmethod - def _availableIDs(self, mode:IDMode=IDMode.SESSION, date_range:Optional[Tuple[datetime, datetime]]=None, log_versions:Optional[List[int] | Tuple[int, int]]=None, app_versions:Optional[List[SemanticVersion] | Tuple[SemanticVersion, SemanticVersion]]=None) -> List[str]: + def _availableIDs(self, mode:IDMode, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[str]: """Private implementation of the logic to retrieve all IDs of given mode from the connected storage. :param mode: The type of ID to be listed. @@ -33,7 +35,7 @@ def _availableIDs(self, mode:IDMode=IDMode.SESSION, date_range:Optional[Tuple[da pass @abc.abstractmethod - def _availableDates(self) -> Dict[str,datetime]: + def _availableDates(self, id_filter:IDFilterCollection, version_filter:VersioningFilterCollection) -> Dict[str,datetime]: """Private implementation of the logic to retrieve the full range of dates/times from the connected storage. 
:return: A dict mapping `min` and `max` to the minimum and maximum datetimes @@ -42,29 +44,7 @@ def _availableDates(self) -> Dict[str,datetime]: pass @abc.abstractmethod - def _IDsFromDates(self, min:datetime, max:datetime, mode:IDMode=IDMode.SESSION) -> List[str]: - """Private implementation of logic to list IDs of given mode that have data within a range of dates. - - :param min: Earliest date in the range - :type min: datetime - :param max: Latest date in the range - :type max: datetime - :return: A list of IDs of given mode with data falling within the given date range. - :rtype: Optional[List[str]] - """ - pass - - @abc.abstractmethod - def _datesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION) -> Dict[str,datetime]: - """Private implementation of the logic to get a range of dates covering all data for given list of IDs (with given mode). - - :param id_list: The list of IDs, for whose data we want a date range. - :type id_list: List[str] - :param id_mode: The kind of ID to use when interpreting the `id_list`, defaults to IDMode.SESSION - :type id_mode: IDMode, optional - :return: A dictionary mapping `min` and `max` to the range of dates covering all data for the given IDs - :rtype: Union[Dict[str,datetime], Dict[str,None]] - """ + def _availableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date_filter:TimingFilterCollection) -> List[SemanticVersion | str]: pass # *** BUILT-INS & PROPERTIES *** @@ -79,7 +59,7 @@ def __del__(self): # *** PUBLIC METHODS *** - def AvailableIDs(self, mode:IDMode=IDMode.SESSION) -> Optional[List[str]]: + def AvailableIDs(self, mode:IDMode, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> Optional[List[str]]: """Retrieve all IDs of given mode from the connected storage. :param mode: The type of ID to be listed. 
@@ -89,58 +69,57 @@ def AvailableIDs(self, mode:IDMode=IDMode.SESSION) -> Optional[List[str]]: """ ret_val = None if self.IsOpen: - ret_val = self._availableIDs(mode=mode) + _date_clause = f" on date(s) {date_filter}" + _version_clause = f" with version(s) {version_filter}" + _msg = f"Retrieving IDs with {mode} ID mode{_date_clause}{_version_clause} from {self.ResourceName}." + Logger.Log(_msg, logging.DEBUG, depth=3) + ret_val = self._availableIDs(mode=mode, date_filter=date_filter, version_filter=version_filter) else: - Logger.Log(f"Can't retrieve list of all {mode} IDs, the storage connection is not open!", logging.WARNING, depth=3) + Logger.Log(f"Can't retrieve list of {mode} IDs from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) return ret_val - def AvailableDates(self) -> Union[Dict[str,datetime], Dict[str,None]]: - """Retrieve the full range of dates/times covered by data in the connected storage. + def AvailableDates(self, id_filter:IDFilterCollection, version_filter:VersioningFilterCollection) -> Union[Dict[str,datetime], Dict[str,None]]: + """Retrieve the full range of dates/times covered by data in the connected storage, subject to given filters. + + Note, this is different from listing the exact dates in which the data exists. + This function gets the range from the earliest instance of an event matching the filters, to the last such instance. + + TODO: Create separate functions for exact dates and date range. - :return: A dictionary mapping `min` and `max` to the min and max datetimes, or to None (if unavailable) + :return: A dictionary mapping `min` and `max` to the range of dates covering all data for the given IDs/versions :rtype: Union[Dict[str,datetime], Dict[str,None]] """ ret_val = {'min':None, 'max':None} if self.IsOpen: - ret_val = self._availableDates() + _version_clause = f" with version(s) {version_filter}" + _msg = f"Retrieving range of event/feature dates{_version_clause} from {self.ResourceName}." 
+ Logger.Log(_msg, logging.DEBUG, depth=3) + ret_val = self._availableDates(id_filter=id_filter, version_filter=version_filter) else: - Logger.Log("Could not get full date range, the storage connection is not open!", logging.WARNING, depth=3) + Logger.Log(f"Could not get full date range from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) return ret_val - def IDsFromDates(self, min:datetime, max:datetime, mode:IDMode=IDMode.SESSION) -> Optional[List[str]]: - """Get a list of IDs of given mode that have data within a range of dates. - :param min: Earliest date in the range - :type min: datetime - :param max: Latest date in the range - :type max: datetime - :return: A list of IDs of given mode with data falling within the given date range. - :rtype: Optional[List[str]] - """ - ret_val = None - if not self.IsOpen: - str_min, str_max = min.strftime("%Y%m%d"), max.strftime("%Y%m%d") - Logger.Log(f"Could not retrieve IDs for {str_min}-{str_max}, the source interface is not open!", logging.WARNING, depth=3) - else: - ret_val = self._IDsFromDates(min=min, max=max, mode=mode) - return ret_val + def AvailableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date_filter:TimingFilterCollection) -> List[SemanticVersion | str]: + """Get a list of all versions of given type in the connected storage, subject to ID and date filters. - def DatesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION) -> Union[Dict[str,datetime], Dict[str,None]]: - """Get a range of dates covering all data for given list of IDs (with given mode). - - :param id_list: The list of IDs, for whose data we want a date range. 
- :type id_list: List[str] - :param id_mode: The kind of ID to use when interpreting the `id_list`, defaults to IDMode.SESSION - :type id_mode: IDMode, optional - :return: A dictionary mapping `min` and `max` to the range of dates covering all data for the given IDs - :rtype: Union[Dict[str,datetime], Dict[str,None]] + :param mode: _description_ + :type mode: VersionType + :param id_filter: _description_ + :type id_filter: IDFilterCollection + :param date_filter: _description_ + :type date_filter: TimingFilterCollection + :return: _description_ + :rtype: List[SemanticVersion | str] """ - ret_val = {'min':None, 'max':None} - if not self.IsOpen: - Logger.Log(f"Could not retrieve date range {len(id_list)} session IDs, the source interface is not open!", logging.WARNING, depth=3) + ret_val = [] + if self.IsOpen: + _date_clause = f" on date(s) {date_filter}" + _msg = f"Retrieving data versions{_date_clause} from {self.ResourceName}." + Logger.Log(_msg, logging.DEBUG, depth=3) + ret_val = self._availableVersions(mode=mode, id_filter=id_filter, date_filter=date_filter) else: - Logger.Log(f"Retrieving date range from IDs with {id_mode.name} ID mode.", logging.DEBUG, depth=3) - ret_val = self._datesFromIDs(id_list=id_list, id_mode=id_mode) + Logger.Log(f"Could not retrieve data versions from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) return ret_val # *** PROPERTIES *** From e88d6abb9d8b4d1539050f880c10bbbe0020e2da Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 20:34:42 -0600 Subject: [PATCH 016/124] When getting available versions/dates/ids, log the fact that we're doing it at INFO level. 
--- src/ogd/common/connectors/interfaces/Interface.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index 0095e15..a235034 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -72,7 +72,7 @@ def AvailableIDs(self, mode:IDMode, date_filter:TimingFilterCollection, version_ _date_clause = f" on date(s) {date_filter}" _version_clause = f" with version(s) {version_filter}" _msg = f"Retrieving IDs with {mode} ID mode{_date_clause}{_version_clause} from {self.ResourceName}." - Logger.Log(_msg, logging.DEBUG, depth=3) + Logger.Log(_msg, logging.INFO, depth=3) ret_val = self._availableIDs(mode=mode, date_filter=date_filter, version_filter=version_filter) else: Logger.Log(f"Can't retrieve list of {mode} IDs from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) @@ -93,7 +93,7 @@ def AvailableDates(self, id_filter:IDFilterCollection, version_filter:Versioning if self.IsOpen: _version_clause = f" with version(s) {version_filter}" _msg = f"Retrieving range of event/feature dates{_version_clause} from {self.ResourceName}." - Logger.Log(_msg, logging.DEBUG, depth=3) + Logger.Log(_msg, logging.INFO, depth=3) ret_val = self._availableDates(id_filter=id_filter, version_filter=version_filter) else: Logger.Log(f"Could not get full date range from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) @@ -116,7 +116,7 @@ def AvailableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date if self.IsOpen: _date_clause = f" on date(s) {date_filter}" _msg = f"Retrieving data versions{_date_clause} from {self.ResourceName}." 
- Logger.Log(_msg, logging.DEBUG, depth=3) + Logger.Log(_msg, logging.INFO, depth=3) ret_val = self._availableVersions(mode=mode, id_filter=id_filter, date_filter=date_filter) else: Logger.Log(f"Could not retrieve data versions from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) From a1b9b428fd85d4beb9c56fa2211dd7fe8b3268f3 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 21:04:55 -0600 Subject: [PATCH 017/124] Add basic versions of the GetXCollection functions. --- .../common/connectors/interfaces/Interface.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index a235034..c4763df 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -9,6 +9,8 @@ # import local files from ogd.common.connectors.filters.collections import * from ogd.common.connectors.StorageConnector import StorageConnector +from ogd.common.models.Event import Event +from ogd.common.models.FeatureData import FeatureData from ogd.common.models.enums.IDMode import IDMode from ogd.common.models.enums.VersionType import VersionType from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema @@ -47,6 +49,14 @@ def _availableDates(self, id_filter:IDFilterCollection, version_filter:Versionin def _availableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date_filter:TimingFilterCollection) -> List[SemanticVersion | str]: pass + @abc.abstractmethod + def _getEventCollection(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Event]: + pass + + @abc.abstractmethod + def _getFeatureCollection(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> 
List[FeatureData]: + pass + # *** BUILT-INS & PROPERTIES *** def __init__(self, schema:GameSourceSchema): @@ -122,7 +132,11 @@ def AvailableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date Logger.Log(f"Could not retrieve data versions from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) return ret_val - # *** PROPERTIES *** + def GetEventCollection(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Event]: + return self._getEventCollection(id_filter=id_filter, date_filter=date_filter, version_filter=version_filter, event_filter=event_filter) + + def GetFeatureCollection(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[FeatureData]: + return self._getFeatureCollection(id_filter=id_filter, date_filter=date_filter, version_filter=version_filter) # *** PRIVATE STATICS *** From 10697d90e9596bc0567b5bd70970be328e7ae289 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 21:26:50 -0600 Subject: [PATCH 018/124] Add table schema param to the GetXCollection functions, and default filters. 
--- src/ogd/common/connectors/interfaces/Interface.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index c4763df..745cab1 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -14,6 +14,8 @@ from ogd.common.models.enums.IDMode import IDMode from ogd.common.models.enums.VersionType import VersionType from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.schemas.tables.EventTableSchema import EventTableSchema +from ogd.common.schemas.tables.FeatureTableSchema import FeatureTableSchema from ogd.common.utils.SemanticVersion import SemanticVersion from ogd.common.utils.Logger import Logger @@ -50,11 +52,11 @@ def _availableVersions(self, mode:VersionType, id_filter:IDFilterCollection, dat pass @abc.abstractmethod - def _getEventCollection(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Event]: + def _getEventCollection(self, schema:EventTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Event]: pass @abc.abstractmethod - def _getFeatureCollection(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[FeatureData]: + def _getFeatureCollection(self, schema:FeatureTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[FeatureData]: pass # *** BUILT-INS & PROPERTIES *** @@ -132,11 +134,11 @@ def AvailableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date Logger.Log(f"Could not retrieve data versions from {self.ResourceName}, the storage connection is not 
open!", logging.WARNING, depth=3) return ret_val - def GetEventCollection(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Event]: - return self._getEventCollection(id_filter=id_filter, date_filter=date_filter, version_filter=version_filter, event_filter=event_filter) + def GetEventCollection(self, schema:EventTableSchema, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection(), event_filter:EventFilterCollection=EventFilterCollection()) -> List[Event]: + return self._getEventCollection(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter, event_filter=event_filter) - def GetFeatureCollection(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[FeatureData]: - return self._getFeatureCollection(id_filter=id_filter, date_filter=date_filter, version_filter=version_filter) + def GetFeatureCollection(self, schema:FeatureTableSchema, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection()) -> List[FeatureData]: + return self._getFeatureCollection(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter) # *** PRIVATE STATICS *** From 9f176d9889ebc1993704c0873243d9292040f1a9 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 21:27:27 -0600 Subject: [PATCH 019/124] Remove EventInterface class, everything will just inherit from interface. 
--- .../connectors/interfaces/EventInterface.py | 141 ------------------ 1 file changed, 141 deletions(-) delete mode 100644 src/ogd/common/connectors/interfaces/EventInterface.py diff --git a/src/ogd/common/connectors/interfaces/EventInterface.py b/src/ogd/common/connectors/interfaces/EventInterface.py deleted file mode 100644 index ef270b5..0000000 --- a/src/ogd/common/connectors/interfaces/EventInterface.py +++ /dev/null @@ -1,141 +0,0 @@ -## import standard libraries -import abc -import logging -from datetime import datetime -from pprint import pformat -from typing import Any, Dict, List, Tuple, Optional, Union - -# import local files -from ogd.common.interfaces.Interface import Interface -from ogd.common.models.Event import Event -from ogd.common.models.enums.IDMode import IDMode -from ogd.common.schemas.tables.TableSchema import TableSchema -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema -from ogd.common.utils.Logger import Logger - -class EventInterface(Interface): - - # *** ABSTRACTS *** - - @abc.abstractmethod - def _allIDs(self) -> List[str]: - pass - - @abc.abstractmethod - def _fullDateRange(self) -> Dict[str,datetime]: - pass - - @abc.abstractmethod - def _rowsFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]] = None, exclude_rows:Optional[List[str]] = None) -> List[Tuple]: - """Function to retrieve all rows for a given set of Session or Player IDs, which can be converted to Event objects by a TableSchema - - :param id_list: List of IDs whose events should be retrieved from the database. These are session IDs if id_mode is SESSION, or user IDs if id_mode is USER. - :type id_list: List[str] - :param id_mode: The mode of ID to use for interpreting the id_list, defaults to IDMode.SESSION - :type id_mode: IDMode, optional - :param versions: List of log_versions to include in the query, any versions not in the list will be ignored. 
Defaults to None - :type versions: Optional[List[int]], optional - :param exclude_rows: List of event names to be excluded from the query, defaults to None - :type exclude_rows: Optional[List[str]], optional - :return: A list of raw results from the query. - :rtype: List[Tuple] - """ - pass - - @abc.abstractmethod - def _IDsFromDates(self, min:datetime, max:datetime, versions:Optional[List[int]] = None) -> List[str]: - pass - - @abc.abstractmethod - def _datesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]] = None) -> Dict[str,datetime]: - pass - - # *** BUILT-INS & PROPERTIES *** - - def __init__(self, game_id:str, config:GameSourceSchema, fail_fast:bool): - super().__init__(config=config) - self._fail_fast = fail_fast - self._game_id : str = game_id - self._table_schema : TableSchema = TableSchema(schema_name=self._config.TableSchema) - - def __del__(self): - self.Close() - - # *** PUBLIC STATICS *** - - # *** PUBLIC METHODS *** - - def AllIDs(self) -> Optional[List[str]]: - ret_val = None - if self.IsOpen(): - ret_val = self._allIDs() - else: - Logger.Log("Can't retrieve list of all session IDs, the source interface is not open!", logging.WARNING, depth=3) - return ret_val - - def FullDateRange(self) -> Union[Dict[str,datetime], Dict[str,None]]: - ret_val = {'min':None, 'max':None} - if self.IsOpen(): - ret_val = self._fullDateRange() - else: - Logger.Log(f"Could not get full date range, the source interface is not open!", logging.WARNING, depth=3) - return ret_val - - def EventsFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]]=None, exclude_rows:Optional[List[str]]=None) -> Optional[List[Event]]: - ret_val = None - - _curr_sess : str = "" - _evt_sess_index : int = 1 - if self.IsOpen(): - Logger.Log(f"Retrieving rows from IDs with {id_mode.name} ID mode.", logging.DEBUG, depth=3) - _rows = self._rowsFromIDs(id_list=id_list, id_mode=id_mode, versions=versions, 
exclude_rows=exclude_rows) - _fallbacks = {"app_id":self._game_id} - ret_val = [] - for row in _rows: - try: - next_event = self._table_schema.RowToEvent(row=row, fallbacks=_fallbacks) - # in case event index was not given, we should fall back on using the order it came to us. - if next_event.SessionID != _curr_sess: - _curr_sess = next_event.SessionID - _evt_sess_index = 1 - next_event.FallbackDefaults(index=_evt_sess_index) - _evt_sess_index += 1 - except Exception as err: - if self._fail_fast: - Logger.Log(f"Error while converting row to Event\nFull error: {err}\nRow data: {pformat(row)}", logging.ERROR, depth=2) - raise err - else: - Logger.Log(f"Error while converting row ({row}) to Event. This row will be skipped.\nFull error: {err}", logging.WARNING, depth=2) - else: - ret_val.append(next_event) - else: - Logger.Log(f"Could not retrieve rows for {len(id_list)} session IDs, the source interface is not open!", logging.WARNING, depth=3) - return ret_val - - def IDsFromDates(self, min:datetime, max:datetime, versions:Optional[List[int]]=None) -> Optional[List[str]]: - ret_val = None - if not self.IsOpen(): - str_min, str_max = min.strftime("%Y%m%d"), max.strftime("%Y%m%d") - Logger.Log(f"Could not retrieve IDs for {str_min}-{str_max}, the source interface is not open!", logging.WARNING, depth=3) - else: - ret_val = self._IDsFromDates(min=min, max=max, versions=versions) - return ret_val - - def DatesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]]=None) -> Union[Dict[str,datetime], Dict[str,None]]: - ret_val = {'min':None, 'max':None} - if not self.IsOpen(): - Logger.Log(f"Could not retrieve date range {len(id_list)} session IDs, the source interface is not open!", logging.WARNING, depth=3) - else: - Logger.Log(f"Retrieving date range from IDs with {id_mode.name} ID mode.", logging.DEBUG, depth=3) - ret_val = self._datesFromIDs(id_list=id_list, id_mode=id_mode, versions=versions) - return ret_val - - # *** PROPERTIES 
*** - - @property - def _TableSchema(self) -> TableSchema: - return self._table_schema - - # *** PRIVATE STATICS *** - - # *** PRIVATE METHODS *** From 8b9742f921da52698b361e915c19101d3f5e7031 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 21:35:38 -0600 Subject: [PATCH 020/124] Oops, AsDict wasn't marked as a property. --- .../common/connectors/filters/collections/FilterCollection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ogd/common/connectors/filters/collections/FilterCollection.py b/src/ogd/common/connectors/filters/collections/FilterCollection.py index 0d0ad39..cac156d 100644 --- a/src/ogd/common/connectors/filters/collections/FilterCollection.py +++ b/src/ogd/common/connectors/filters/collections/FilterCollection.py @@ -12,5 +12,6 @@ def _asDict(self): def __init__(self): pass + @property def AsDict(self) -> Dict[str, Filter]: return self._asDict() \ No newline at end of file From f88735f735ee40b0986a90b2bb0ec680e9d43118 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 21:37:06 -0600 Subject: [PATCH 021/124] Also, need to hint return type of _asDict. --- .../common/connectors/filters/collections/FilterCollection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ogd/common/connectors/filters/collections/FilterCollection.py b/src/ogd/common/connectors/filters/collections/FilterCollection.py index cac156d..d962dc3 100644 --- a/src/ogd/common/connectors/filters/collections/FilterCollection.py +++ b/src/ogd/common/connectors/filters/collections/FilterCollection.py @@ -6,7 +6,7 @@ class FilterCollection: @abc.abstractmethod - def _asDict(self): + def _asDict(self) -> Dict[str, Filter]: pass def __init__(self): From 70fa0db97a153213f40c9ea8940d3dc8fd6ec16c Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 21:47:03 -0600 Subject: [PATCH 022/124] Add a class for keeping track of an event dataset. 
--- src/ogd/common/models/EventDataset.py | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 src/ogd/common/models/EventDataset.py diff --git a/src/ogd/common/models/EventDataset.py b/src/ogd/common/models/EventDataset.py new file mode 100644 index 0000000..435ea03 --- /dev/null +++ b/src/ogd/common/models/EventDataset.py @@ -0,0 +1,28 @@ +## import standard libraries +from typing import Dict, List +# import local files +from ogd.common.connectors.filters.collections import * +from ogd.common.connectors.filters.Filter import Filter +from ogd.common.models.Event import Event + +class EventDataset: + """Dumb struct that primarily just contains an ordered list of events. + It also contains information on any filters used to define the dataset, such as a date range or set of versions. + """ + + def __init__(self, events:List[Event], id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> None: + self._events = events + self._filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict | event_filter.AsDict + + @property + def Events(self) -> List[Event]: + return self._events + + @property + def Filters(self) -> Dict[str, Filter]: + return self._filters + + @property + def AsMarkdown(self): + _filters_clause = "* ".join([f"{key} : {val}" for key,val in self._filters.items()]) + return f"## Event Dataset\n\n{_filters_clause}" \ No newline at end of file From 3908e3d06c5c2af3bc091a9408cd98eb8e869c37 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 21:51:18 -0600 Subject: [PATCH 023/124] Add a 'Dataset' class for features. 
--- src/ogd/common/models/FeatureDataset.py | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 src/ogd/common/models/FeatureDataset.py diff --git a/src/ogd/common/models/FeatureDataset.py b/src/ogd/common/models/FeatureDataset.py new file mode 100644 index 0000000..1f08d2a --- /dev/null +++ b/src/ogd/common/models/FeatureDataset.py @@ -0,0 +1,28 @@ +## import standard libraries +from typing import Dict, List +# import local files +from ogd.common.connectors.filters.collections import * +from ogd.common.connectors.filters.Filter import Filter +from ogd.common.models.FeatureData import FeatureData + +class FeatureDataset: + """Dumb struct that primarily just contains an ordered list of events. + It also contains information on any filters used to define the dataset, such as a date range or set of versions. + """ + + def __init__(self, features:List[FeatureData], id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> None: + self._features = features + self._filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict | event_filter.AsDict + + @property + def Features(self) -> List[FeatureData]: + return self._features + + @property + def Filters(self) -> Dict[str, Filter]: + return self._filters + + @property + def AsMarkdown(self): + _filters_clause = "* ".join([f"{key} : {val}" for key,val in self._filters.items()]) + return f"## Feature Dataset\n\n{_filters_clause}" \ No newline at end of file From 6b9cbb1d143efb9adaeb8ef9cf767ff9ebd51566 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 21:59:29 -0600 Subject: [PATCH 024/124] Create placeholder FeatureTableSchema class. 
--- .../schemas/tables/FeatureTableSchema.py | 318 ++++++++++++++++++ 1 file changed, 318 insertions(+) create mode 100644 src/ogd/common/schemas/tables/FeatureTableSchema.py diff --git a/src/ogd/common/schemas/tables/FeatureTableSchema.py b/src/ogd/common/schemas/tables/FeatureTableSchema.py new file mode 100644 index 0000000..d1fde93 --- /dev/null +++ b/src/ogd/common/schemas/tables/FeatureTableSchema.py @@ -0,0 +1,318 @@ +"""EventTableSchema Module""" +# import standard libraries +import logging +import re +from datetime import datetime, timedelta, timezone +from json.decoder import JSONDecodeError +from pathlib import Path +from typing import Any, Dict, Tuple, Optional + +# import 3rd-party libraries +from dateutil import parser + +# import local files +from ogd.common import schemas +from ogd.common.schemas.tables.TableSchema import TableSchema +from ogd.common.models.Event import Event, EventSource +from ogd.common.utils import utils +from ogd.common.utils.Logger import Logger +from ogd.common.utils.typing import Map + +## @class TableSchema +class FeatureTableSchema(TableSchema): + """Dumb struct to hold useful info about the structure of feature data for a particular game in a particular database. + This includes the indices of several important database columns, the names + of the database columns, and a list of + IDs for the game sessions in the given requested date range. + + TODO : right now, this is all just a copy of what's in EventTableSchema, need to implement for feature data. + """ + + # *** BUILT-INS & PROPERTIES *** + + def __init__(self, schema_name:str, schema_path:Path = Path(schemas.__file__).parent / "table_schemas/"): + """Constructor for the TableSchema class. + Given a database connection and a game data request, + this retrieves a bit of information from the database to fill in the + class variables. + + :param schema_name: The filename for the table schema JSON. 
+ :type schema_name: str + :param schema_path: Path to find the given table schema file, defaults to "./schemas/table_schemas/" + :type schema_path: str, optional + :param is_legacy: [description], defaults to False + :type is_legacy: bool, optional + """ + super().__init__(schema_name=schema_name, schema_path=schema_path) + + @property + def AsMarkdown(self) -> str: + ret_val = "\n\n".join([ + "## Database Columns", + "The individual columns recorded in the database for this game.", + "\n".join([item.AsMarkdown for item in self.Columns]), + "## Event Object Elements", + "The elements (member variables) of each Event object, available to programmers when writing feature extractors. The right-hand side shows which database column(s) are mapped to a given element.", + self._column_map.AsMarkdown, + ""]) + return ret_val + + @property + def SessionIDColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.SessionID, int): + ret_val = self.ColumnNames[self._column_map.SessionID] + elif isinstance(self._column_map.SessionID, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.SessionID]) + return ret_val + + @property + def AppIDColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.AppID, int): + ret_val = self.ColumnNames[self._column_map.AppID] + elif isinstance(self._column_map.AppID, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppID]) + return ret_val + + @property + def TimestampColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.Timestamp, int): + ret_val = self.ColumnNames[self._column_map.Timestamp] + elif isinstance(self._column_map.Timestamp, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.Timestamp]) + return ret_val + + @property + def EventNameColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.EventName, int): + ret_val = 
self.ColumnNames[self._column_map.EventName] + elif isinstance(self._column_map.EventName, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventName]) + return ret_val + + @property + def EventDataColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.EventData, int): + ret_val = self.ColumnNames[self._column_map.EventData] + elif isinstance(self._column_map.EventData, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventData]) + return ret_val + + @property + def EventSourceColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.EventSource, int): + ret_val = self.ColumnNames[self._column_map.EventSource] + elif isinstance(self._column_map.EventSource, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventSource]) + return ret_val + + @property + def AppVersionColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.AppVersion, int): + ret_val = self.ColumnNames[self._column_map.AppVersion] + elif isinstance(self._column_map.AppVersion, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppVersion]) + return ret_val + + @property + def AppBranchColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.AppBranch, int): + ret_val = self.ColumnNames[self._column_map.AppBranch] + elif isinstance(self._column_map.AppBranch, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppBranch]) + return ret_val + + @property + def LogVersionColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.LogVersion, int): + ret_val = self.ColumnNames[self._column_map.LogVersion] + elif isinstance(self._column_map.LogVersion, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.LogVersion]) + return ret_val + + @property + def TimeOffsetColumn(self) -> Optional[str]: + ret_val = None + if 
isinstance(self._column_map.TimeOffset, int): + ret_val = self.ColumnNames[self._column_map.TimeOffset] + elif isinstance(self._column_map.TimeOffset, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.TimeOffset]) + return ret_val + + @property + def UserIDColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.UserID, int): + ret_val = self.ColumnNames[self._column_map.UserID] + elif isinstance(self._column_map.UserID, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.UserID]) + return ret_val + + @property + def UserDataColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.UserData, int): + ret_val = self.ColumnNames[self._column_map.UserData] + elif isinstance(self._column_map.UserData, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.UserData]) + return ret_val + + @property + def GameStateColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.GameState, int): + ret_val = self.ColumnNames[self._column_map.GameState] + elif isinstance(self._column_map.GameState, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.GameState]) + return ret_val + + @property + def EventSequenceIndexColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self._column_map.EventSequenceIndex, int): + ret_val = self.ColumnNames[self._column_map.EventSequenceIndex] + elif isinstance(self._column_map.EventSequenceIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventSequenceIndex]) + return ret_val + + # *** IMPLEMENT ABSTRACT FUNCTIONS *** + + # *** PUBLIC STATICS *** + + # *** PUBLIC METHODS *** + + _conversion_warnings = [] + def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:utils.map={}): + """Function to convert a row to an Event, based on the loaded schema. 
+ In general, columns specified in the schema's column_map are mapped to corresponding elements of the Event. + If the column_map gave a list, rather than a single column name, the values from each column are concatenated in order with '.' character separators. + Finally, the concatenated values (or single value) are parsed according to the type required by Event. + One exception: For event_data, we expect to create a Dict object, so each column in the list will have its value parsed according to the type in 'columns', + and placed into a dict mapping the original column name to the parsed value (unless the parsed value is a dict, then it is merged into the top-level dict). + + :param row: The raw row data for an event. Generally assumed to be a tuple, though in principle a list would be fine too. + :type row: Tuple[str] + :param concatenator: A string to use as a separator when concatenating multiple columns into a single Event element. + :type concatenator: str + :return: [description] + :rtype: [type] + """ + # define vars to be passed as params + sess_id : str + app_id : str + tstamp : datetime + ename : str + edata : Map + app_ver : str + app_br : str + log_ver : str + offset : Optional[timezone] + uid : Optional[str] + udata : Optional[Map] + state : Optional[Map] + index : Optional[int] + + # 2) Handle event_data parameter, a special case. + # For this case we've got to parse the json, and then fold in whatever other columns were desired. + # 3) Assign vals to our arg vars and pass to Event ctor. 
+ sess_id = self._getValueFromRow(row=row, indices=self._column_map.SessionID, concatenator=concatenator, fallback=fallbacks.get('session_id')) + if not isinstance(sess_id, str): + if "sess_id" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set session_id as {type(sess_id)}, but session_id should be a string", logging.WARN) + EventTableSchema._conversion_warnings.append("sess_id") + sess_id = str(sess_id) + + app_id = self._getValueFromRow(row=row, indices=self._column_map.AppID, concatenator=concatenator, fallback=fallbacks.get('app_id')) + if not isinstance(app_id, str): + if "app_id" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set app_id as {type(app_id)}, but app_id should be a string", logging.WARN) + EventTableSchema._conversion_warnings.append("app_id") + app_id = str(app_id) + + tstamp = self._getValueFromRow(row=row, indices=self._column_map.Timestamp, concatenator=concatenator, fallback=fallbacks.get('timestamp')) + if not isinstance(tstamp, datetime): + if "timestamp" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema parsed timestamp as {type(tstamp)}, but timestamp should be a datetime", logging.WARN) + EventTableSchema._conversion_warnings.append("timestamp") + tstamp = TableSchema._convertDateTime(tstamp) + + ename = self._getValueFromRow(row=row, indices=self._column_map.EventName, concatenator=concatenator, fallback=fallbacks.get('event_name')) + if not isinstance(ename, str): + if "ename" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set event_name as {type(ename)}, but event_name should be a string", logging.WARN) + EventTableSchema._conversion_warnings.append("ename") + ename = str(ename) + + datas : Dict[str, Any] = self._getValueFromRow(row=row, indices=self._column_map.EventData, concatenator=concatenator, 
fallback=fallbacks.get('event_data')) + + # TODO: go bac to isostring function; need 0-padding on ms first, though + edata = dict(sorted(datas.items())) # Sort keys alphabetically + + esrc = self._getValueFromRow(row=row, indices=self._column_map.EventSource, concatenator=concatenator, fallback=fallbacks.get('event_source', EventSource.GAME)) + if not isinstance(esrc, EventSource): + if "esrc" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set event_source as {type(esrc)}, but event_source should be an EventSource", logging.WARN) + EventTableSchema._conversion_warnings.append("esrc") + esrc = EventSource.GENERATED if esrc == "GENERATED" else EventSource.GAME + + app_ver = self._getValueFromRow(row=row, indices=self._column_map.AppVersion, concatenator=concatenator, fallback=fallbacks.get('app_version', "0")) + if not isinstance(app_ver, str): + if "app_ver" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set app_version as {type(app_ver)}, but app_version should be a string", logging.WARN) + EventTableSchema._conversion_warnings.append("app_ver") + app_ver = str(app_ver) + + app_br = self._getValueFromRow(row=row, indices=self._column_map.AppBranch, concatenator=concatenator, fallback=fallbacks.get('app_branch')) + if not isinstance(app_br, str): + if "app_br" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set app_branch as {type(app_br)}, but app_branch should be a string", logging.WARN) + EventTableSchema._conversion_warnings.append("app_br") + app_br = str(app_br) + + log_ver = self._getValueFromRow(row=row, indices=self._column_map.LogVersion, concatenator=concatenator, fallback=fallbacks.get('log_version', "0")) + if not isinstance(log_ver, str): + if "log_ver" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set log_version as {type(log_ver)}, but 
log_version should be a string", logging.WARN) + EventTableSchema._conversion_warnings.append("log_ver") + log_ver = str(log_ver) + + offset = self._getValueFromRow(row=row, indices=self._column_map.TimeOffset, concatenator=concatenator, fallback=fallbacks.get('time_offset')) + if isinstance(offset, timedelta): + if "offset" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set offset as {type(offset)}, but offset should be a timezone", logging.WARN) + EventTableSchema._conversion_warnings.append("offset") + offset = timezone(offset) + + uid = self._getValueFromRow(row=row, indices=self._column_map.UserID, concatenator=concatenator, fallback=fallbacks.get('user_id')) + if uid is not None and not isinstance(uid, str): + if "uid" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set user_id as {type(uid)}, but user_id should be a string", logging.WARN) + EventTableSchema._conversion_warnings.append("uid") + uid = str(uid) + + udata = self._getValueFromRow(row=row, indices=self._column_map.UserData, concatenator=concatenator, fallback=fallbacks.get('user_data')) + + state = self._getValueFromRow(row=row, indices=self._column_map.GameState, concatenator=concatenator, fallback=fallbacks.get('game_state')) + + index = self._getValueFromRow(row=row, indices=self._column_map.EventSequenceIndex, concatenator=concatenator, fallback=fallbacks.get('event_sequence_index')) + if index is not None and not isinstance(index, int): + if "index" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self._table_format_name} table schema set event_sequence_index as {type(index)}, but event_sequence_index should be an int", logging.WARN) + EventTableSchema._conversion_warnings.append("index") + index = int(index) + + return Event(session_id=sess_id, app_id=app_id, timestamp=tstamp, + event_name=ename, event_data=edata, event_source=esrc, + app_version=app_ver, app_branch=app_br, 
log_version=log_ver, + time_offset=offset, user_id=uid, user_data=udata, + game_state=state, event_sequence_index=index) + + # *** PRIVATE STATICS *** From 6cbd4f899f1d737e252e56a2fa0e883132054d7b Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 22:03:25 -0600 Subject: [PATCH 025/124] Remove unused import. --- src/ogd/common/connectors/interfaces/Interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index 745cab1..6dcb1b2 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -4,7 +4,7 @@ import abc import logging from datetime import datetime -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Union # import local files from ogd.common.connectors.filters.collections import * From 36164f692ca7b88549f8ba0eeca5e7a5a7acf6d5 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 22:04:22 -0600 Subject: [PATCH 026/124] Fix indentation in FeatureData. 
--- src/ogd/common/models/FeatureData.py | 94 ++++++++++++++-------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/src/ogd/common/models/FeatureData.py b/src/ogd/common/models/FeatureData.py index 6e9ffbc..f34bc04 100644 --- a/src/ogd/common/models/FeatureData.py +++ b/src/ogd/common/models/FeatureData.py @@ -3,52 +3,52 @@ from ogd.common.models.enums.ExtractionMode import ExtractionMode class FeatureData: - def __init__(self, name:str, feature_type:str, count_index:Optional[int], + def __init__(self, name:str, feature_type:str, count_index:Optional[int], cols:List[str], vals:List[Any], mode:ExtractionMode, player_id:Optional[str]=None, sess_id:Optional[str]=None): - self._name = name - self._feature_type = feature_type - self._count_index = count_index - self._cols = cols - self._vals = vals - self._mode = mode - self._player_id = player_id - self._sess_id = sess_id - - def __str__(self): - return f"Name: {self.Name}\tCount Index: {self.CountIndex}\nColumns: {self._cols}\t Values: {self._vals}\nMode: {self._mode.name}\tPlayer: {self.PlayerID}\tSession: {self.SessionID}" - - def __repr__(self): - return self.Name - - @property - def Name(self): - return self._name - - @property - def FeatureType(self): - return self._feature_type - - @property - def CountIndex(self): - return self._count_index - - @property - def FeatureNames(self) -> List[str]: - return self._cols - - @property - def FeatureValues(self) -> List[Any]: - return self._vals - - @property - def ExportMode(self): - return self._mode - - @property - def PlayerID(self) -> Optional[str]: - return self._player_id - - @property - def SessionID(self) -> Optional[str]: - return self._sess_id \ No newline at end of file + self._name = name + self._feature_type = feature_type + self._count_index = count_index + self._cols = cols + self._vals = vals + self._mode = mode + self._player_id = player_id + self._sess_id = sess_id + + def __str__(self): + return f"Name: {self.Name}\tCount Index: 
{self.CountIndex}\nColumns: {self._cols}\t Values: {self._vals}\nMode: {self._mode.name}\tPlayer: {self.PlayerID}\tSession: {self.SessionID}" + + def __repr__(self): + return self.Name + + @property + def Name(self): + return self._name + + @property + def FeatureType(self): + return self._feature_type + + @property + def CountIndex(self): + return self._count_index + + @property + def FeatureNames(self) -> List[str]: + return self._cols + + @property + def FeatureValues(self) -> List[Any]: + return self._vals + + @property + def ExportMode(self): + return self._mode + + @property + def PlayerID(self) -> Optional[str]: + return self._player_id + + @property + def SessionID(self) -> Optional[str]: + return self._sess_id \ No newline at end of file From 02f0099bf9c0d67d5fd1c35a1cc9985a40438b61 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 22:07:25 -0600 Subject: [PATCH 027/124] Update types of functions to return datasets. --- src/ogd/common/connectors/interfaces/Interface.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index 6dcb1b2..1e4d4c3 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -10,7 +10,9 @@ from ogd.common.connectors.filters.collections import * from ogd.common.connectors.StorageConnector import StorageConnector from ogd.common.models.Event import Event +from ogd.common.models.EventDataset import EventDataset from ogd.common.models.FeatureData import FeatureData +from ogd.common.models.FeatureDataset import FeatureDataset from ogd.common.models.enums.IDMode import IDMode from ogd.common.models.enums.VersionType import VersionType from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema @@ -52,11 +54,11 @@ def _availableVersions(self, mode:VersionType, id_filter:IDFilterCollection, dat pass @abc.abstractmethod - 
def _getEventCollection(self, schema:EventTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Event]: + def _getEventCollection(self, schema:EventTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> EventDataset: pass @abc.abstractmethod - def _getFeatureCollection(self, schema:FeatureTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[FeatureData]: + def _getFeatureCollection(self, schema:FeatureTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> FeatureDataset: pass # *** BUILT-INS & PROPERTIES *** @@ -134,10 +136,10 @@ def AvailableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date Logger.Log(f"Could not retrieve data versions from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) return ret_val - def GetEventCollection(self, schema:EventTableSchema, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection(), event_filter:EventFilterCollection=EventFilterCollection()) -> List[Event]: + def GetEventCollection(self, schema:EventTableSchema, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection(), event_filter:EventFilterCollection=EventFilterCollection()) -> EventDataset: return self._getEventCollection(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter, event_filter=event_filter) - def GetFeatureCollection(self, schema:FeatureTableSchema, 
id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection()) -> List[FeatureData]: + def GetFeatureCollection(self, schema:FeatureTableSchema, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection()) -> FeatureDataset: return self._getFeatureCollection(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter) # *** PRIVATE STATICS *** From 5c46e40d0ea4772e65e24d9aa6b6f1684b75d1d3 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 22:11:46 -0600 Subject: [PATCH 028/124] Actually, separate out some of the data retrieval and filter summarizing logic. --- src/ogd/common/connectors/interfaces/Interface.py | 12 ++++++++---- src/ogd/common/models/EventDataset.py | 4 ++-- src/ogd/common/models/FeatureDataset.py | 4 ++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index 1e4d4c3..054179a 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -54,11 +54,11 @@ def _availableVersions(self, mode:VersionType, id_filter:IDFilterCollection, dat pass @abc.abstractmethod - def _getEventCollection(self, schema:EventTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> EventDataset: + def _getEventCollection(self, schema:EventTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Event]: pass @abc.abstractmethod - def _getFeatureCollection(self, schema:FeatureTableSchema, 
id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> FeatureDataset: + def _getFeatureCollection(self, schema:FeatureTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[FeatureData]: pass # *** BUILT-INS & PROPERTIES *** @@ -137,10 +137,14 @@ def AvailableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date return ret_val def GetEventCollection(self, schema:EventTableSchema, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection(), event_filter:EventFilterCollection=EventFilterCollection()) -> EventDataset: - return self._getEventCollection(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter, event_filter=event_filter) + _filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict | event_filter.AsDict + _events = self._getEventCollection(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter, event_filter=event_filter) + return EventDataset(events=_events, filters=_filters) def GetFeatureCollection(self, schema:FeatureTableSchema, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection()) -> FeatureDataset: - return self._getFeatureCollection(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter) + _filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict + _features = self._getFeatureCollection(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter) + return FeatureDataset(features=_features, filters=_filters) # *** PRIVATE STATICS *** diff --git a/src/ogd/common/models/EventDataset.py 
b/src/ogd/common/models/EventDataset.py index 435ea03..d4a7359 100644 --- a/src/ogd/common/models/EventDataset.py +++ b/src/ogd/common/models/EventDataset.py @@ -10,9 +10,9 @@ class EventDataset: It also contains information on any filters used to define the dataset, such as a date range or set of versions. """ - def __init__(self, events:List[Event], id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> None: + def __init__(self, events:List[Event], filters:Dict[str, Filter]) -> None: self._events = events - self._filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict | event_filter.AsDict + self._filters = filters @property def Events(self) -> List[Event]: diff --git a/src/ogd/common/models/FeatureDataset.py b/src/ogd/common/models/FeatureDataset.py index 1f08d2a..fe273f6 100644 --- a/src/ogd/common/models/FeatureDataset.py +++ b/src/ogd/common/models/FeatureDataset.py @@ -10,9 +10,9 @@ class FeatureDataset: It also contains information on any filters used to define the dataset, such as a date range or set of versions. """ - def __init__(self, features:List[FeatureData], id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> None: + def __init__(self, features:List[FeatureData], filters:Dict[str, Filter]) -> None: self._features = features - self._filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict | event_filter.AsDict + self._filters = filters @property def Features(self) -> List[FeatureData]: From bd064bf6b84d14160250132862489f91b0ffed12 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 22:16:43 -0600 Subject: [PATCH 029/124] Fix some imports in MySQLInterface. 
--- src/ogd/common/connectors/interfaces/MySQLInterface.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/MySQLInterface.py b/src/ogd/common/connectors/interfaces/MySQLInterface.py index 61826da..20e4075 100644 --- a/src/ogd/common/connectors/interfaces/MySQLInterface.py +++ b/src/ogd/common/connectors/interfaces/MySQLInterface.py @@ -6,10 +6,10 @@ from datetime import datetime from typing import Dict, Final, List, Tuple, Optional # import locals -from ogd.common.interfaces.EventInterface import EventInterface +from ogd.common.connectors.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema -from ogd.common.schemas.configs.data_sources.MySQLSourceSchema import MySQLSchema +from ogd.common.schemas.storage.MySQLSourceSchema import MySQLSchema from ogd.common.utils.Logger import Logger @@ -218,7 +218,7 @@ def Query(cursor:cursor.MySQLCursor, query:str, params:Optional[Tuple], fetch_re Logger.Log(f"Query fetch completed, total query time: {time_delta} to get {len(result) if result is not None else 0:d} rows", logging.DEBUG) return result -class MySQLInterface(EventInterface): +class MySQLInterface(Interface): # *** BUILT-INS & PROPERTIES *** From dea4465f6215abd2d57904acdc6307b64ed627cd Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 22:23:10 -0600 Subject: [PATCH 030/124] Fix imports in other interfaces. 
--- .../common/connectors/interfaces/BQFirebaseInterface.py | 2 +- .../connectors/interfaces/BigQueryCodingInterface.py | 8 +++----- src/ogd/common/connectors/interfaces/BigQueryInterface.py | 4 ++-- src/ogd/common/connectors/interfaces/CSVInterface.py | 2 +- src/ogd/common/connectors/interfaces/CodingInterface.py | 6 +++--- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/BQFirebaseInterface.py b/src/ogd/common/connectors/interfaces/BQFirebaseInterface.py index 4baa7ce..d1a81ef 100644 --- a/src/ogd/common/connectors/interfaces/BQFirebaseInterface.py +++ b/src/ogd/common/connectors/interfaces/BQFirebaseInterface.py @@ -4,7 +4,7 @@ from datetime import datetime from typing import Dict, Final, List, Tuple, Optional # import locals -from ogd.common.interfaces.BigQueryInterface import BigQueryInterface +from ogd.common.connectors.interfaces.BigQueryInterface import BigQueryInterface from ogd.common.models.enums.IDMode import IDMode from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema from ogd.common.utils.Logger import Logger diff --git a/src/ogd/common/connectors/interfaces/BigQueryCodingInterface.py b/src/ogd/common/connectors/interfaces/BigQueryCodingInterface.py index 746e70b..01dc4c3 100644 --- a/src/ogd/common/connectors/interfaces/BigQueryCodingInterface.py +++ b/src/ogd/common/connectors/interfaces/BigQueryCodingInterface.py @@ -1,13 +1,11 @@ -import json import logging import os -from datetime import datetime from google.cloud import bigquery from typing import Dict, List, Tuple, Optional # import locals -from coding.Code import Code -from coding.Coder import Coder -from ogd.common.interfaces.CodingInterface import CodingInterface +from ogd.common.models.coding.Code import Code +from ogd.common.models.coding.Coder import Coder +from ogd.common.connectors.interfaces.CodingInterface import CodingInterface from ogd.common.models.enums.IDMode import IDMode from 
ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema from ogd.common.utils.Logger import Logger diff --git a/src/ogd/common/connectors/interfaces/BigQueryInterface.py b/src/ogd/common/connectors/interfaces/BigQueryInterface.py index 0558e61..b48010e 100644 --- a/src/ogd/common/connectors/interfaces/BigQueryInterface.py +++ b/src/ogd/common/connectors/interfaces/BigQueryInterface.py @@ -6,10 +6,10 @@ from google.api_core.exceptions import BadRequest from typing import Dict, Final, List, Tuple, Optional # import locals -from ogd.common.interfaces.EventInterface import EventInterface +from ogd.common.connectors.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema -from ogd.common.schemas.configs.data_sources.BigQuerySourceSchema import BigQuerySchema +from ogd.common.schemas.storage.BigQuerySourceSchema import BigQuerySchema from ogd.common.utils.Logger import Logger AQUALAB_MIN_VERSION : Final[float] = 6.2 diff --git a/src/ogd/common/connectors/interfaces/CSVInterface.py b/src/ogd/common/connectors/interfaces/CSVInterface.py index 61a1bcc..e59dd60 100644 --- a/src/ogd/common/connectors/interfaces/CSVInterface.py +++ b/src/ogd/common/connectors/interfaces/CSVInterface.py @@ -5,7 +5,7 @@ from pathlib import Path from typing import Any, Dict, IO, List, Tuple, Optional ## import local files -from ogd.common.interfaces.EventInterface import EventInterface +from ogd.common.connectors.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema from ogd.common.schemas.tables.TableSchema import TableSchema diff --git a/src/ogd/common/connectors/interfaces/CodingInterface.py b/src/ogd/common/connectors/interfaces/CodingInterface.py index 91edea2..698d09c 100644 --- a/src/ogd/common/connectors/interfaces/CodingInterface.py +++ 
b/src/ogd/common/connectors/interfaces/CodingInterface.py @@ -4,9 +4,9 @@ from typing import Dict, List, Tuple, Optional # import local files -from coding.Code import Code -from coding.Coder import Coder -from ogd.common.interfaces.Interface import Interface +from ogd.common.models.coding.Code import Code +from ogd.common.models.coding.Coder import Coder +from ogd.common.connectors.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode from ogd.common.utils.Logger import Logger From b8465ad56bb10432b61d9a54b6c257305fbe737f Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 22:39:12 -0600 Subject: [PATCH 031/124] GameSourceSchema should explicitly store a game ID, just for simplicity. --- src/ogd/common/schemas/configs/GameSourceSchema.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/ogd/common/schemas/configs/GameSourceSchema.py b/src/ogd/common/schemas/configs/GameSourceSchema.py index aa9d0f1..0aec51a 100644 --- a/src/ogd/common/schemas/configs/GameSourceSchema.py +++ b/src/ogd/common/schemas/configs/GameSourceSchema.py @@ -27,15 +27,27 @@ class GameSourceSchema(Schema): :param Schema: _description_ :type Schema: _type_ """ - def __init__(self, name:str, source_name:str, source_schema:Optional[DataSourceSchema], + def __init__(self, name:str, game_id:Optional[str], + source_name:str, source_schema:Optional[DataSourceSchema], db_name:str, table_name:str, table_schema:str, other_elements:Dict[str, Any]): + self._game_id : str self._source_name : str = source_name self._source_schema : Optional[DataSourceSchema] = source_schema self._db_name : str = db_name self._table_name : str = table_name self._table_schema : str = table_schema + super().__init__(name=name, other_elements=other_elements) + if game_id is not None: + self._game_id = game_id + else: + Logger.Log(f"GameSourceSchema did not receive a game_id, defaulting to {name}") + self._game_id = name + + @property + def 
GameID(self) -> str: + return self._game_id @property def SourceName(self) -> str: From 486f45c364cb611f6033a7aa6a621d6f50a4a5ba Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 22:57:26 -0600 Subject: [PATCH 032/124] Bring back some logic from original interface implementation for converting row data to events. --- .../common/connectors/interfaces/Interface.py | 46 ++++++++++++++++--- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index 054179a..c41e291 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -4,7 +4,8 @@ import abc import logging from datetime import datetime -from typing import Dict, List, Optional, Union +from pprint import pformat +from typing import Dict, List, Optional, Tuple, Union # import local files from ogd.common.connectors.filters.collections import * @@ -54,17 +55,18 @@ def _availableVersions(self, mode:VersionType, id_filter:IDFilterCollection, dat pass @abc.abstractmethod - def _getEventCollection(self, schema:EventTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Event]: + def _getEventRows(self, schema:EventTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Tuple]: pass @abc.abstractmethod - def _getFeatureCollection(self, schema:FeatureTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[FeatureData]: + def _getFeatureRows(self, schema:FeatureTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[Tuple]: pass # *** BUILT-INS & PROPERTIES *** - def 
__init__(self, schema:GameSourceSchema): + def __init__(self, schema:GameSourceSchema, fail_fast:bool): super().__init__(schema=schema) + self._fail_fast = fail_fast def __del__(self): self.Close() @@ -138,14 +140,46 @@ def AvailableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date def GetEventCollection(self, schema:EventTableSchema, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection(), event_filter:EventFilterCollection=EventFilterCollection()) -> EventDataset: _filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict | event_filter.AsDict - _events = self._getEventCollection(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter, event_filter=event_filter) + if self.IsOpen: + # _date_clause = f" on date(s) {date_filter}" + _msg = f"Retrieving event data from {self.ResourceName}." + Logger.Log(_msg, logging.INFO, depth=3) + _rows = self._getEventRows(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter, event_filter=event_filter) + _events = self._eventsFromRows(rows=_rows, schema=schema) + else: + Logger.Log(f"Could not retrieve data versions from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) + _events = [schema.RowToEvent(row) for row in _rows] return EventDataset(events=_events, filters=_filters) def GetFeatureCollection(self, schema:FeatureTableSchema, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection()) -> FeatureDataset: _filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict - _features = self._getFeatureCollection(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter) + _features = 
self._getFeatureRows(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter) return FeatureDataset(features=_features, filters=_filters) # *** PRIVATE STATICS *** # *** PRIVATE METHODS *** + def _eventsFromRows(self, rows:List[Tuple], schema:EventTableSchema) -> List[Event]: + ret_val = [] + + _curr_sess : str = "" + _evt_sess_index : int = 1 + _fallbacks = {"app_id":self._source_schema.GameID} + for row in rows: + try: + event = schema.RowToEvent(row) + # in case event index was not given, we should fall back on using the order it came to us. + if event.SessionID != _curr_sess: + _curr_sess = event.SessionID + _evt_sess_index = 1 + event.FallbackDefaults(index=_evt_sess_index) + _evt_sess_index += 1 + except Exception as err: + if self._fail_fast: + Logger.Log(f"Error while converting row to Event\nFull error: {err}\nRow data: {pformat(row)}", logging.ERROR, depth=2) + raise err + else: + Logger.Log(f"Error while converting row ({row}) to Event. This row will be skipped.\nFull error: {err}", logging.WARNING, depth=2) + else: + ret_val.append(event) + return ret_val From dc835d3b89a45f7501612594c1405b1129edd4ff Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 23:15:56 -0600 Subject: [PATCH 033/124] Add placeholder logic for equivalent conversion stuff for features. 
--- .../common/connectors/interfaces/Interface.py | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index c41e291..73c4d04 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -140,6 +140,7 @@ def AvailableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date def GetEventCollection(self, schema:EventTableSchema, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection(), event_filter:EventFilterCollection=EventFilterCollection()) -> EventDataset: _filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict | event_filter.AsDict + _events = [] if self.IsOpen: # _date_clause = f" on date(s) {date_filter}" _msg = f"Retrieving event data from {self.ResourceName}." 
@@ -147,18 +148,26 @@ def GetEventCollection(self, schema:EventTableSchema, id_filter:IDFilterCollecti _rows = self._getEventRows(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter, event_filter=event_filter) _events = self._eventsFromRows(rows=_rows, schema=schema) else: - Logger.Log(f"Could not retrieve data versions from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) - _events = [schema.RowToEvent(row) for row in _rows] + Logger.Log(f"Could not retrieve event data from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) return EventDataset(events=_events, filters=_filters) def GetFeatureCollection(self, schema:FeatureTableSchema, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection()) -> FeatureDataset: _filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict - _features = self._getFeatureRows(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter) + _features = [] + if self.IsOpen: + # _date_clause = f" on date(s) {date_filter}" + _msg = f"Retrieving event data from {self.ResourceName}." 
+ Logger.Log(_msg, logging.INFO, depth=3) + _rows = self._getFeatureRows(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter) + _features = self._featuresFromRows(rows=_rows, schema=schema) + else: + Logger.Log(f"Could not retrieve feature data from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) return FeatureDataset(features=_features, filters=_filters) # *** PRIVATE STATICS *** # *** PRIVATE METHODS *** + def _eventsFromRows(self, rows:List[Tuple], schema:EventTableSchema) -> List[Event]: ret_val = [] @@ -183,3 +192,17 @@ def _eventsFromRows(self, rows:List[Tuple], schema:EventTableSchema) -> List[Eve else: ret_val.append(event) return ret_val + + def _featuresFromRows(self, rows:List[Tuple], schema:FeatureTableSchema) -> List[FeatureData]: + """_summary_ + + TODO :implement + + :param rows: _description_ + :type rows: List[Tuple] + :param schema: _description_ + :type schema: FeatureTableSchema + :return: _description_ + :rtype: List[FeatureData] + """ + return [] From 7a2e081dca6063d196fad896ff1427b25f1bea8a Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 23:18:27 -0600 Subject: [PATCH 034/124] Add prop for the game source schema to storage connector. 
--- src/ogd/common/connectors/StorageConnector.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ogd/common/connectors/StorageConnector.py b/src/ogd/common/connectors/StorageConnector.py index 3de2be3..4246031 100644 --- a/src/ogd/common/connectors/StorageConnector.py +++ b/src/ogd/common/connectors/StorageConnector.py @@ -58,6 +58,10 @@ def IsOpen(self) -> bool: def ResourceName(self) -> str: return self._source_schema.Name + @property + def GameSourceSchema(self) -> GameSourceSchema: + return self._source_schema + # *** PUBLIC STATICS *** # *** PUBLIC METHODS *** From 5c8f3b45a4fc1035f1be055fb8ceacb4cc82a4f7 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 23:21:14 -0600 Subject: [PATCH 035/124] Move super init to end of init function. --- src/ogd/common/connectors/interfaces/Interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index 73c4d04..da00c89 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -65,8 +65,8 @@ def _getFeatureRows(self, schema:FeatureTableSchema, id_filter:IDFilterCollectio # *** BUILT-INS & PROPERTIES *** def __init__(self, schema:GameSourceSchema, fail_fast:bool): - super().__init__(schema=schema) self._fail_fast = fail_fast + super().__init__(schema=schema) def __del__(self): self.Close() From 08e94b5496bfae65863fe6a215d76c4466db5a24 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 23:21:46 -0600 Subject: [PATCH 036/124] Move super init to end of init function for GameSourceSchema as well. 
--- src/ogd/common/schemas/configs/GameSourceSchema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ogd/common/schemas/configs/GameSourceSchema.py b/src/ogd/common/schemas/configs/GameSourceSchema.py index 0aec51a..18d9383 100644 --- a/src/ogd/common/schemas/configs/GameSourceSchema.py +++ b/src/ogd/common/schemas/configs/GameSourceSchema.py @@ -38,12 +38,12 @@ def __init__(self, name:str, game_id:Optional[str], self._table_name : str = table_name self._table_schema : str = table_schema - super().__init__(name=name, other_elements=other_elements) if game_id is not None: self._game_id = game_id else: Logger.Log(f"GameSourceSchema did not receive a game_id, defaulting to {name}") self._game_id = name + super().__init__(name=name, other_elements=other_elements) @property def GameID(self) -> str: From 153cf66e1d5df0e7d3fc89784fe830a4fe04dc52 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 25 Nov 2024 23:31:50 -0600 Subject: [PATCH 037/124] Switch logic to rely on TableSchema from the GameSourceSchema. 
--- .../common/connectors/interfaces/Interface.py | 74 +++++++++++-------- 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index da00c89..80ba710 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -55,11 +55,11 @@ def _availableVersions(self, mode:VersionType, id_filter:IDFilterCollection, dat pass @abc.abstractmethod - def _getEventRows(self, schema:EventTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Tuple]: + def _getEventRows(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Tuple]: pass @abc.abstractmethod - def _getFeatureRows(self, schema:FeatureTableSchema, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[Tuple]: + def _getFeatureRows(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[Tuple]: pass # *** BUILT-INS & PROPERTIES *** @@ -138,28 +138,34 @@ def AvailableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date Logger.Log(f"Could not retrieve data versions from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) return ret_val - def GetEventCollection(self, schema:EventTableSchema, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection(), event_filter:EventFilterCollection=EventFilterCollection()) -> EventDataset: + def GetEventCollection(self, id_filter:IDFilterCollection=IDFilterCollection(), 
date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection(), event_filter:EventFilterCollection=EventFilterCollection()) -> EventDataset: _filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict | event_filter.AsDict _events = [] if self.IsOpen: - # _date_clause = f" on date(s) {date_filter}" - _msg = f"Retrieving event data from {self.ResourceName}." - Logger.Log(_msg, logging.INFO, depth=3) - _rows = self._getEventRows(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter, event_filter=event_filter) - _events = self._eventsFromRows(rows=_rows, schema=schema) + if isinstance(self.GameSourceSchema.TableSchema, EventTableSchema): + # _date_clause = f" on date(s) {date_filter}" + _msg = f"Retrieving event data from {self.ResourceName}." + Logger.Log(_msg, logging.INFO, depth=3) + _rows = self._getEventRows(id_filter=id_filter, date_filter=date_filter, version_filter=version_filter, event_filter=event_filter) + _events = self._eventsFromRows(rows=_rows) + else: + Logger.Log(f"Could not retrieve event data from {self.ResourceName}, the given table schema was not for event data!", logging.WARNING, depth=3) else: Logger.Log(f"Could not retrieve event data from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) return EventDataset(events=_events, filters=_filters) - def GetFeatureCollection(self, schema:FeatureTableSchema, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection()) -> FeatureDataset: + def GetFeatureCollection(self, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection()) -> FeatureDataset: _filters = id_filter.AsDict | date_filter.AsDict | 
version_filter.AsDict _features = [] if self.IsOpen: - # _date_clause = f" on date(s) {date_filter}" - _msg = f"Retrieving event data from {self.ResourceName}." - Logger.Log(_msg, logging.INFO, depth=3) - _rows = self._getFeatureRows(schema=schema, id_filter=id_filter, date_filter=date_filter, version_filter=version_filter) - _features = self._featuresFromRows(rows=_rows, schema=schema) + if isinstance(self.GameSourceSchema.TableSchema, EventTableSchema): + # _date_clause = f" on date(s) {date_filter}" + _msg = f"Retrieving event data from {self.ResourceName}." + Logger.Log(_msg, logging.INFO, depth=3) + _rows = self._getFeatureRows(id_filter=id_filter, date_filter=date_filter, version_filter=version_filter) + _features = self._featuresFromRows(rows=_rows) + else: + Logger.Log(f"Could not retrieve event data from {self.ResourceName}, the given table schema was not for event data!", logging.WARNING, depth=3) else: Logger.Log(f"Could not retrieve feature data from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) return FeatureDataset(features=_features, filters=_filters) @@ -168,32 +174,36 @@ def GetFeatureCollection(self, schema:FeatureTableSchema, id_filter:IDFilterColl # *** PRIVATE METHODS *** - def _eventsFromRows(self, rows:List[Tuple], schema:EventTableSchema) -> List[Event]: + def _eventsFromRows(self, rows:List[Tuple]) -> List[Event]: ret_val = [] _curr_sess : str = "" _evt_sess_index : int = 1 _fallbacks = {"app_id":self._source_schema.GameID} - for row in rows: - try: - event = schema.RowToEvent(row) - # in case event index was not given, we should fall back on using the order it came to us. 
- if event.SessionID != _curr_sess: - _curr_sess = event.SessionID - _evt_sess_index = 1 - event.FallbackDefaults(index=_evt_sess_index) - _evt_sess_index += 1 - except Exception as err: - if self._fail_fast: - Logger.Log(f"Error while converting row to Event\nFull error: {err}\nRow data: {pformat(row)}", logging.ERROR, depth=2) - raise err + _table_schema = self.GameSourceSchema.TableSchema + if isinstance(_table_schema, EventTableSchema): + for row in rows: + try: + event = _table_schema.RowToEvent(row) + # in case event index was not given, we should fall back on using the order it came to us. + if event.SessionID != _curr_sess: + _curr_sess = event.SessionID + _evt_sess_index = 1 + event.FallbackDefaults(index=_evt_sess_index) + _evt_sess_index += 1 + except Exception as err: + if self._fail_fast: + Logger.Log(f"Error while converting row to Event\nFull error: {err}\nRow data: {pformat(row)}", logging.ERROR, depth=2) + raise err + else: + Logger.Log(f"Error while converting row ({row}) to Event. This row will be skipped.\nFull error: {err}", logging.WARNING, depth=2) else: - Logger.Log(f"Error while converting row ({row}) to Event. This row will be skipped.\nFull error: {err}", logging.WARNING, depth=2) - else: - ret_val.append(event) + ret_val.append(event) + else: + Logger.Log(f"Could not convert row data to Events, the given table schema was not for event data!", logging.WARNING, depth=3) return ret_val - def _featuresFromRows(self, rows:List[Tuple], schema:FeatureTableSchema) -> List[FeatureData]: + def _featuresFromRows(self, rows:List[Tuple]) -> List[FeatureData]: """_summary_ TODO :implement From e4864b763dbb621ec784e0290dd152e8b7d8606f Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 10:47:11 -0600 Subject: [PATCH 038/124] Various small adjustments to sort out the split. 
--- src/ogd/common/schemas/tables/TableSchema.py | 31 ++++++-------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index 1dd30e1..fb42b97 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -19,12 +19,11 @@ from ogd.common.utils.typing import Map ## @class TableSchema -# Dumb struct to hold useful info about the structure of database data -# for a particular game. -# This includes the indices of several important database columns, the names -# of the database columns, the max and min levels in the game, and a list of -# IDs for the game sessions in the given requested date range. class TableSchema(Schema): + """Dumb struct to hold info about the structure of data for a particular game, from a particular source. + In particular, it contains an ordered list of columns in the data source table, + and a mapping of those columns to the corresponding elements of a formal OGD structure. + """ # *** BUILT-INS & PROPERTIES *** @@ -44,7 +43,7 @@ class variables. # declare and initialize vars # self._schema : Optional[Dict[str, Any]] = all_elements self._column_map : ColumnMapSchema = column_map - self._columns : List[ColumnSchema] = columns + self._table_columns : List[ColumnSchema] = columns # after loading the file, take the stuff we need and store. super().__init__(name=name, other_elements={}) @@ -56,26 +55,14 @@ def ColumnNames(self) -> List[str]: :return: Names of each column in the schema. 
:rtype: List[str] """ - return [col.Name for col in self._columns] + return [col.Name for col in self._table_columns] @property def Columns(self) -> List[ColumnSchema]: - return self._columns + return self._table_columns # *** IMPLEMENT ABSTRACT FUNCTIONS *** - @property - def AsMarkdown(self) -> str: - ret_val = "\n\n".join([ - "## Database Columns", - "The individual columns recorded in the database for this game.", - "\n".join([item.AsMarkdown for item in self.Columns]), - "## Event Object Elements", - "The elements (member variables) of each Event object, available to programmers when writing feature extractors. The right-hand side shows which database column(s) are mapped to a given element.", - self._column_map.AsMarkdown, - ""]) - return ret_val - @classmethod def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "TableSchema": _column_map : ColumnMapSchema @@ -95,7 +82,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging # *** PUBLIC STATICS *** - @staticmethod + @classmethod def FromFile(schema_name:str, schema_path:Path = Path(schemas.__file__).parent / "table_schemas/") -> "TableSchema": _table_format_name : str = schema_name @@ -236,7 +223,7 @@ def _getValueFromRow(self, row:Tuple, indices:Union[int, List[int], Dict[str, in for key,column_index in indices.items(): if column_index > len(row): Logger.Log(f"Got column index of {column_index} for column {key}, but row only has {len(row)} columns!", logging.ERROR) - _val = TableSchema._parse(input=row[column_index], col_schema=self._columns[column_index]) + _val = TableSchema._parse(input=row[column_index], col_schema=self._table_columns[column_index]) ret_val.update(_val if isinstance(_val, dict) else {key:_val}) else: ret_val = fallback From 693265fe3e477ac332b1e2329ddbccc89445c3c4 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 11:02:26 -0600 Subject: [PATCH 039/124] Better handling of types in _parse function. 
--- src/ogd/common/schemas/tables/TableSchema.py | 93 ++++++++++---------- 1 file changed, 48 insertions(+), 45 deletions(-) diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index fb42b97..f54cfc8 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -63,22 +63,22 @@ def Columns(self) -> List[ColumnSchema]: # *** IMPLEMENT ABSTRACT FUNCTIONS *** - @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "TableSchema": - _column_map : ColumnMapSchema - _column_schemas : List[ColumnSchema] - - if not isinstance(all_elements, dict): - all_elements = {} - _msg = f"For {name} Table Schema, all_elements was not a dict, defaulting to empty dict" - if logger: - logger.warning(_msg) - else: - Logger.Log(_msg, logging.WARN) - _column_json_list = all_elements.get('columns', []) - _column_schemas = [ColumnSchema.FromDict(name=column.get("name", "UNKNOWN COLUMN NAME"), all_elements=column) for column in _column_json_list] - _column_map = ColumnMapSchema.FromDict(name="Column Map", all_elements=all_elements.get('column_map', {}), column_names=[col.Name for col in _column_schemas]) - return TableSchema(name=name, column_map=_column_map, columns=_column_schemas) + # @classmethod + # def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "TableSchema": + # _column_map : ColumnMapSchema + # _column_schemas : List[ColumnSchema] + + # if not isinstance(all_elements, dict): + # all_elements = {} + # _msg = f"For {name} Table Schema, all_elements was not a dict, defaulting to empty dict" + # if logger: + # logger.warning(_msg) + # else: + # Logger.Log(_msg, logging.WARN) + # _column_json_list = all_elements.get('columns', []) + # _column_schemas = [ColumnSchema.FromDict(name=column.get("name", "UNKNOWN COLUMN NAME"), all_elements=column) for column in _column_json_list] + # 
_column_map = ColumnMapSchema.FromDict(name="Column Map", all_elements=all_elements.get('column_map', {}), column_names=[col.Name for col in _column_schemas]) + # return TableSchema(name=name, column_map=_column_map, columns=_column_schemas) # *** PUBLIC STATICS *** @@ -111,36 +111,39 @@ def _parse(input:Any, col_schema:ColumnSchema) -> Any: return None if input == "None" or input == "null" or input == "nan": return None - elif col_schema.ValueType == 'str': - return str(input) - elif col_schema.ValueType == 'int': - return int(input) - elif col_schema.ValueType == 'float': - return float(input) - elif col_schema.ValueType == 'datetime': - return input if isinstance(input, datetime) else TableSchema._convertDateTime(str(input)) - elif col_schema.ValueType == 'timedelta': - return input if isinstance(input, timedelta) else TableSchema._convertTimedelta(str(input)) - elif col_schema.ValueType == 'timezone': - return input if isinstance(input, timezone) else TableSchema._convertTimezone(str(input)) - elif col_schema.ValueType == 'json': - try: - if isinstance(input, dict): - # if input was a dict already, then just give it back. Else, try to load it from string. - return input - elif isinstance(input, str): - if input != 'None' and input != '': # watch out for nasty corner cases. - return json.loads(input) + match col_schema.ValueType.upper(): + case 'STR': + return str(input) + case 'INT': + return int(input) + case 'FLOAT': + return float(input) + case 'DATETIME': + return input if isinstance(input, datetime) else TableSchema._convertDateTime(str(input)) + case 'TIMEDELTA': + return input if isinstance(input, timedelta) else TableSchema._convertTimedelta(str(input)) + case 'TIMEZONE': + return input if isinstance(input, timezone) else TableSchema._convertTimezone(str(input)) + case 'JSON': + try: + if isinstance(input, dict): + # if input was a dict already, then just give it back. Else, try to load it from string. 
+ return input + elif isinstance(input, str): + if input != 'None' and input != '': # watch out for nasty corner cases. + return json.loads(input) + else: + return None else: - return None - else: - return json.loads(str(input)) - except JSONDecodeError as err: - Logger.Log(f"Could not parse input '{input}' of type {type(input)} from column {col_schema.Name}, got the following error:\n{str(err)}", logging.WARN) - return {} - elif col_schema.ValueType.startswith('enum'): - # if the column is supposed to be an enum, for now we just stick with the string. - return str(input) + return json.loads(str(input)) + except JSONDecodeError as err: + Logger.Log(f"Could not parse input '{input}' of type {type(input)} from column {col_schema.Name}, got the following error:\n{str(err)}", logging.WARN) + return {} + case _dummy if _dummy.startswith('ENUM'): + # if the column is supposed to be an enum, for now we just stick with the string. + return str(input) + case _: + Logger.Log(f"_parse function got an unrecognized column type {col_schema.ValueType}, could not parse!", logging.WARNING) @staticmethod def _convertDateTime(time_str:str) -> datetime: From 72e44097985714629ade2648039549ee85e08297 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 11:06:07 -0600 Subject: [PATCH 040/124] Add logic to handle additional time formatting. 
--- src/ogd/common/schemas/tables/TableSchema.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index f54cfc8..1b20ad1 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -150,20 +150,26 @@ def _convertDateTime(time_str:str) -> datetime: ret_val : datetime if time_str == "None" or time_str == "none" or time_str == "null" or time_str == "nan": - raise ValueError(f"Got a non-timestamp value of {time_str} when converting a datetime column of an Event!") + raise ValueError(f"Got a non-timestamp value of {time_str} when converting a datetime column from data source!") formats = ["%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S.%f"] - # for fmt in formats: try: ret_val = parser.isoparse(time_str) - # ret_val = datetime.strptime(time_str, fmt) - except ValueError as err: - Logger.Log(f"Could not parse time string '{time_str}', got error {err}") - raise err + except ValueError: + # Logger.Log(f"Could not parse time string '{time_str}', got error {err}") + # raise err + pass else: return ret_val - # raise ValueError(f"Could not parse timestamp {time_str}, it did not match any expected formats.") + for fmt in formats: + try: + ret_val = datetime.strptime(time_str, fmt) + except ValueError: + pass + else: + return ret_val + raise ValueError(f"Could not parse timestamp {time_str}, it did not match any expected formats!") @staticmethod def _convertTimedelta(time_str:str) -> Optional[timedelta]: From a64880eafce4993e40b4153262bc1a34890b9ce1 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 11:07:52 -0600 Subject: [PATCH 041/124] Nicer type-hint for indices param. 
--- src/ogd/common/schemas/tables/TableSchema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index 1b20ad1..a1e7c5d 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -218,7 +218,7 @@ def _convertTimezone(time_str:str) -> Optional[timezone]: # *** PRIVATE METHODS *** - def _getValueFromRow(self, row:Tuple, indices:Union[int, List[int], Dict[str, int], None], concatenator:str, fallback:Any) -> Any: + def _getValueFromRow(self, row:Tuple, indices:Optional[int | List[int] | Dict[str, int]], concatenator:str, fallback:Any) -> Any: ret_val : Any if indices is not None: if isinstance(indices, int): From b5cdd66020a8aa313bd621c63129eba04511e957 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 11:15:54 -0600 Subject: [PATCH 042/124] Add an enum to distinguish between event and feature tables. --- src/ogd/common/models/enums/TableType.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 src/ogd/common/models/enums/TableType.py diff --git a/src/ogd/common/models/enums/TableType.py b/src/ogd/common/models/enums/TableType.py new file mode 100644 index 0000000..3022484 --- /dev/null +++ b/src/ogd/common/models/enums/TableType.py @@ -0,0 +1,19 @@ +"""TableType Module +""" + +# import standard libraries +from enum import IntEnum + +class TableType(IntEnum): + """Enum representing the different kinds of data table from which data can be retrieved + + Namely: + + * Events + * Features + """ + EVENT = 1 + FEATURE = 2 + + def __str__(self): + return self.name From e01c6882c5103808f851b3672d5b55053a9bd918 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 11:40:43 -0600 Subject: [PATCH 043/124] Add a FromString to the TableType. 
--- src/ogd/common/models/enums/TableType.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/ogd/common/models/enums/TableType.py b/src/ogd/common/models/enums/TableType.py index 3022484..b0a9d6a 100644 --- a/src/ogd/common/models/enums/TableType.py +++ b/src/ogd/common/models/enums/TableType.py @@ -3,6 +3,7 @@ # import standard libraries from enum import IntEnum +from typing import Self class TableType(IntEnum): """Enum representing the different kinds of data table from which data can be retrieved @@ -17,3 +18,13 @@ class TableType(IntEnum): def __str__(self): return self.name + + @classmethod + def FromString(cls, string:str) -> "TableType": + match string.upper(): + case "EVENT": + return cls.EVENT + case "FEATURE": + return cls.FEATURE + case _: + raise ValueError(f"Unrecognized table type {string}!") From 81b074337800f66f08a61b29a8ed4fa273cdccfe Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 19:12:07 -0600 Subject: [PATCH 044/124] Think I've got a workable solution to splitting classes, now. 
--- src/ogd/common/schemas/tables/TableSchema.py | 64 ++++++++++++-------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index a1e7c5d..d77cf4c 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -1,16 +1,18 @@ ## import standard libraries +import abc import json import logging import re from datetime import datetime, timedelta, timezone from json.decoder import JSONDecodeError from pathlib import Path -from typing import Any, Dict, Final, List, Tuple, Optional, Union +from typing import Any, Dict, List, Tuple, Optional, TypeAlias ## import 3rd-party libraries from dateutil import parser ## import local files from ogd.common import schemas from ogd.common.models.Event import Event, EventSource +from ogd.common.models.enums.TableType import TableType from ogd.common.schemas.Schema import Schema from ogd.common.schemas.tables.ColumnMapSchema import ColumnMapSchema from ogd.common.schemas.tables.ColumnSchema import ColumnSchema @@ -18,6 +20,9 @@ from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map +ColumnMapIndex : TypeAlias = Optional[int | List[int] | Dict[str,int]] +ColumnMapElement : TypeAlias = Optional[str | List[str] | Dict[str,str]] + ## @class TableSchema class TableSchema(Schema): """Dumb struct to hold info about the structure of data for a particular game, from a particular source. @@ -25,9 +30,14 @@ class TableSchema(Schema): and a mapping of those columns to the corresponding elements of a formal OGD structure. 
""" + @abc.abstractmethod + @classmethod + def _fromDict(cls, table_type:TableType, raw_map:Dict[str, ColumnMapElement], column_schemas:List[ColumnSchema]) -> "TableSchema": + pass + # *** BUILT-INS & PROPERTIES *** - def __init__(self, name, column_map:ColumnMapSchema, columns:List[ColumnSchema]): + def __init__(self, name, table_type:TableType, column_map:Dict[str, ColumnMapIndex], columns:List[ColumnSchema]): """Constructor for the TableSchema class. Given a database connection and a game data request, this retrieves a bit of information from the database to fill in the @@ -42,12 +52,17 @@ class variables. """ # declare and initialize vars # self._schema : Optional[Dict[str, Any]] = all_elements - self._column_map : ColumnMapSchema = column_map - self._table_columns : List[ColumnSchema] = columns + self._table_type : TableType = table_type + self._column_map : Dict[str, ColumnMapIndex] = column_map + self._table_columns : List[ColumnSchema] = columns # after loading the file, take the stuff we need and store. super().__init__(name=name, other_elements={}) + @property + def Columns(self) -> List[ColumnSchema]: + return self._table_columns + @property def ColumnNames(self) -> List[str]: """Function to get the names of all columns in the schema. 
@@ -57,40 +72,37 @@ def ColumnNames(self) -> List[str]: """ return [col.Name for col in self._table_columns] - @property - def Columns(self) -> List[ColumnSchema]: - return self._table_columns - # *** IMPLEMENT ABSTRACT FUNCTIONS *** - # @classmethod - # def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "TableSchema": - # _column_map : ColumnMapSchema - # _column_schemas : List[ColumnSchema] - - # if not isinstance(all_elements, dict): - # all_elements = {} - # _msg = f"For {name} Table Schema, all_elements was not a dict, defaulting to empty dict" - # if logger: - # logger.warning(_msg) - # else: - # Logger.Log(_msg, logging.WARN) - # _column_json_list = all_elements.get('columns', []) - # _column_schemas = [ColumnSchema.FromDict(name=column.get("name", "UNKNOWN COLUMN NAME"), all_elements=column) for column in _column_json_list] - # _column_map = ColumnMapSchema.FromDict(name="Column Map", all_elements=all_elements.get('column_map', {}), column_names=[col.Name for col in _column_schemas]) - # return TableSchema(name=name, column_map=_column_map, columns=_column_schemas) + @classmethod + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "TableSchema": + _column_schemas : List[ColumnSchema] + _table_type : TableType + + if not isinstance(all_elements, dict): + all_elements = {} + _msg = f"For {name} Table Schema, all_elements was not a dict, defaulting to empty dict" + if logger: + logger.warning(_msg) + else: + Logger.Log(_msg, logging.WARN) + _table_type_str = all_elements.get('table_type') + _table_type = TableType.FromString(_table_type_str) if _table_type_str is not None else TableType.EVENT + _column_json_list = all_elements.get('columns', []) + _column_schemas = [ColumnSchema.FromDict(name=column.get("name", "UNKNOWN COLUMN NAME"), all_elements=column) for column in _column_json_list] + return cls._fromDict(table_type=_table_type, 
raw_map=all_elements.get('column_map', {}), column_schemas=_column_schemas) # *** PUBLIC STATICS *** @classmethod - def FromFile(schema_name:str, schema_path:Path = Path(schemas.__file__).parent / "table_schemas/") -> "TableSchema": + def FromFile(cls, schema_name:str, schema_path:Path = Path(schemas.__file__).parent / "table_schemas/") -> "TableSchema": _table_format_name : str = schema_name if not _table_format_name.lower().endswith(".json"): _table_format_name += ".json" _schema = utils.loadJSONFile(filename=_table_format_name, path=schema_path) - return TableSchema.FromDict(name=schema_name, all_elements=_schema) + return cls.FromDict(name=schema_name, all_elements=_schema) # *** PUBLIC METHODS *** From 56b61a887040a879a1c465b6447e1a00b2c0c723 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 19:12:30 -0600 Subject: [PATCH 045/124] Tweak some imports and type hints. --- .../schemas/tables/FeatureTableSchema.py | 52 ++++++++++--------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/src/ogd/common/schemas/tables/FeatureTableSchema.py b/src/ogd/common/schemas/tables/FeatureTableSchema.py index d1fde93..47d9c7f 100644 --- a/src/ogd/common/schemas/tables/FeatureTableSchema.py +++ b/src/ogd/common/schemas/tables/FeatureTableSchema.py @@ -13,7 +13,9 @@ # import local files from ogd.common import schemas from ogd.common.schemas.tables.TableSchema import TableSchema +from ogd.common.models.FeatureData import FeatureData from ogd.common.models.Event import Event, EventSource +from ogd.common.utils.typing import Map from ogd.common.utils import utils from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map @@ -51,8 +53,8 @@ def AsMarkdown(self) -> str: "## Database Columns", "The individual columns recorded in the database for this game.", "\n".join([item.AsMarkdown for item in self.Columns]), - "## Event Object Elements", - "The elements (member variables) of each Event object, available to programmers when 
writing feature extractors. The right-hand side shows which database column(s) are mapped to a given element.", + "## Feature Object Elements", + "The elements (member variables) of each Feature object, available to programmers when writing feature extractors. The right-hand side shows which database column(s) are mapped to a given element.", self._column_map.AsMarkdown, ""]) return ret_val @@ -190,7 +192,7 @@ def EventSequenceIndexColumn(self) -> Optional[str]: # *** PUBLIC METHODS *** _conversion_warnings = [] - def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:utils.map={}): + def RowToFeatureData(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}) -> FeatureData: """Function to convert a row to an Event, based on the loaded schema. In general, columns specified in the schema's column_map are mapped to corresponding elements of the Event. If the column_map gave a list, rather than a single column name, the values from each column are concatenated in order with '.' character separators. @@ -225,30 +227,30 @@ def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:utils.map={}): # 3) Assign vals to our arg vars and pass to Event ctor. 
sess_id = self._getValueFromRow(row=row, indices=self._column_map.SessionID, concatenator=concatenator, fallback=fallbacks.get('session_id')) if not isinstance(sess_id, str): - if "sess_id" not in EventTableSchema._conversion_warnings: + if "sess_id" not in FeatureTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set session_id as {type(sess_id)}, but session_id should be a string", logging.WARN) - EventTableSchema._conversion_warnings.append("sess_id") + FeatureTableSchema._conversion_warnings.append("sess_id") sess_id = str(sess_id) app_id = self._getValueFromRow(row=row, indices=self._column_map.AppID, concatenator=concatenator, fallback=fallbacks.get('app_id')) if not isinstance(app_id, str): - if "app_id" not in EventTableSchema._conversion_warnings: + if "app_id" not in FeatureTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set app_id as {type(app_id)}, but app_id should be a string", logging.WARN) - EventTableSchema._conversion_warnings.append("app_id") + FeatureTableSchema._conversion_warnings.append("app_id") app_id = str(app_id) tstamp = self._getValueFromRow(row=row, indices=self._column_map.Timestamp, concatenator=concatenator, fallback=fallbacks.get('timestamp')) if not isinstance(tstamp, datetime): - if "timestamp" not in EventTableSchema._conversion_warnings: + if "timestamp" not in FeatureTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema parsed timestamp as {type(tstamp)}, but timestamp should be a datetime", logging.WARN) - EventTableSchema._conversion_warnings.append("timestamp") + FeatureTableSchema._conversion_warnings.append("timestamp") tstamp = TableSchema._convertDateTime(tstamp) ename = self._getValueFromRow(row=row, indices=self._column_map.EventName, concatenator=concatenator, fallback=fallbacks.get('event_name')) if not isinstance(ename, str): - if "ename" not in EventTableSchema._conversion_warnings: + if "ename" not in 
FeatureTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set event_name as {type(ename)}, but event_name should be a string", logging.WARN) - EventTableSchema._conversion_warnings.append("ename") + FeatureTableSchema._conversion_warnings.append("ename") ename = str(ename) datas : Dict[str, Any] = self._getValueFromRow(row=row, indices=self._column_map.EventData, concatenator=concatenator, fallback=fallbacks.get('event_data')) @@ -258,44 +260,44 @@ def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:utils.map={}): esrc = self._getValueFromRow(row=row, indices=self._column_map.EventSource, concatenator=concatenator, fallback=fallbacks.get('event_source', EventSource.GAME)) if not isinstance(esrc, EventSource): - if "esrc" not in EventTableSchema._conversion_warnings: + if "esrc" not in FeatureTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set event_source as {type(esrc)}, but event_source should be an EventSource", logging.WARN) - EventTableSchema._conversion_warnings.append("esrc") + FeatureTableSchema._conversion_warnings.append("esrc") esrc = EventSource.GENERATED if esrc == "GENERATED" else EventSource.GAME app_ver = self._getValueFromRow(row=row, indices=self._column_map.AppVersion, concatenator=concatenator, fallback=fallbacks.get('app_version', "0")) if not isinstance(app_ver, str): - if "app_ver" not in EventTableSchema._conversion_warnings: + if "app_ver" not in FeatureTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set app_version as {type(app_ver)}, but app_version should be a string", logging.WARN) - EventTableSchema._conversion_warnings.append("app_ver") + FeatureTableSchema._conversion_warnings.append("app_ver") app_ver = str(app_ver) app_br = self._getValueFromRow(row=row, indices=self._column_map.AppBranch, concatenator=concatenator, fallback=fallbacks.get('app_branch')) if not isinstance(app_br, str): - if "app_br" not in 
EventTableSchema._conversion_warnings: + if "app_br" not in FeatureTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set app_branch as {type(app_br)}, but app_branch should be a string", logging.WARN) - EventTableSchema._conversion_warnings.append("app_br") + FeatureTableSchema._conversion_warnings.append("app_br") app_br = str(app_br) log_ver = self._getValueFromRow(row=row, indices=self._column_map.LogVersion, concatenator=concatenator, fallback=fallbacks.get('log_version', "0")) if not isinstance(log_ver, str): - if "log_ver" not in EventTableSchema._conversion_warnings: + if "log_ver" not in FeatureTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set log_version as {type(log_ver)}, but log_version should be a string", logging.WARN) - EventTableSchema._conversion_warnings.append("log_ver") + FeatureTableSchema._conversion_warnings.append("log_ver") log_ver = str(log_ver) offset = self._getValueFromRow(row=row, indices=self._column_map.TimeOffset, concatenator=concatenator, fallback=fallbacks.get('time_offset')) if isinstance(offset, timedelta): - if "offset" not in EventTableSchema._conversion_warnings: + if "offset" not in FeatureTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set offset as {type(offset)}, but offset should be a timezone", logging.WARN) - EventTableSchema._conversion_warnings.append("offset") + FeatureTableSchema._conversion_warnings.append("offset") offset = timezone(offset) uid = self._getValueFromRow(row=row, indices=self._column_map.UserID, concatenator=concatenator, fallback=fallbacks.get('user_id')) if uid is not None and not isinstance(uid, str): - if "uid" not in EventTableSchema._conversion_warnings: + if "uid" not in FeatureTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set user_id as {type(uid)}, but user_id should be a string", logging.WARN) - 
EventTableSchema._conversion_warnings.append("uid") + FeatureTableSchema._conversion_warnings.append("uid") uid = str(uid) udata = self._getValueFromRow(row=row, indices=self._column_map.UserData, concatenator=concatenator, fallback=fallbacks.get('user_data')) @@ -304,9 +306,9 @@ def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:utils.map={}): index = self._getValueFromRow(row=row, indices=self._column_map.EventSequenceIndex, concatenator=concatenator, fallback=fallbacks.get('event_sequence_index')) if index is not None and not isinstance(index, int): - if "index" not in EventTableSchema._conversion_warnings: + if "index" not in FeatureTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set event_sequence_index as {type(index)}, but event_sequence_index should be an int", logging.WARN) - EventTableSchema._conversion_warnings.append("index") + FeatureTableSchema._conversion_warnings.append("index") index = int(index) return Event(session_id=sess_id, app_id=app_id, timestamp=tstamp, From fd3f41baea06a3903d1646e0612d0ec7ca565c89 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 21:06:28 -0600 Subject: [PATCH 046/124] Move a bunch of the conversion code out to a class in utils, clearer separation of logic that way. 
--- src/ogd/common/schemas/tables/TableSchema.py | 130 +----------------- src/ogd/common/utils/typing.py | 131 +++++++++++++++++++ 2 files changed, 134 insertions(+), 127 deletions(-) diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index d77cf4c..ce7690a 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -1,10 +1,6 @@ ## import standard libraries import abc -import json import logging -import re -from datetime import datetime, timedelta, timezone -from json.decoder import JSONDecodeError from pathlib import Path from typing import Any, Dict, List, Tuple, Optional, TypeAlias ## import 3rd-party libraries @@ -18,7 +14,7 @@ from ogd.common.schemas.tables.ColumnSchema import ColumnSchema from ogd.common.utils import utils from ogd.common.utils.Logger import Logger -from ogd.common.utils.typing import Map +from ogd.common.utils.typing import conversions ColumnMapIndex : TypeAlias = Optional[int | List[int] | Dict[str,int]] ColumnMapElement : TypeAlias = Optional[str | List[str] | Dict[str,str]] @@ -108,126 +104,6 @@ def FromFile(cls, schema_name:str, schema_path:Path = Path(schemas.__file__).par # *** PRIVATE STATICS *** - @staticmethod - def _parse(input:Any, col_schema:ColumnSchema) -> Any: - """Applies whatever parsing is appropriate based on what type the schema said a column contained. 
- - :param input: _description_ - :type input: str - :param col_schema: _description_ - :type col_schema: ColumnSchema - :return: _description_ - :rtype: Any - """ - if input is None: - return None - if input == "None" or input == "null" or input == "nan": - return None - match col_schema.ValueType.upper(): - case 'STR': - return str(input) - case 'INT': - return int(input) - case 'FLOAT': - return float(input) - case 'DATETIME': - return input if isinstance(input, datetime) else TableSchema._convertDateTime(str(input)) - case 'TIMEDELTA': - return input if isinstance(input, timedelta) else TableSchema._convertTimedelta(str(input)) - case 'TIMEZONE': - return input if isinstance(input, timezone) else TableSchema._convertTimezone(str(input)) - case 'JSON': - try: - if isinstance(input, dict): - # if input was a dict already, then just give it back. Else, try to load it from string. - return input - elif isinstance(input, str): - if input != 'None' and input != '': # watch out for nasty corner cases. - return json.loads(input) - else: - return None - else: - return json.loads(str(input)) - except JSONDecodeError as err: - Logger.Log(f"Could not parse input '{input}' of type {type(input)} from column {col_schema.Name}, got the following error:\n{str(err)}", logging.WARN) - return {} - case _dummy if _dummy.startswith('ENUM'): - # if the column is supposed to be an enum, for now we just stick with the string. 
- return str(input) - case _: - Logger.Log(f"_parse function got an unrecognized column type {col_schema.ValueType}, could not parse!", logging.WARNING) - - @staticmethod - def _convertDateTime(time_str:str) -> datetime: - ret_val : datetime - - if time_str == "None" or time_str == "none" or time_str == "null" or time_str == "nan": - raise ValueError(f"Got a non-timestamp value of {time_str} when converting a datetime column from data source!") - - formats = ["%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S.%f"] - - try: - ret_val = parser.isoparse(time_str) - except ValueError: - # Logger.Log(f"Could not parse time string '{time_str}', got error {err}") - # raise err - pass - else: - return ret_val - for fmt in formats: - try: - ret_val = datetime.strptime(time_str, fmt) - except ValueError: - pass - else: - return ret_val - raise ValueError(f"Could not parse timestamp {time_str}, it did not match any expected formats!") - - @staticmethod - def _convertTimedelta(time_str:str) -> Optional[timedelta]: - ret_val : Optional[timedelta] - - if time_str == "None" or time_str == "none" or time_str == "null" or time_str == "nan": - return None - elif re.fullmatch(pattern=r"\d+:\d+:\d+(\.\d+)?", string=time_str): - try: - pieces = time_str.split(':') - seconds_pieces = pieces[2].split('.') - ret_val = timedelta(hours=int(pieces[0]), - minutes=int(pieces[1]), - seconds=int(seconds_pieces[0]), - milliseconds=int(seconds_pieces[1]) if len(seconds_pieces) > 1 else 0) - except ValueError as err: - pass - except IndexError as err: - pass - else: - return ret_val - elif re.fullmatch(pattern=r"-?\d+", string=time_str): - try: - ret_val = timedelta(seconds=int(time_str)) - except ValueError as err: - pass - else: - return ret_val - raise ValueError(f"Could not parse timedelta {time_str} of type {type(time_str)}, it did not match any expected formats.") - - @staticmethod - def _convertTimezone(time_str:str) -> Optional[timezone]: - ret_val : 
Optional[timezone] - - if time_str == "None" or time_str == "none" or time_str == "null" or time_str == "nan": - return None - elif re.fullmatch(pattern=r"UTC[+-]\d+:\d+", string=time_str): - try: - pieces = time_str.removeprefix("UTC").split(":") - ret_val = timezone(timedelta(hours=int(pieces[0]), minutes=int(pieces[1]))) - except ValueError as err: - pass - else: - return ret_val - raise ValueError(f"Could not parse timezone {time_str} of type {type(time_str)}, it did not match any expected formats.") - # *** PRIVATE METHODS *** def _getValueFromRow(self, row:Tuple, indices:Optional[int | List[int] | Dict[str, int]], concatenator:str, fallback:Any) -> Any: @@ -236,7 +112,7 @@ def _getValueFromRow(self, row:Tuple, indices:Optional[int | List[int] | Dict[st if isinstance(indices, int): # if there's a single index, use parse to get the value it is stated to be # print(f"About to parse value {row[indices]} as type {self.Columns[indices]},\nFull list from row is {row},\nFull list of columns is {self.Columns},\nwith names {self.ColumnNames}") - ret_val = TableSchema._parse(input=row[indices], col_schema=self.Columns[indices]) + ret_val = conversions.ConvertToType(variable=row[indices], to_type=self.Columns[indices].ValueType) elif isinstance(indices, list): ret_val = concatenator.join([str(row[index]) for index in indices]) elif isinstance(indices, dict): @@ -244,7 +120,7 @@ def _getValueFromRow(self, row:Tuple, indices:Optional[int | List[int] | Dict[st for key,column_index in indices.items(): if column_index > len(row): Logger.Log(f"Got column index of {column_index} for column {key}, but row only has {len(row)} columns!", logging.ERROR) - _val = TableSchema._parse(input=row[column_index], col_schema=self._table_columns[column_index]) + _val = conversions.ConvertToType(variable=row[column_index], to_type=self._table_columns[column_index].ValueType) ret_val.update(_val if isinstance(_val, dict) else {key:_val}) else: ret_val = fallback diff --git 
a/src/ogd/common/utils/typing.py b/src/ogd/common/utils/typing.py index b420f5f..868b0e3 100644 --- a/src/ogd/common/utils/typing.py +++ b/src/ogd/common/utils/typing.py @@ -1,4 +1,135 @@ +## import standard libraries +import json +import re +from datetime import datetime, timedelta, timezone +from json.decoder import JSONDecodeError from typing import Any, Callable, Dict, List, Optional, TypeAlias +## import 3rd-party libraries +from dateutil import parser +## import local files +from ogd.common.utils.Logger import Logger Map : TypeAlias = Dict[str, Any] # type alias: we'll call any dict using string keys a "Map" ExportRow : TypeAlias = List[Any] + +class conversions: + + @staticmethod + def ConvertToType(variable:Any, to_type:str) -> Any: + """Applies whatever parsing is appropriate based on what type the schema said a column contained. + + :param input: _description_ + :type input: str + :param col_schema: _description_ + :type col_schema: ColumnSchema + :return: _description_ + :rtype: Any + """ + if variable is None: + return None + if variable == "None" or variable == "null" or variable == "nan": + return None + match to_type.upper(): + case 'STR': + return str(variable) + case 'INT': + return int(variable) + case 'FLOAT': + return float(variable) + case 'DATETIME': + return variable if isinstance(variable, datetime) else conversions.DatetimeFromString(str(variable)) + case 'TIMEDELTA': + return variable if isinstance(variable, timedelta) else conversions.TimedeltaFromString(str(variable)) + case 'TIMEZONE': + return variable if isinstance(variable, timezone) else conversions.TimezoneFromString(str(variable)) + case 'JSON': + try: + if isinstance(variable, dict): + # if input was a dict already, then just give it back. Else, try to load it from string. + return variable + elif isinstance(variable, str): + if variable != 'None' and variable != '': # watch out for nasty corner cases. 
+ return json.loads(variable) + else: + return None + else: + return json.loads(str(variable)) + except JSONDecodeError as err: + Logger.Log(f"Could not parse input '{variable}' of type {type(variable)} from column {col_schema.Name}, got the following error:\n{str(err)}", logging.WARN) + return {} + case _dummy if _dummy.startswith('ENUM'): + # if the column is supposed to be an enum, for now we just stick with the string. + return str(variable) + case _: + Logger.Log(f"_parse function got an unrecognized column type {col_schema.ValueType}, could not parse!", logging.WARNING) + + @staticmethod + def DatetimeFromString(time_str:str) -> datetime: + ret_val : datetime + + if time_str == "None" or time_str == "none" or time_str == "null" or time_str == "nan": + raise ValueError(f"Got a non-timestamp value of {time_str} when converting a datetime column from data source!") + + formats = ["%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S.%f"] + + try: + ret_val = parser.isoparse(time_str) + except ValueError: + # Logger.Log(f"Could not parse time string '{time_str}', got error {err}") + # raise err + pass + else: + return ret_val + for fmt in formats: + try: + ret_val = datetime.strptime(time_str, fmt) + except ValueError: + pass + else: + return ret_val + raise ValueError(f"Could not parse timestamp {time_str}, it did not match any expected formats!") + + @staticmethod + def TimedeltaFromString(time_str:str) -> Optional[timedelta]: + ret_val : Optional[timedelta] + + if time_str == "None" or time_str == "none" or time_str == "null" or time_str == "nan": + return None + elif re.fullmatch(pattern=r"\d+:\d+:\d+(\.\d+)?", string=time_str): + try: + pieces = time_str.split(':') + seconds_pieces = pieces[2].split('.') + ret_val = timedelta(hours=int(pieces[0]), + minutes=int(pieces[1]), + seconds=int(seconds_pieces[0]), + milliseconds=int(seconds_pieces[1]) if len(seconds_pieces) > 1 else 0) + except ValueError as err: + pass + except 
IndexError as err: + pass + else: + return ret_val + elif re.fullmatch(pattern=r"-?\d+", string=time_str): + try: + ret_val = timedelta(seconds=int(time_str)) + except ValueError as err: + pass + else: + return ret_val + raise ValueError(f"Could not parse timedelta {time_str} of type {type(time_str)}, it did not match any expected formats.") + + @staticmethod + def TimezoneFromString(time_str:str) -> Optional[timezone]: + ret_val : Optional[timezone] + + if time_str == "None" or time_str == "none" or time_str == "null" or time_str == "nan": + return None + elif re.fullmatch(pattern=r"UTC[+-]\d+:\d+", string=time_str): + try: + pieces = time_str.removeprefix("UTC").split(":") + ret_val = timezone(timedelta(hours=int(pieces[0]), minutes=int(pieces[1]))) + except ValueError as err: + pass + else: + return ret_val + raise ValueError(f"Could not parse timezone {time_str} of type {type(time_str)}, it did not match any expected formats.") \ No newline at end of file From 434e271fc55a8c50e3ca74e4866cd414f2c66228 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 22:06:54 -0600 Subject: [PATCH 047/124] Fix a missing import and some uses of old variable in debug prints. 
--- src/ogd/common/utils/typing.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ogd/common/utils/typing.py b/src/ogd/common/utils/typing.py index 868b0e3..dfc1a9e 100644 --- a/src/ogd/common/utils/typing.py +++ b/src/ogd/common/utils/typing.py @@ -1,5 +1,6 @@ ## import standard libraries import json +import logging import re from datetime import datetime, timedelta, timezone from json.decoder import JSONDecodeError @@ -55,13 +56,13 @@ def ConvertToType(variable:Any, to_type:str) -> Any: else: return json.loads(str(variable)) except JSONDecodeError as err: - Logger.Log(f"Could not parse input '{variable}' of type {type(variable)} from column {col_schema.Name}, got the following error:\n{str(err)}", logging.WARN) + Logger.Log(f"Could not parse input '{variable}' of type {type(variable)} to type {to_type}, got the following error:\n{str(err)}", logging.WARN) return {} case _dummy if _dummy.startswith('ENUM'): # if the column is supposed to be an enum, for now we just stick with the string. return str(variable) case _: - Logger.Log(f"_parse function got an unrecognized column type {col_schema.ValueType}, could not parse!", logging.WARNING) + Logger.Log(f"ConvertToType function got an unrecognized type {to_type}, could not complete conversion!", logging.WARNING) @staticmethod def DatetimeFromString(time_str:str) -> datetime: From 91c954edf0e0df36de4b11f79302ff83c0eb3786 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 22:14:31 -0600 Subject: [PATCH 048/124] Move the logic for column map markdown to TableSchema, since it'll no longer be separate class. 
--- src/ogd/common/schemas/tables/TableSchema.py | 37 ++++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index ce7690a..12f77de 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -3,11 +3,8 @@ import logging from pathlib import Path from typing import Any, Dict, List, Tuple, Optional, TypeAlias -## import 3rd-party libraries -from dateutil import parser ## import local files from ogd.common import schemas -from ogd.common.models.Event import Event, EventSource from ogd.common.models.enums.TableType import TableType from ogd.common.schemas.Schema import Schema from ogd.common.schemas.tables.ColumnMapSchema import ColumnMapSchema @@ -70,6 +67,19 @@ def ColumnNames(self) -> List[str]: # *** IMPLEMENT ABSTRACT FUNCTIONS *** + @property + def AsMarkdown(self) -> str: + _column_map_markdown : str + ret_val = "\n\n".join([ + "## Database Columns", + "The individual columns recorded in the database for this game.", + "\n".join([item.AsMarkdown for item in self.Columns]), + f"## {self._table_type} Object Elements", + "The elements (member variables) of each Event object, available to programmers when writing feature extractors. 
The right-hand side shows which database column(s) are mapped to a given element.", + self._columnMapMarkdown, + ""]) + return ret_val + @classmethod def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "TableSchema": _column_schemas : List[ColumnSchema] @@ -106,6 +116,27 @@ def FromFile(cls, schema_name:str, schema_path:Path = Path(schemas.__file__).par # *** PRIVATE METHODS *** + @property + def _columnMapMarkdown(self) -> str: + ret_val : str + + event_column_list = [] + for event_element,columns_mapped in self._column_map.items(): + if columns_mapped is not None: + if isinstance(columns_mapped, str): + event_column_list.append(f"**{event_element}** = Column '*{columns_mapped}*' ") + elif isinstance(columns_mapped, list): + mapped_list = ", ".join([f"'*{item}*'" for item in columns_mapped]) + event_column_list.append(f"**{event_element}** = Columns {mapped_list} ") # figure out how to do one string foreach item in list. + elif isinstance(columns_mapped, int): + event_column_list.append(f"**{event_element}** = Column '*{self.ColumnNames[columns_mapped]}*' (index {columns_mapped}) ") + else: + event_column_list.append(f"**{event_element}** = Column '*{columns_mapped}*' (DEBUG: Type {type(columns_mapped)}) ") + else: + event_column_list.append(f"**{event_element}** = null ") + ret_val = "\n".join(event_column_list) + return ret_val + def _getValueFromRow(self, row:Tuple, indices:Optional[int | List[int] | Dict[str, int]], concatenator:str, fallback:Any) -> Any: ret_val : Any if indices is not None: From d6bb4672d69b9f5824b68959319d613f114e9b53 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 22:21:12 -0600 Subject: [PATCH 049/124] A few more organization improvements for markdown generation. 
--- src/ogd/common/schemas/tables/TableSchema.py | 35 ++++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index 12f77de..512f5fd 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -52,6 +52,20 @@ class variables. # after loading the file, take the stuff we need and store. super().__init__(name=name, other_elements={}) + @property + def TableKind(self) -> TableType: + """Property to show whether the given table schema is for events or features. + + If this TableSchema was read from a file, this will reflect the type indicated in the file, + *even if the specific TableSchema subclass does not match*. + If this TableSchema was generated through some other means, or no type was indicated in the source file, + this will reflect the type of the instance. + + :return: Either TableType.EVENT or TableType.FEATURE + :rtype: TableType + """ + return self._table_type + @property def Columns(self) -> List[ColumnSchema]: return self._table_columns @@ -65,16 +79,27 @@ def ColumnNames(self) -> List[str]: """ return [col.Name for col in self._table_columns] + @property + def ColumnMap(self) -> Dict[str, ColumnMapIndex]: + """Mapping from Event element names to the indices of the database columns mapped to them. + There may be a single index, indicating a 1-to-1 mapping of a database column to the element; + There may be a list of indices, indicating multiple columns will be concatenated to form the element value; + There may be a further mapping of keys to indicies, indicating multiple columns will be joined into a JSON object, with keys mapped to values found at the columns with given indices. + + :return: The dictionary mapping of element names to indices. 
+ :rtype: Dict[str, Union[int, List[int], Dict[str, int], None]] + """ + return self._column_map + # *** IMPLEMENT ABSTRACT FUNCTIONS *** @property def AsMarkdown(self) -> str: - _column_map_markdown : str ret_val = "\n\n".join([ "## Database Columns", "The individual columns recorded in the database for this game.", - "\n".join([item.AsMarkdown for item in self.Columns]), - f"## {self._table_type} Object Elements", + self._columnSetMarkdown, + f"## {self.TableKind} Object Elements", "The elements (member variables) of each Event object, available to programmers when writing feature extractors. The right-hand side shows which database column(s) are mapped to a given element.", self._columnMapMarkdown, ""]) @@ -116,6 +141,10 @@ def FromFile(cls, schema_name:str, schema_path:Path = Path(schemas.__file__).par # *** PRIVATE METHODS *** + @property + def _columnSetMarkdown(self) -> str: + return "\n".join([item.AsMarkdown for item in self.Columns]) + @property def _columnMapMarkdown(self) -> str: ret_val : str From 6013478b87a404e9ed0ad776491580746ebf42a2 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 22:29:06 -0600 Subject: [PATCH 050/124] Remove duplicate declaration of abstract AsMarkdown prop. --- src/ogd/common/schemas/Schema.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/ogd/common/schemas/Schema.py b/src/ogd/common/schemas/Schema.py index f25ea43..4c2821d 100644 --- a/src/ogd/common/schemas/Schema.py +++ b/src/ogd/common/schemas/Schema.py @@ -12,6 +12,11 @@ class Schema(abc.ABC): @property @abc.abstractmethod def AsMarkdown(self) -> str: + """Gets a markdown-formatted representation of the schema. + + :return: A markdown-formatted representation of the schema. 
+ :rtype: str + """ pass @classmethod @@ -37,16 +42,6 @@ def __str__(self): def __repr__(self): return f"{type(self).__name__}[{self.Name}]" - @property - @abc.abstractmethod - def AsMarkdown(self) -> str: - """Gets a markdown-formatted representation of the schema. - - :return: A markdown-formatted representation of the schema. - :rtype: str - """ - pass - @property def Name(self) -> str: """Gets the name of the specific schema represented by the class instance. From 9508a746b9d805d76802e36a6d948d2b4ccf5af4 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 22:32:23 -0600 Subject: [PATCH 051/124] Pull in the props from ColumnMapSchema. --- .../common/schemas/tables/EventTableSchema.py | 226 ++++++++++-------- 1 file changed, 132 insertions(+), 94 deletions(-) diff --git a/src/ogd/common/schemas/tables/EventTableSchema.py b/src/ogd/common/schemas/tables/EventTableSchema.py index 77ed716..11f1912 100644 --- a/src/ogd/common/schemas/tables/EventTableSchema.py +++ b/src/ogd/common/schemas/tables/EventTableSchema.py @@ -1,22 +1,16 @@ """EventTableSchema Module""" # import standard libraries import logging -import re from datetime import datetime, timedelta, timezone -from json.decoder import JSONDecodeError -from pathlib import Path -from typing import Any, Dict, Tuple, Optional - -# import 3rd-party libraries -from dateutil import parser +from typing import Any, Dict, List, Tuple, Optional # import local files -from ogd.common import schemas -from ogd.common.schemas.tables.TableSchema import TableSchema +from ogd.common.models.enums.TableType import TableType from ogd.common.models.Event import Event, EventSource +from ogd.common.schemas.tables.TableSchema import TableSchema, ColumnMapIndex from ogd.common.utils import utils from ogd.common.utils.Logger import Logger -from ogd.common.utils.typing import Map +from ogd.common.utils.typing import Map, conversions ## @class TableSchema # Dumb struct to hold useful info about the structure of database data 
@@ -28,7 +22,7 @@ class EventTableSchema(TableSchema): # *** BUILT-INS & PROPERTIES *** - def __init__(self, schema_name:str, schema_path:Path = Path(schemas.__file__).parent / "table_schemas/"): + def __init__(self, name, table_type:TableType, column_map:Dict[str, ColumnMapIndex], columns:List[ColumnSchema]): """Constructor for the TableSchema class. Given a database connection and a game data request, this retrieves a bit of information from the database to fill in the @@ -41,144 +35,188 @@ class variables. :param is_legacy: [description], defaults to False :type is_legacy: bool, optional """ - super().__init__(schema_name=schema_name, schema_path=schema_path) + super().__init__(name=name, table_type=table_type, column_map=column_map, columns=columns) @property - def AsMarkdown(self) -> str: - ret_val = "\n\n".join([ - "## Database Columns", - "The individual columns recorded in the database for this game.", - "\n".join([item.AsMarkdown for item in self.Columns]), - "## Event Object Elements", - "The elements (member variables) of each Event object, available to programmers when writing feature extractors. 
The right-hand side shows which database column(s) are mapped to a given element.", - self._column_map.AsMarkdown, - ""]) - return ret_val + def SessionIDIndex(self) -> ColumnMapIndex: + return self._column_map['session_id'] + + @property + def AppIDIndex(self) -> ColumnMapIndex: + return self._column_map['app_id'] + + @property + def TimestampIndex(self) -> ColumnMapIndex: + return self._column_map['timestamp'] + + @property + def EventNameIndex(self) -> ColumnMapIndex: + return self._column_map['event_name'] + + @property + def EventDataIndex(self) -> ColumnMapIndex: + return self._column_map['event_data'] + + @property + def EventSourceIndex(self) -> ColumnMapIndex: + return self._column_map['event_source'] + + @property + def AppVersionIndex(self) -> ColumnMapIndex: + return self._column_map['app_version'] + + @property + def AppBranchIndex(self) -> ColumnMapIndex: + return self._column_map['app_branch'] + + @property + def LogVersionIndex(self) -> ColumnMapIndex: + return self._column_map['log_version'] + + @property + def TimeOffsetIndex(self) -> ColumnMapIndex: + return self._column_map['time_offset'] + + @property + def UserIDIndex(self) -> ColumnMapIndex: + return self._column_map['user_id'] + + @property + def UserDataIndex(self) -> ColumnMapIndex: + return self._column_map['user_data'] + + @property + def GameStateIndex(self) -> ColumnMapIndex: + return self._column_map['game_state'] + + @property + def EventSequenceIndexIndex(self) -> ColumnMapIndex: + return self._column_map['event_sequence_index'] @property def SessionIDColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.SessionID, int): - ret_val = self.ColumnNames[self._column_map.SessionID] - elif isinstance(self._column_map.SessionID, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.SessionID]) + if isinstance(self.SessionIDIndex, int): + ret_val = self.ColumnNames[self.SessionIDIndex] + elif isinstance(self.SessionIDIndex, list): + ret_val 
= ", ".join([self.ColumnNames[idx] for idx in self.SessionIDIndex]) return ret_val @property def AppIDColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.AppID, int): - ret_val = self.ColumnNames[self._column_map.AppID] - elif isinstance(self._column_map.AppID, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppID]) + if isinstance(self.AppIDIndex, int): + ret_val = self.ColumnNames[self.AppIDIndex] + elif isinstance(self.AppIDIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppIDIndex]) return ret_val @property def TimestampColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.Timestamp, int): - ret_val = self.ColumnNames[self._column_map.Timestamp] - elif isinstance(self._column_map.Timestamp, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.Timestamp]) + if isinstance(self.TimestampIndex, int): + ret_val = self.ColumnNames[self.TimestampIndex] + elif isinstance(self.TimestampIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.TimestampIndex]) return ret_val @property def EventNameColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.EventName, int): - ret_val = self.ColumnNames[self._column_map.EventName] - elif isinstance(self._column_map.EventName, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventName]) + if isinstance(self.EventNameIndex, int): + ret_val = self.ColumnNames[self.EventNameIndex] + elif isinstance(self.EventNameIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventNameIndex]) return ret_val @property def EventDataColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.EventData, int): - ret_val = self.ColumnNames[self._column_map.EventData] - elif isinstance(self._column_map.EventData, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventData]) 
+ if isinstance(self.EventDataIndex, int): + ret_val = self.ColumnNames[self.EventDataIndex] + elif isinstance(self.EventDataIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventDataIndex]) return ret_val @property def EventSourceColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.EventSource, int): - ret_val = self.ColumnNames[self._column_map.EventSource] - elif isinstance(self._column_map.EventSource, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventSource]) + if isinstance(self.EventSourceIndex, int): + ret_val = self.ColumnNames[self.EventSourceIndex] + elif isinstance(self.EventSourceIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventSourceIndex]) return ret_val @property def AppVersionColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.AppVersion, int): - ret_val = self.ColumnNames[self._column_map.AppVersion] - elif isinstance(self._column_map.AppVersion, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppVersion]) + if isinstance(self.AppVersionIndex, int): + ret_val = self.ColumnNames[self.AppVersionIndex] + elif isinstance(self.AppVersionIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppVersionIndex]) return ret_val @property def AppBranchColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.AppBranch, int): - ret_val = self.ColumnNames[self._column_map.AppBranch] - elif isinstance(self._column_map.AppBranch, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppBranch]) + if isinstance(self.AppBranchIndex, int): + ret_val = self.ColumnNames[self.AppBranchIndex] + elif isinstance(self.AppBranchIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppBranchIndex]) return ret_val @property def LogVersionColumn(self) -> Optional[str]: ret_val = None - if 
isinstance(self._column_map.LogVersion, int): - ret_val = self.ColumnNames[self._column_map.LogVersion] - elif isinstance(self._column_map.LogVersion, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.LogVersion]) + if isinstance(self.LogVersionIndex, int): + ret_val = self.ColumnNames[self.LogVersionIndex] + elif isinstance(self.LogVersionIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.LogVersionIndex]) return ret_val @property def TimeOffsetColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.TimeOffset, int): - ret_val = self.ColumnNames[self._column_map.TimeOffset] - elif isinstance(self._column_map.TimeOffset, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.TimeOffset]) + if isinstance(self.TimeOffsetIndex, int): + ret_val = self.ColumnNames[self.TimeOffsetIndex] + elif isinstance(self.TimeOffsetIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.TimeOffsetIndex]) return ret_val @property def UserIDColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.UserID, int): - ret_val = self.ColumnNames[self._column_map.UserID] - elif isinstance(self._column_map.UserID, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.UserID]) + if isinstance(self.UserIDIndex, int): + ret_val = self.ColumnNames[self.UserIDIndex] + elif isinstance(self.UserIDIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.UserIDIndex]) return ret_val @property def UserDataColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.UserData, int): - ret_val = self.ColumnNames[self._column_map.UserData] - elif isinstance(self._column_map.UserData, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.UserData]) + if isinstance(self.UserDataIndex, int): + ret_val = self.ColumnNames[self.UserDataIndex] + elif isinstance(self.UserDataIndex, list): 
+ ret_val = ", ".join([self.ColumnNames[idx] for idx in self.UserDataIndex]) return ret_val @property def GameStateColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.GameState, int): - ret_val = self.ColumnNames[self._column_map.GameState] - elif isinstance(self._column_map.GameState, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.GameState]) + if isinstance(self.GameStateIndex, int): + ret_val = self.ColumnNames[self.GameStateIndex] + elif isinstance(self.GameStateIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.GameStateIndex]) return ret_val @property def EventSequenceIndexColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.EventSequenceIndex, int): - ret_val = self.ColumnNames[self._column_map.EventSequenceIndex] - elif isinstance(self._column_map.EventSequenceIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventSequenceIndex]) + if isinstance(self.EventSequenceIndexIndex, int): + ret_val = self.ColumnNames[self.EventSequenceIndexIndex] + elif isinstance(self.EventSequenceIndexIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventSequenceIndexIndex]) return ret_val # *** IMPLEMENT ABSTRACT FUNCTIONS *** @@ -188,7 +226,7 @@ def EventSequenceIndexColumn(self) -> Optional[str]: # *** PUBLIC METHODS *** _conversion_warnings = [] - def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:utils.map={}): + def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}): """Function to convert a row to an Event, based on the loaded schema. In general, columns specified in the schema's column_map are mapped to corresponding elements of the Event. If the column_map gave a list, rather than a single column name, the values from each column are concatenated in order with '.' character separators. 
@@ -221,86 +259,86 @@ def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:utils.map={}): # 2) Handle event_data parameter, a special case. # For this case we've got to parse the json, and then fold in whatever other columns were desired. # 3) Assign vals to our arg vars and pass to Event ctor. - sess_id = self._getValueFromRow(row=row, indices=self._column_map.SessionID, concatenator=concatenator, fallback=fallbacks.get('session_id')) + sess_id = self._getValueFromRow(row=row, indices=self.SessionIDIndex, concatenator=concatenator, fallback=fallbacks.get('session_id')) if not isinstance(sess_id, str): if "sess_id" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set session_id as {type(sess_id)}, but session_id should be a string", logging.WARN) EventTableSchema._conversion_warnings.append("sess_id") sess_id = str(sess_id) - app_id = self._getValueFromRow(row=row, indices=self._column_map.AppID, concatenator=concatenator, fallback=fallbacks.get('app_id')) + app_id = self._getValueFromRow(row=row, indices=self.AppIDIndex, concatenator=concatenator, fallback=fallbacks.get('app_id')) if not isinstance(app_id, str): if "app_id" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set app_id as {type(app_id)}, but app_id should be a string", logging.WARN) EventTableSchema._conversion_warnings.append("app_id") app_id = str(app_id) - tstamp = self._getValueFromRow(row=row, indices=self._column_map.Timestamp, concatenator=concatenator, fallback=fallbacks.get('timestamp')) + tstamp = self._getValueFromRow(row=row, indices=self.TimestampIndex, concatenator=concatenator, fallback=fallbacks.get('timestamp')) if not isinstance(tstamp, datetime): if "timestamp" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema parsed timestamp as {type(tstamp)}, but timestamp should be a datetime", logging.WARN) 
EventTableSchema._conversion_warnings.append("timestamp") - tstamp = TableSchema._convertDateTime(tstamp) + tstamp = conversions.DatetimeFromString(tstamp) - ename = self._getValueFromRow(row=row, indices=self._column_map.EventName, concatenator=concatenator, fallback=fallbacks.get('event_name')) + ename = self._getValueFromRow(row=row, indices=self.EventNameIndex, concatenator=concatenator, fallback=fallbacks.get('event_name')) if not isinstance(ename, str): if "ename" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set event_name as {type(ename)}, but event_name should be a string", logging.WARN) EventTableSchema._conversion_warnings.append("ename") ename = str(ename) - datas : Dict[str, Any] = self._getValueFromRow(row=row, indices=self._column_map.EventData, concatenator=concatenator, fallback=fallbacks.get('event_data')) + datas : Dict[str, Any] = self._getValueFromRow(row=row, indices=self.EventDataIndex, concatenator=concatenator, fallback=fallbacks.get('event_data')) # TODO: go bac to isostring function; need 0-padding on ms first, though edata = dict(sorted(datas.items())) # Sort keys alphabetically - esrc = self._getValueFromRow(row=row, indices=self._column_map.EventSource, concatenator=concatenator, fallback=fallbacks.get('event_source', EventSource.GAME)) + esrc = self._getValueFromRow(row=row, indices=self.EventSourceIndex, concatenator=concatenator, fallback=fallbacks.get('event_source', EventSource.GAME)) if not isinstance(esrc, EventSource): if "esrc" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set event_source as {type(esrc)}, but event_source should be an EventSource", logging.WARN) EventTableSchema._conversion_warnings.append("esrc") esrc = EventSource.GENERATED if esrc == "GENERATED" else EventSource.GAME - app_ver = self._getValueFromRow(row=row, indices=self._column_map.AppVersion, concatenator=concatenator, 
fallback=fallbacks.get('app_version', "0")) + app_ver = self._getValueFromRow(row=row, indices=self.AppVersionIndex, concatenator=concatenator, fallback=fallbacks.get('app_version', "0")) if not isinstance(app_ver, str): if "app_ver" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set app_version as {type(app_ver)}, but app_version should be a string", logging.WARN) EventTableSchema._conversion_warnings.append("app_ver") app_ver = str(app_ver) - app_br = self._getValueFromRow(row=row, indices=self._column_map.AppBranch, concatenator=concatenator, fallback=fallbacks.get('app_branch')) + app_br = self._getValueFromRow(row=row, indices=self.AppBranchIndex, concatenator=concatenator, fallback=fallbacks.get('app_branch')) if not isinstance(app_br, str): if "app_br" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set app_branch as {type(app_br)}, but app_branch should be a string", logging.WARN) EventTableSchema._conversion_warnings.append("app_br") app_br = str(app_br) - log_ver = self._getValueFromRow(row=row, indices=self._column_map.LogVersion, concatenator=concatenator, fallback=fallbacks.get('log_version', "0")) + log_ver = self._getValueFromRow(row=row, indices=self.LogVersionIndex, concatenator=concatenator, fallback=fallbacks.get('log_version', "0")) if not isinstance(log_ver, str): if "log_ver" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set log_version as {type(log_ver)}, but log_version should be a string", logging.WARN) EventTableSchema._conversion_warnings.append("log_ver") log_ver = str(log_ver) - offset = self._getValueFromRow(row=row, indices=self._column_map.TimeOffset, concatenator=concatenator, fallback=fallbacks.get('time_offset')) + offset = self._getValueFromRow(row=row, indices=self.TimeOffsetIndex, concatenator=concatenator, fallback=fallbacks.get('time_offset')) if isinstance(offset, 
timedelta): if "offset" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set offset as {type(offset)}, but offset should be a timezone", logging.WARN) EventTableSchema._conversion_warnings.append("offset") offset = timezone(offset) - uid = self._getValueFromRow(row=row, indices=self._column_map.UserID, concatenator=concatenator, fallback=fallbacks.get('user_id')) + uid = self._getValueFromRow(row=row, indices=self.UserIDIndex, concatenator=concatenator, fallback=fallbacks.get('user_id')) if uid is not None and not isinstance(uid, str): if "uid" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set user_id as {type(uid)}, but user_id should be a string", logging.WARN) EventTableSchema._conversion_warnings.append("uid") uid = str(uid) - udata = self._getValueFromRow(row=row, indices=self._column_map.UserData, concatenator=concatenator, fallback=fallbacks.get('user_data')) + udata = self._getValueFromRow(row=row, indices=self.UserDataIndex, concatenator=concatenator, fallback=fallbacks.get('user_data')) - state = self._getValueFromRow(row=row, indices=self._column_map.GameState, concatenator=concatenator, fallback=fallbacks.get('game_state')) + state = self._getValueFromRow(row=row, indices=self.GameStateIndex, concatenator=concatenator, fallback=fallbacks.get('game_state')) - index = self._getValueFromRow(row=row, indices=self._column_map.EventSequenceIndex, concatenator=concatenator, fallback=fallbacks.get('event_sequence_index')) + index = self._getValueFromRow(row=row, indices=self.EventSequenceIndexIndex, concatenator=concatenator, fallback=fallbacks.get('event_sequence_index')) if index is not None and not isinstance(index, int): if "index" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self._table_format_name} table schema set event_sequence_index as {type(index)}, but event_sequence_index should be an int", logging.WARN) From 
2c3802981309186287a64090487d3a8c82eb409e Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 22:36:20 -0600 Subject: [PATCH 052/124] Pass along TableSchema name to the subclass _fromDict. --- src/ogd/common/schemas/tables/TableSchema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index 512f5fd..81676a9 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -25,7 +25,7 @@ class TableSchema(Schema): @abc.abstractmethod @classmethod - def _fromDict(cls, table_type:TableType, raw_map:Dict[str, ColumnMapElement], column_schemas:List[ColumnSchema]) -> "TableSchema": + def _fromDict(cls, name:str, table_type:TableType, raw_map:Dict[str, ColumnMapElement], column_schemas:List[ColumnSchema]) -> "TableSchema": pass # *** BUILT-INS & PROPERTIES *** @@ -121,7 +121,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _table_type = TableType.FromString(_table_type_str) if _table_type_str is not None else TableType.EVENT _column_json_list = all_elements.get('columns', []) _column_schemas = [ColumnSchema.FromDict(name=column.get("name", "UNKNOWN COLUMN NAME"), all_elements=column) for column in _column_json_list] - return cls._fromDict(table_type=_table_type, raw_map=all_elements.get('column_map', {}), column_schemas=_column_schemas) + return cls._fromDict(name=name, table_type=_table_type, raw_map=all_elements.get('column_map', {}), column_schemas=_column_schemas) # *** PUBLIC STATICS *** From ecaafaf4c94fe95a04e602c734753db736fb4d09 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 22:39:48 -0600 Subject: [PATCH 053/124] Add missing import, and put in header for _fromDict, to be filled in shortly. 
--- src/ogd/common/schemas/tables/EventTableSchema.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ogd/common/schemas/tables/EventTableSchema.py b/src/ogd/common/schemas/tables/EventTableSchema.py index 11f1912..812a97a 100644 --- a/src/ogd/common/schemas/tables/EventTableSchema.py +++ b/src/ogd/common/schemas/tables/EventTableSchema.py @@ -7,7 +7,7 @@ # import local files from ogd.common.models.enums.TableType import TableType from ogd.common.models.Event import Event, EventSource -from ogd.common.schemas.tables.TableSchema import TableSchema, ColumnMapIndex +from ogd.common.schemas.tables.TableSchema import TableSchema, ColumnMapIndex, ColumnMapElement from ogd.common.utils import utils from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map, conversions @@ -221,6 +221,9 @@ def EventSequenceIndexColumn(self) -> Optional[str]: # *** IMPLEMENT ABSTRACT FUNCTIONS *** + @classmethod + def _fromDict(cls, name:str, table_type:TableType, raw_map:Dict[str, ColumnMapElement], column_schemas:List[ColumnSchema]) -> "TableSchema": + # *** PUBLIC STATICS *** # *** PUBLIC METHODS *** From badeb5631f3ba3ee6264098d2ab375ed1acd81d5 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 22:41:03 -0600 Subject: [PATCH 054/124] Add placeholder body to _fromDict function. 
--- src/ogd/common/schemas/tables/EventTableSchema.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ogd/common/schemas/tables/EventTableSchema.py b/src/ogd/common/schemas/tables/EventTableSchema.py index 812a97a..bdb5493 100644 --- a/src/ogd/common/schemas/tables/EventTableSchema.py +++ b/src/ogd/common/schemas/tables/EventTableSchema.py @@ -223,6 +223,8 @@ def EventSequenceIndexColumn(self) -> Optional[str]: @classmethod def _fromDict(cls, name:str, table_type:TableType, raw_map:Dict[str, ColumnMapElement], column_schemas:List[ColumnSchema]) -> "TableSchema": + _column_map : Dict[str, ColumnMapIndex] = {} + return EventTableSchema(name=name, table_type=table_type, column_map=_column_map, columns=column_schemas) # *** PUBLIC STATICS *** From 4e376718712b1a0d7137f5b6a98c6ad318057e7a Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 22:42:34 -0600 Subject: [PATCH 055/124] Fix and improve debug outputs for EventTableSchema RowToEvent function. --- .../common/schemas/tables/EventTableSchema.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/ogd/common/schemas/tables/EventTableSchema.py b/src/ogd/common/schemas/tables/EventTableSchema.py index bdb5493..e5f2c0f 100644 --- a/src/ogd/common/schemas/tables/EventTableSchema.py +++ b/src/ogd/common/schemas/tables/EventTableSchema.py @@ -267,28 +267,28 @@ def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}): sess_id = self._getValueFromRow(row=row, indices=self.SessionIDIndex, concatenator=concatenator, fallback=fallbacks.get('session_id')) if not isinstance(sess_id, str): if "sess_id" not in EventTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set session_id as {type(sess_id)}, but session_id should be a string", logging.WARN) + Logger.Log(f"{self.Name} {self.TableKind} table schema set session_id as {type(sess_id)}, but session_id should be a string", logging.WARN) 
EventTableSchema._conversion_warnings.append("sess_id") sess_id = str(sess_id) app_id = self._getValueFromRow(row=row, indices=self.AppIDIndex, concatenator=concatenator, fallback=fallbacks.get('app_id')) if not isinstance(app_id, str): if "app_id" not in EventTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set app_id as {type(app_id)}, but app_id should be a string", logging.WARN) + Logger.Log(f"{self.Name} {self.TableKind} table schema set app_id as {type(app_id)}, but app_id should be a string", logging.WARN) EventTableSchema._conversion_warnings.append("app_id") app_id = str(app_id) tstamp = self._getValueFromRow(row=row, indices=self.TimestampIndex, concatenator=concatenator, fallback=fallbacks.get('timestamp')) if not isinstance(tstamp, datetime): if "timestamp" not in EventTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema parsed timestamp as {type(tstamp)}, but timestamp should be a datetime", logging.WARN) + Logger.Log(f"{self.Name} {self.TableKind} table schema parsed timestamp as {type(tstamp)}, but timestamp should be a datetime", logging.WARN) EventTableSchema._conversion_warnings.append("timestamp") tstamp = conversions.DatetimeFromString(tstamp) ename = self._getValueFromRow(row=row, indices=self.EventNameIndex, concatenator=concatenator, fallback=fallbacks.get('event_name')) if not isinstance(ename, str): if "ename" not in EventTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set event_name as {type(ename)}, but event_name should be a string", logging.WARN) + Logger.Log(f"{self.Name} {self.TableKind} table schema set event_name as {type(ename)}, but event_name should be a string", logging.WARN) EventTableSchema._conversion_warnings.append("ename") ename = str(ename) @@ -300,42 +300,42 @@ def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}): esrc = self._getValueFromRow(row=row, indices=self.EventSourceIndex, 
concatenator=concatenator, fallback=fallbacks.get('event_source', EventSource.GAME)) if not isinstance(esrc, EventSource): if "esrc" not in EventTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set event_source as {type(esrc)}, but event_source should be an EventSource", logging.WARN) + Logger.Log(f"{self.Name} {self.TableKind} table schema set event_source as {type(esrc)}, but event_source should be an EventSource", logging.WARN) EventTableSchema._conversion_warnings.append("esrc") esrc = EventSource.GENERATED if esrc == "GENERATED" else EventSource.GAME app_ver = self._getValueFromRow(row=row, indices=self.AppVersionIndex, concatenator=concatenator, fallback=fallbacks.get('app_version', "0")) if not isinstance(app_ver, str): if "app_ver" not in EventTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set app_version as {type(app_ver)}, but app_version should be a string", logging.WARN) + Logger.Log(f"{self.Name} {self.TableKind} table schema set app_version as {type(app_ver)}, but app_version should be a string", logging.WARN) EventTableSchema._conversion_warnings.append("app_ver") app_ver = str(app_ver) app_br = self._getValueFromRow(row=row, indices=self.AppBranchIndex, concatenator=concatenator, fallback=fallbacks.get('app_branch')) if not isinstance(app_br, str): if "app_br" not in EventTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set app_branch as {type(app_br)}, but app_branch should be a string", logging.WARN) + Logger.Log(f"{self.Name} {self.TableKind} table schema set app_branch as {type(app_br)}, but app_branch should be a string", logging.WARN) EventTableSchema._conversion_warnings.append("app_br") app_br = str(app_br) log_ver = self._getValueFromRow(row=row, indices=self.LogVersionIndex, concatenator=concatenator, fallback=fallbacks.get('log_version', "0")) if not isinstance(log_ver, str): if "log_ver" not in 
EventTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set log_version as {type(log_ver)}, but log_version should be a string", logging.WARN) + Logger.Log(f"{self.Name} {self.TableKind} table schema set log_version as {type(log_ver)}, but log_version should be a string", logging.WARN) EventTableSchema._conversion_warnings.append("log_ver") log_ver = str(log_ver) offset = self._getValueFromRow(row=row, indices=self.TimeOffsetIndex, concatenator=concatenator, fallback=fallbacks.get('time_offset')) if isinstance(offset, timedelta): if "offset" not in EventTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set offset as {type(offset)}, but offset should be a timezone", logging.WARN) + Logger.Log(f"{self.Name} {self.TableKind} table schema set offset as {type(offset)}, but offset should be a timezone", logging.WARN) EventTableSchema._conversion_warnings.append("offset") offset = timezone(offset) uid = self._getValueFromRow(row=row, indices=self.UserIDIndex, concatenator=concatenator, fallback=fallbacks.get('user_id')) if uid is not None and not isinstance(uid, str): if "uid" not in EventTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set user_id as {type(uid)}, but user_id should be a string", logging.WARN) + Logger.Log(f"{self.Name} {self.TableKind} table schema set user_id as {type(uid)}, but user_id should be a string", logging.WARN) EventTableSchema._conversion_warnings.append("uid") uid = str(uid) @@ -346,7 +346,7 @@ def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}): index = self._getValueFromRow(row=row, indices=self.EventSequenceIndexIndex, concatenator=concatenator, fallback=fallbacks.get('event_sequence_index')) if index is not None and not isinstance(index, int): if "index" not in EventTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set event_sequence_index as {type(index)}, but 
event_sequence_index should be an int", logging.WARN) + Logger.Log(f"{self.Name} {self.TableKind} table schema set event_sequence_index as {type(index)}, but event_sequence_index should be an int", logging.WARN) EventTableSchema._conversion_warnings.append("index") index = int(index) From f59605c236d29d073d4dbcfa918e9179000e45a1 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 22:52:42 -0600 Subject: [PATCH 056/124] Make _conversion_warnings into a counter, that way we could theoretically print the counts after an export as part of debug outputs. --- .../common/schemas/tables/EventTableSchema.py | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/ogd/common/schemas/tables/EventTableSchema.py b/src/ogd/common/schemas/tables/EventTableSchema.py index e5f2c0f..34d932a 100644 --- a/src/ogd/common/schemas/tables/EventTableSchema.py +++ b/src/ogd/common/schemas/tables/EventTableSchema.py @@ -1,6 +1,7 @@ """EventTableSchema Module""" # import standard libraries import logging +from collections import Counter from datetime import datetime, timedelta, timezone from typing import Any, Dict, List, Tuple, Optional @@ -8,6 +9,7 @@ from ogd.common.models.enums.TableType import TableType from ogd.common.models.Event import Event, EventSource from ogd.common.schemas.tables.TableSchema import TableSchema, ColumnMapIndex, ColumnMapElement +from ogd.common.schemas.tables.ColumnSchema import ColumnSchema from ogd.common.utils import utils from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map, conversions @@ -230,7 +232,7 @@ def _fromDict(cls, name:str, table_type:TableType, raw_map:Dict[str, ColumnMapEl # *** PUBLIC METHODS *** - _conversion_warnings = [] + _conversion_warnings = Counter() def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}): """Function to convert a row to an Event, based on the loaded schema. 
In general, columns specified in the schema's column_map are mapped to corresponding elements of the Event. @@ -268,28 +270,28 @@ def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}): if not isinstance(sess_id, str): if "sess_id" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self.Name} {self.TableKind} table schema set session_id as {type(sess_id)}, but session_id should be a string", logging.WARN) - EventTableSchema._conversion_warnings.append("sess_id") + EventTableSchema._conversion_warnings["sess_id"] += 1 sess_id = str(sess_id) app_id = self._getValueFromRow(row=row, indices=self.AppIDIndex, concatenator=concatenator, fallback=fallbacks.get('app_id')) if not isinstance(app_id, str): if "app_id" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self.Name} {self.TableKind} table schema set app_id as {type(app_id)}, but app_id should be a string", logging.WARN) - EventTableSchema._conversion_warnings.append("app_id") + EventTableSchema._conversion_warnings["app_id"] += 1 app_id = str(app_id) tstamp = self._getValueFromRow(row=row, indices=self.TimestampIndex, concatenator=concatenator, fallback=fallbacks.get('timestamp')) if not isinstance(tstamp, datetime): if "timestamp" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self.Name} {self.TableKind} table schema parsed timestamp as {type(tstamp)}, but timestamp should be a datetime", logging.WARN) - EventTableSchema._conversion_warnings.append("timestamp") + EventTableSchema._conversion_warnings["timestamp"] += 1 tstamp = conversions.DatetimeFromString(tstamp) ename = self._getValueFromRow(row=row, indices=self.EventNameIndex, concatenator=concatenator, fallback=fallbacks.get('event_name')) if not isinstance(ename, str): if "ename" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self.Name} {self.TableKind} table schema set event_name as {type(ename)}, but event_name should be a string", logging.WARN) - 
EventTableSchema._conversion_warnings.append("ename") + EventTableSchema._conversion_warnings["ename"] += 1 ename = str(ename) datas : Dict[str, Any] = self._getValueFromRow(row=row, indices=self.EventDataIndex, concatenator=concatenator, fallback=fallbacks.get('event_data')) @@ -301,42 +303,42 @@ def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}): if not isinstance(esrc, EventSource): if "esrc" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self.Name} {self.TableKind} table schema set event_source as {type(esrc)}, but event_source should be an EventSource", logging.WARN) - EventTableSchema._conversion_warnings.append("esrc") + EventTableSchema._conversion_warnings["esrc"] += 1 esrc = EventSource.GENERATED if esrc == "GENERATED" else EventSource.GAME app_ver = self._getValueFromRow(row=row, indices=self.AppVersionIndex, concatenator=concatenator, fallback=fallbacks.get('app_version', "0")) if not isinstance(app_ver, str): if "app_ver" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self.Name} {self.TableKind} table schema set app_version as {type(app_ver)}, but app_version should be a string", logging.WARN) - EventTableSchema._conversion_warnings.append("app_ver") + EventTableSchema._conversion_warnings["app_ver"] += 1 app_ver = str(app_ver) app_br = self._getValueFromRow(row=row, indices=self.AppBranchIndex, concatenator=concatenator, fallback=fallbacks.get('app_branch')) if not isinstance(app_br, str): if "app_br" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self.Name} {self.TableKind} table schema set app_branch as {type(app_br)}, but app_branch should be a string", logging.WARN) - EventTableSchema._conversion_warnings.append("app_br") + EventTableSchema._conversion_warnings["app_br"] += 1 app_br = str(app_br) log_ver = self._getValueFromRow(row=row, indices=self.LogVersionIndex, concatenator=concatenator, fallback=fallbacks.get('log_version', "0")) if not isinstance(log_ver, str): if "log_ver" not 
in EventTableSchema._conversion_warnings: Logger.Log(f"{self.Name} {self.TableKind} table schema set log_version as {type(log_ver)}, but log_version should be a string", logging.WARN) - EventTableSchema._conversion_warnings.append("log_ver") + EventTableSchema._conversion_warnings["log_ver"] += 1 log_ver = str(log_ver) offset = self._getValueFromRow(row=row, indices=self.TimeOffsetIndex, concatenator=concatenator, fallback=fallbacks.get('time_offset')) if isinstance(offset, timedelta): if "offset" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self.Name} {self.TableKind} table schema set offset as {type(offset)}, but offset should be a timezone", logging.WARN) - EventTableSchema._conversion_warnings.append("offset") + EventTableSchema._conversion_warnings["offset"] += 1 offset = timezone(offset) uid = self._getValueFromRow(row=row, indices=self.UserIDIndex, concatenator=concatenator, fallback=fallbacks.get('user_id')) if uid is not None and not isinstance(uid, str): if "uid" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self.Name} {self.TableKind} table schema set user_id as {type(uid)}, but user_id should be a string", logging.WARN) - EventTableSchema._conversion_warnings.append("uid") + EventTableSchema._conversion_warnings["uid"] += 1 uid = str(uid) udata = self._getValueFromRow(row=row, indices=self.UserDataIndex, concatenator=concatenator, fallback=fallbacks.get('user_data')) @@ -347,7 +349,7 @@ def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}): if index is not None and not isinstance(index, int): if "index" not in EventTableSchema._conversion_warnings: Logger.Log(f"{self.Name} {self.TableKind} table schema set event_sequence_index as {type(index)}, but event_sequence_index should be an int", logging.WARN) - EventTableSchema._conversion_warnings.append("index") + EventTableSchema._conversion_warnings["index"] += 1 index = int(index) return Event(session_id=sess_id, app_id=app_id, timestamp=tstamp, From 
5db2a9d8ee6d97a3bd4fa554f377245d612e3477 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 22:53:31 -0600 Subject: [PATCH 057/124] Remove unused import. --- src/ogd/common/schemas/tables/EventTableSchema.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ogd/common/schemas/tables/EventTableSchema.py b/src/ogd/common/schemas/tables/EventTableSchema.py index 34d932a..90a0cdd 100644 --- a/src/ogd/common/schemas/tables/EventTableSchema.py +++ b/src/ogd/common/schemas/tables/EventTableSchema.py @@ -10,7 +10,6 @@ from ogd.common.models.Event import Event, EventSource from ogd.common.schemas.tables.TableSchema import TableSchema, ColumnMapIndex, ColumnMapElement from ogd.common.schemas.tables.ColumnSchema import ColumnSchema -from ogd.common.utils import utils from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map, conversions From 47092dcc71a8a69434aaf38ff46b2ab04ef3f6a9 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 23:03:14 -0600 Subject: [PATCH 058/124] Remove unused import. --- src/ogd/common/schemas/tables/TableSchema.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index 81676a9..9fdced2 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -7,7 +7,6 @@ from ogd.common import schemas from ogd.common.models.enums.TableType import TableType from ogd.common.schemas.Schema import Schema -from ogd.common.schemas.tables.ColumnMapSchema import ColumnMapSchema from ogd.common.schemas.tables.ColumnSchema import ColumnSchema from ogd.common.utils import utils from ogd.common.utils.Logger import Logger From 1c8e1aa73ad625da6127db0020e13c1677c06861 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 23:12:10 -0600 Subject: [PATCH 059/124] Add optional logger for _fromDict, since we'll be copying from the ColumnMapSchema FromDict. 
--- src/ogd/common/schemas/tables/TableSchema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index 9fdced2..85ecb82 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -24,7 +24,7 @@ class TableSchema(Schema): @abc.abstractmethod @classmethod - def _fromDict(cls, name:str, table_type:TableType, raw_map:Dict[str, ColumnMapElement], column_schemas:List[ColumnSchema]) -> "TableSchema": + def _fromDict(cls, name:str, table_type:TableType, raw_map:Dict[str, ColumnMapElement], column_schemas:List[ColumnSchema], logger:Optional[logging.Logger]=None) -> "TableSchema": pass # *** BUILT-INS & PROPERTIES *** From d27d9540cba45ddbdf45a6b80876e24b28e86578 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 23:17:51 -0600 Subject: [PATCH 060/124] Fill in _fromDict body from what was in ColumnMapSchema. --- .../common/schemas/tables/EventTableSchema.py | 65 ++++++++++++++++++- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/src/ogd/common/schemas/tables/EventTableSchema.py b/src/ogd/common/schemas/tables/EventTableSchema.py index 90a0cdd..93a9dd4 100644 --- a/src/ogd/common/schemas/tables/EventTableSchema.py +++ b/src/ogd/common/schemas/tables/EventTableSchema.py @@ -223,8 +223,51 @@ def EventSequenceIndexColumn(self) -> Optional[str]: # *** IMPLEMENT ABSTRACT FUNCTIONS *** @classmethod - def _fromDict(cls, name:str, table_type:TableType, raw_map:Dict[str, ColumnMapElement], column_schemas:List[ColumnSchema]) -> "TableSchema": - _column_map : Dict[str, ColumnMapIndex] = {} + def _fromDict(cls, name:str, table_type:TableType, raw_map:Dict[str, ColumnMapElement], column_schemas:List[ColumnSchema], logger:Optional[logging.Logger]=None) -> "TableSchema": + _column_map : Dict[str, ColumnMapIndex] = { + "session_id" : None, + "app_id" : None, + "timestamp" : None, + "event_name" : None, 
+ "event_data" : None, + "event_source" : None, + "app_version" : None, + "app_branch" : None, + "log_version" : None, + "time_offset" : None, + "user_id" : None, + "user_data" : None, + "game_state" : None, + "event_sequence_index" : None + } + + column_names = [elem.Name for elem in column_schemas] + if not isinstance(raw_map, dict): + raw_map = {} + _msg = f"For {name} column map schema, raw_map was not a dict, defaulting to empty dict" + if logger: + logger.warning(_msg) + else: + Logger.Log(_msg, logging.WARN) + # for each item in the map above that we expect... + for key in _column_map.keys(): + # if the item was found in the given "column_map" dictionary... + if key in raw_map: + # parse what was mapped to the item. Could get back a string, or a list, or a dict... + element = cls._parseElement(elem=map[key], name=key) + # then if we got a string, we just find it in list of column names + if isinstance(element, str): + _column_map[key] = column_names.index(element) + # but if it's a list, we need to get index of each item in list of column names + elif isinstance(element, list): + _column_map[key] = [column_names.index(listelem) for listelem in element] + # but if it's a dict, we need to make equivalent dict mapping the key (new name) to the index (in list of column names) + elif isinstance(element, dict): + _column_map[key] = {key : column_names.index(listelem) for key,listelem in element.items()} + else: + Logger.Log(f"Column config does not have a '{key}' element, defaulting to {key} : None", logging.WARN) + _leftovers = { key : val for key,val in raw_map.items() if key not in _column_map.keys() } + return EventTableSchema(name=name, table_type=table_type, column_map=_column_map, columns=column_schemas) # *** PUBLIC STATICS *** @@ -358,3 +401,21 @@ def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}): game_state=state, event_sequence_index=index) # *** PRIVATE STATICS *** + + @staticmethod + def _parseElement(elem:Any, name:str) -> 
Optional[str | List[str] | Dict[str, str]]: + ret_val : Optional[str | List[str] | Dict[str, str]] + if elem is not None: + if isinstance(elem, str): + ret_val = elem + elif isinstance(elem, list): + ret_val = elem + elif isinstance(elem, dict): + ret_val = elem + else: + ret_val = str(elem) + Logger.Log(f"Column name(s) mapped to {name} was not a string or list, defaulting to str(name) == {ret_val} being mapped to {name}", logging.WARN) + else: + ret_val = None + Logger.Log(f"Column name mapped to {name} was left null, nothing will be mapped to {name}", logging.WARN) + return ret_val From ce7a2ce1fb9fd75617847f14faf1b53a24c6c7f3 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 23:18:49 -0600 Subject: [PATCH 061/124] Handle leftover elements. --- src/ogd/common/schemas/tables/EventTableSchema.py | 6 +++--- src/ogd/common/schemas/tables/TableSchema.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ogd/common/schemas/tables/EventTableSchema.py b/src/ogd/common/schemas/tables/EventTableSchema.py index 93a9dd4..542ab20 100644 --- a/src/ogd/common/schemas/tables/EventTableSchema.py +++ b/src/ogd/common/schemas/tables/EventTableSchema.py @@ -23,7 +23,7 @@ class EventTableSchema(TableSchema): # *** BUILT-INS & PROPERTIES *** - def __init__(self, name, table_type:TableType, column_map:Dict[str, ColumnMapIndex], columns:List[ColumnSchema]): + def __init__(self, name, table_type:TableType, column_map:Dict[str, ColumnMapIndex], columns:List[ColumnSchema], other_elements:Optional[Map]): """Constructor for the TableSchema class. Given a database connection and a game data request, this retrieves a bit of information from the database to fill in the @@ -36,7 +36,7 @@ class variables. 
:param is_legacy: [description], defaults to False :type is_legacy: bool, optional """ - super().__init__(name=name, table_type=table_type, column_map=column_map, columns=columns) + super().__init__(name=name, table_type=table_type, column_map=column_map, columns=columns, other_elements=other_elements) @property def SessionIDIndex(self) -> ColumnMapIndex: @@ -268,7 +268,7 @@ def _fromDict(cls, name:str, table_type:TableType, raw_map:Dict[str, ColumnMapEl Logger.Log(f"Column config does not have a '{key}' element, defaulting to {key} : None", logging.WARN) _leftovers = { key : val for key,val in raw_map.items() if key not in _column_map.keys() } - return EventTableSchema(name=name, table_type=table_type, column_map=_column_map, columns=column_schemas) + return EventTableSchema(name=name, table_type=table_type, column_map=_column_map, columns=column_schemas, other_elements=_leftovers) # *** PUBLIC STATICS *** diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index 85ecb82..7d0b1bf 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -10,7 +10,7 @@ from ogd.common.schemas.tables.ColumnSchema import ColumnSchema from ogd.common.utils import utils from ogd.common.utils.Logger import Logger -from ogd.common.utils.typing import conversions +from ogd.common.utils.typing import Map, conversions ColumnMapIndex : TypeAlias = Optional[int | List[int] | Dict[str,int]] ColumnMapElement : TypeAlias = Optional[str | List[str] | Dict[str,str]] @@ -29,7 +29,7 @@ def _fromDict(cls, name:str, table_type:TableType, raw_map:Dict[str, ColumnMapEl # *** BUILT-INS & PROPERTIES *** - def __init__(self, name, table_type:TableType, column_map:Dict[str, ColumnMapIndex], columns:List[ColumnSchema]): + def __init__(self, name, table_type:TableType, column_map:Dict[str, ColumnMapIndex], columns:List[ColumnSchema], other_elements:Optional[Map]): """Constructor for the TableSchema 
class. Given a database connection and a game data request, this retrieves a bit of information from the database to fill in the @@ -49,7 +49,7 @@ class variables. self._table_columns : List[ColumnSchema] = columns # after loading the file, take the stuff we need and store. - super().__init__(name=name, other_elements={}) + super().__init__(name=name, other_elements=other_elements) @property def TableKind(self) -> TableType: From 492787de9a457b863bfdc67faf83bf7db10794e3 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 23:29:09 -0600 Subject: [PATCH 062/124] Fix some imports and param names in testbed for GameSourceSchema. --- tests/cases/schemas/config/t_GameSourceSchema.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/cases/schemas/config/t_GameSourceSchema.py b/tests/cases/schemas/config/t_GameSourceSchema.py index 862e939..498a706 100644 --- a/tests/cases/schemas/config/t_GameSourceSchema.py +++ b/tests/cases/schemas/config/t_GameSourceSchema.py @@ -5,8 +5,8 @@ from typing import Any, Dict, Optional from unittest import TestCase # import ogd libraries. 
-from ogd.common.schemas.configs.data_sources.DataSourceSchema import DataSourceSchema -from ogd.common.schemas.configs.data_sources.BigQuerySourceSchema import BigQuerySchema +from ogd.common.schemas.storage.DataSourceSchema import DataSourceSchema +from ogd.common.schemas.storage.BigQuerySourceSchema import BigQuerySchema from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals @@ -32,6 +32,7 @@ def setUpClass(cls) -> None: } cls.test_schema = GameSourceSchema( name="Game Source Schema", + game_id="AQUALAB", source_name="AQUALAB_BQ", source_schema=BigQuerySchema.FromDict(name="AQUALAB_BQ", all_elements=source_elems, logger=None), db_name="aqualab", From 394cdb9945b9f0e783418f47ca8dfdc0b56ad3ce Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 23:32:03 -0600 Subject: [PATCH 063/124] Update imports in some of the storage schemas. --- src/ogd/common/schemas/storage/BigQuerySourceSchema.py | 2 +- src/ogd/common/schemas/storage/FileSourceSchema.py | 2 +- src/ogd/common/schemas/storage/MySQLSourceSchema.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ogd/common/schemas/storage/BigQuerySourceSchema.py b/src/ogd/common/schemas/storage/BigQuerySourceSchema.py index 382ac71..fa6b716 100644 --- a/src/ogd/common/schemas/storage/BigQuerySourceSchema.py +++ b/src/ogd/common/schemas/storage/BigQuerySourceSchema.py @@ -3,7 +3,7 @@ import logging from typing import Any, Dict, Optional, Type # import local files -from ogd.common.schemas.configs.data_sources.DataSourceSchema import DataSourceSchema +from ogd.common.schemas.storage.DataSourceSchema import DataSourceSchema from ogd.common.utils.Logger import Logger class BigQuerySchema(DataSourceSchema): diff --git a/src/ogd/common/schemas/storage/FileSourceSchema.py b/src/ogd/common/schemas/storage/FileSourceSchema.py index 59321a0..3fe0f80 100644 --- a/src/ogd/common/schemas/storage/FileSourceSchema.py +++ 
b/src/ogd/common/schemas/storage/FileSourceSchema.py @@ -3,7 +3,7 @@ from typing import Any, Dict, Optional from pathlib import Path # import local files -from ogd.common.schemas.configs.data_sources.DataSourceSchema import DataSourceSchema +from ogd.common.schemas.storage.DataSourceSchema import DataSourceSchema from ogd.common.utils.Logger import Logger class FileSourceSchema(DataSourceSchema): diff --git a/src/ogd/common/schemas/storage/MySQLSourceSchema.py b/src/ogd/common/schemas/storage/MySQLSourceSchema.py index 9ddcae0..34ca552 100644 --- a/src/ogd/common/schemas/storage/MySQLSourceSchema.py +++ b/src/ogd/common/schemas/storage/MySQLSourceSchema.py @@ -3,7 +3,7 @@ from typing import Any, Dict, Optional, Type # import local files from ogd.common.schemas.Schema import Schema -from ogd.common.schemas.configs.data_sources.DataSourceSchema import DataSourceSchema +from ogd.common.schemas.storage.DataSourceSchema import DataSourceSchema from ogd.common.utils.Logger import Logger class SSHSchema(Schema): From 7492c7592b3faf79e46e476b4a27f02f6c30b899 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 23:35:06 -0600 Subject: [PATCH 064/124] Make init param more in line with conventions. --- .../common/schemas/storage/DataSourceSchema.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ogd/common/schemas/storage/DataSourceSchema.py b/src/ogd/common/schemas/storage/DataSourceSchema.py index 03328ca..372cc0f 100644 --- a/src/ogd/common/schemas/storage/DataSourceSchema.py +++ b/src/ogd/common/schemas/storage/DataSourceSchema.py @@ -20,24 +20,24 @@ class DataSourceSchema(Schema): # @overload # def __init__(self, name:str, other_elements:Dict[str, Any]): ... - def __init__(self, name:str, unparsed_elements:Dict[str, Any] | Any): + def __init__(self, name:str, other_elements:Dict[str, Any] | Any): self._source_type : str # 1. 
Ensure we've actually got a dict to parse from - if not isinstance(unparsed_elements, dict): - unparsed_elements = {} + if not isinstance(other_elements, dict): + other_elements = {} Logger.Log(f"For {name} Data Source config, other_elements was not a dict, defaulting to empty dict", logging.WARN) # 2. Parse standard elements, with legacy elements nested under "else" case. - if "SOURCE_TYPE" in unparsed_elements.keys(): - self._source_type = DataSourceSchema._parseSourceType(unparsed_elements["SOURCE_TYPE"]) + if "SOURCE_TYPE" in other_elements.keys(): + self._source_type = DataSourceSchema._parseSourceType(other_elements["SOURCE_TYPE"]) else: - if "DB_TYPE" in unparsed_elements.keys(): - self._source_type = DataSourceSchema._parseSourceType(unparsed_elements["DB_TYPE"]) + if "DB_TYPE" in other_elements.keys(): + self._source_type = DataSourceSchema._parseSourceType(other_elements["DB_TYPE"]) else: self._source_type = "UNKNOWN" Logger.Log(f"{name} config does not have a 'SOURCE_TYPE' element; defaulting to db_name={self._source_type}", logging.WARN) _used = {"SOURCE_TYPE", "DB_TYPE"} - _leftovers = { key : val for key,val in unparsed_elements.items() if key not in _used } + _leftovers = { key : val for key,val in other_elements.items() if key not in _used } super().__init__(name=name, other_elements=_leftovers) @property From c3910ba2ddd44820f14ce73db4bf9ac38a303ebf Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 23:43:38 -0600 Subject: [PATCH 065/124] Move _parseElement to TableSchema base class. 
--- .../common/schemas/tables/EventTableSchema.py | 18 ------------------ src/ogd/common/schemas/tables/TableSchema.py | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/ogd/common/schemas/tables/EventTableSchema.py b/src/ogd/common/schemas/tables/EventTableSchema.py index 542ab20..6089ea1 100644 --- a/src/ogd/common/schemas/tables/EventTableSchema.py +++ b/src/ogd/common/schemas/tables/EventTableSchema.py @@ -401,21 +401,3 @@ def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}): game_state=state, event_sequence_index=index) # *** PRIVATE STATICS *** - - @staticmethod - def _parseElement(elem:Any, name:str) -> Optional[str | List[str] | Dict[str, str]]: - ret_val : Optional[str | List[str] | Dict[str, str]] - if elem is not None: - if isinstance(elem, str): - ret_val = elem - elif isinstance(elem, list): - ret_val = elem - elif isinstance(elem, dict): - ret_val = elem - else: - ret_val = str(elem) - Logger.Log(f"Column name(s) mapped to {name} was not a string or list, defaulting to str(name) == {ret_val} being mapped to {name}", logging.WARN) - else: - ret_val = None - Logger.Log(f"Column name mapped to {name} was left null, nothing will be mapped to {name}", logging.WARN) - return ret_val diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index 7d0b1bf..ff6d1a6 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -164,6 +164,24 @@ def _columnMapMarkdown(self) -> str: event_column_list.append(f"**{event_element}** = null ") ret_val = "\n".join(event_column_list) return ret_val + + @staticmethod + def _parseElement(elem:Any, name:str) -> Optional[ColumnMapElement]: + ret_val : Optional[str | List[str] | Dict[str, str]] + if elem is not None: + if isinstance(elem, str): + ret_val = elem + elif isinstance(elem, list): + ret_val = elem + elif isinstance(elem, dict): + ret_val = elem + else: + 
ret_val = str(elem) + Logger.Log(f"Column name(s) mapped to {name} was not a string or list, defaulting to str(name) == {ret_val} being mapped to {name}", logging.WARN) + else: + ret_val = None + Logger.Log(f"Column name mapped to {name} was left null, nothing will be mapped to {name}", logging.WARN) + return ret_val def _getValueFromRow(self, row:Tuple, indices:Optional[int | List[int] | Dict[str, int]], concatenator:str, fallback:Any) -> Any: ret_val : Any From 2cb44ff799ac98a7d640ab71bf998cbdb6551cb7 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 26 Nov 2024 23:53:48 -0600 Subject: [PATCH 066/124] Add comment block for _parseElement, along with a reminder to pick a more descriptive name. --- src/ogd/common/schemas/tables/TableSchema.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index ff6d1a6..32086fa 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -167,6 +167,17 @@ def _columnMapMarkdown(self) -> str: @staticmethod def _parseElement(elem:Any, name:str) -> Optional[ColumnMapElement]: + """_summary_ + + TODO : Pick a better name + + :param elem: _description_ + :type elem: Any + :param name: _description_ + :type name: str + :return: _description_ + :rtype: Optional[ColumnMapElement] + """ ret_val : Optional[str | List[str] | Dict[str, str]] if elem is not None: if isinstance(elem, str): From caab91570fff47902c0818ccf293eda5ae41f573 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 27 Nov 2024 00:02:25 -0600 Subject: [PATCH 067/124] Fix up a lot of the FeatureTableSchema code. Still mostly a copy of EventTableSchema. 
--- .../schemas/tables/FeatureTableSchema.py | 321 +++++++++++------- 1 file changed, 204 insertions(+), 117 deletions(-) diff --git a/src/ogd/common/schemas/tables/FeatureTableSchema.py b/src/ogd/common/schemas/tables/FeatureTableSchema.py index 47d9c7f..a505106 100644 --- a/src/ogd/common/schemas/tables/FeatureTableSchema.py +++ b/src/ogd/common/schemas/tables/FeatureTableSchema.py @@ -1,24 +1,19 @@ """EventTableSchema Module""" # import standard libraries import logging -import re +from collections import Counter from datetime import datetime, timedelta, timezone -from json.decoder import JSONDecodeError -from pathlib import Path -from typing import Any, Dict, Tuple, Optional - -# import 3rd-party libraries -from dateutil import parser +from typing import Any, Dict, List, Tuple, Optional # import local files -from ogd.common import schemas -from ogd.common.schemas.tables.TableSchema import TableSchema +from ogd.common.models.enums.TableType import TableType from ogd.common.models.FeatureData import FeatureData from ogd.common.models.Event import Event, EventSource +from ogd.common.schemas.tables.TableSchema import TableSchema, ColumnMapIndex, ColumnMapElement +from ogd.common.schemas.tables.ColumnSchema import ColumnSchema from ogd.common.utils.typing import Map -from ogd.common.utils import utils from ogd.common.utils.Logger import Logger -from ogd.common.utils.typing import Map +from ogd.common.utils.typing import Map, conversions ## @class TableSchema class FeatureTableSchema(TableSchema): @@ -32,7 +27,7 @@ class FeatureTableSchema(TableSchema): # *** BUILT-INS & PROPERTIES *** - def __init__(self, schema_name:str, schema_path:Path = Path(schemas.__file__).parent / "table_schemas/"): + def __init__(self, name, table_type:TableType, column_map:Dict[str, ColumnMapIndex], columns:List[ColumnSchema], other_elements:Optional[Map]): """Constructor for the TableSchema class. 
Given a database connection and a game data request, this retrieves a bit of information from the database to fill in the @@ -45,153 +40,245 @@ class variables. :param is_legacy: [description], defaults to False :type is_legacy: bool, optional """ - super().__init__(schema_name=schema_name, schema_path=schema_path) + super().__init__(name=name, table_type=table_type, column_map=column_map, columns=columns, other_elements=other_elements) @property - def AsMarkdown(self) -> str: - ret_val = "\n\n".join([ - "## Database Columns", - "The individual columns recorded in the database for this game.", - "\n".join([item.AsMarkdown for item in self.Columns]), - "## Feature Object Elements", - "The elements (member variables) of each Feature object, available to programmers when writing feature extractors. The right-hand side shows which database column(s) are mapped to a given element.", - self._column_map.AsMarkdown, - ""]) - return ret_val + def SessionIDIndex(self) -> ColumnMapIndex: + return self._column_map['session_id'] + + @property + def AppIDIndex(self) -> ColumnMapIndex: + return self._column_map['app_id'] + + @property + def TimestampIndex(self) -> ColumnMapIndex: + return self._column_map['timestamp'] + + @property + def EventNameIndex(self) -> ColumnMapIndex: + return self._column_map['event_name'] + + @property + def EventDataIndex(self) -> ColumnMapIndex: + return self._column_map['event_data'] + + @property + def EventSourceIndex(self) -> ColumnMapIndex: + return self._column_map['event_source'] + + @property + def AppVersionIndex(self) -> ColumnMapIndex: + return self._column_map['app_version'] + + @property + def AppBranchIndex(self) -> ColumnMapIndex: + return self._column_map['app_branch'] + + @property + def LogVersionIndex(self) -> ColumnMapIndex: + return self._column_map['log_version'] + + @property + def TimeOffsetIndex(self) -> ColumnMapIndex: + return self._column_map['time_offset'] + + @property + def UserIDIndex(self) -> ColumnMapIndex: + return 
self._column_map['user_id'] + + @property + def UserDataIndex(self) -> ColumnMapIndex: + return self._column_map['user_data'] + + @property + def GameStateIndex(self) -> ColumnMapIndex: + return self._column_map['game_state'] + + @property + def EventSequenceIndexIndex(self) -> ColumnMapIndex: + return self._column_map['event_sequence_index'] @property def SessionIDColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.SessionID, int): - ret_val = self.ColumnNames[self._column_map.SessionID] - elif isinstance(self._column_map.SessionID, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.SessionID]) + if isinstance(self.SessionIDIndex, int): + ret_val = self.ColumnNames[self.SessionIDIndex] + elif isinstance(self.SessionIDIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.SessionIDIndex]) return ret_val @property def AppIDColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.AppID, int): - ret_val = self.ColumnNames[self._column_map.AppID] - elif isinstance(self._column_map.AppID, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppID]) + if isinstance(self.AppIDIndex, int): + ret_val = self.ColumnNames[self.AppIDIndex] + elif isinstance(self.AppIDIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppIDIndex]) return ret_val @property def TimestampColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.Timestamp, int): - ret_val = self.ColumnNames[self._column_map.Timestamp] - elif isinstance(self._column_map.Timestamp, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.Timestamp]) + if isinstance(self.TimestampIndex, int): + ret_val = self.ColumnNames[self.TimestampIndex] + elif isinstance(self.TimestampIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.TimestampIndex]) return ret_val @property def EventNameColumn(self) -> 
Optional[str]: ret_val = None - if isinstance(self._column_map.EventName, int): - ret_val = self.ColumnNames[self._column_map.EventName] - elif isinstance(self._column_map.EventName, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventName]) + if isinstance(self.EventNameIndex, int): + ret_val = self.ColumnNames[self.EventNameIndex] + elif isinstance(self.EventNameIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventNameIndex]) return ret_val @property def EventDataColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.EventData, int): - ret_val = self.ColumnNames[self._column_map.EventData] - elif isinstance(self._column_map.EventData, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventData]) + if isinstance(self.EventDataIndex, int): + ret_val = self.ColumnNames[self.EventDataIndex] + elif isinstance(self.EventDataIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventDataIndex]) return ret_val @property def EventSourceColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.EventSource, int): - ret_val = self.ColumnNames[self._column_map.EventSource] - elif isinstance(self._column_map.EventSource, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventSource]) + if isinstance(self.EventSourceIndex, int): + ret_val = self.ColumnNames[self.EventSourceIndex] + elif isinstance(self.EventSourceIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventSourceIndex]) return ret_val @property def AppVersionColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.AppVersion, int): - ret_val = self.ColumnNames[self._column_map.AppVersion] - elif isinstance(self._column_map.AppVersion, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppVersion]) + if isinstance(self.AppVersionIndex, int): + ret_val = 
self.ColumnNames[self.AppVersionIndex] + elif isinstance(self.AppVersionIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppVersionIndex]) return ret_val @property def AppBranchColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.AppBranch, int): - ret_val = self.ColumnNames[self._column_map.AppBranch] - elif isinstance(self._column_map.AppBranch, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppBranch]) + if isinstance(self.AppBranchIndex, int): + ret_val = self.ColumnNames[self.AppBranchIndex] + elif isinstance(self.AppBranchIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppBranchIndex]) return ret_val @property def LogVersionColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.LogVersion, int): - ret_val = self.ColumnNames[self._column_map.LogVersion] - elif isinstance(self._column_map.LogVersion, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.LogVersion]) + if isinstance(self.LogVersionIndex, int): + ret_val = self.ColumnNames[self.LogVersionIndex] + elif isinstance(self.LogVersionIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.LogVersionIndex]) return ret_val @property def TimeOffsetColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.TimeOffset, int): - ret_val = self.ColumnNames[self._column_map.TimeOffset] - elif isinstance(self._column_map.TimeOffset, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.TimeOffset]) + if isinstance(self.TimeOffsetIndex, int): + ret_val = self.ColumnNames[self.TimeOffsetIndex] + elif isinstance(self.TimeOffsetIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.TimeOffsetIndex]) return ret_val @property def UserIDColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.UserID, int): - ret_val = 
self.ColumnNames[self._column_map.UserID] - elif isinstance(self._column_map.UserID, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.UserID]) + if isinstance(self.UserIDIndex, int): + ret_val = self.ColumnNames[self.UserIDIndex] + elif isinstance(self.UserIDIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.UserIDIndex]) return ret_val @property def UserDataColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.UserData, int): - ret_val = self.ColumnNames[self._column_map.UserData] - elif isinstance(self._column_map.UserData, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.UserData]) + if isinstance(self.UserDataIndex, int): + ret_val = self.ColumnNames[self.UserDataIndex] + elif isinstance(self.UserDataIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.UserDataIndex]) return ret_val @property def GameStateColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.GameState, int): - ret_val = self.ColumnNames[self._column_map.GameState] - elif isinstance(self._column_map.GameState, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.GameState]) + if isinstance(self.GameStateIndex, int): + ret_val = self.ColumnNames[self.GameStateIndex] + elif isinstance(self.GameStateIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.GameStateIndex]) return ret_val @property def EventSequenceIndexColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.EventSequenceIndex, int): - ret_val = self.ColumnNames[self._column_map.EventSequenceIndex] - elif isinstance(self._column_map.EventSequenceIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventSequenceIndex]) + if isinstance(self.EventSequenceIndexIndex, int): + ret_val = self.ColumnNames[self.EventSequenceIndexIndex] + elif isinstance(self.EventSequenceIndexIndex, 
list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventSequenceIndexIndex]) return ret_val # *** IMPLEMENT ABSTRACT FUNCTIONS *** + @classmethod + def _fromDict(cls, name:str, table_type:TableType, raw_map:Dict[str, ColumnMapElement], column_schemas:List[ColumnSchema], logger:Optional[logging.Logger]=None) -> "TableSchema": + _column_map : Dict[str, ColumnMapIndex] = { + "session_id" : None, + "app_id" : None, + "timestamp" : None, + "event_name" : None, + "event_data" : None, + "event_source" : None, + "app_version" : None, + "app_branch" : None, + "log_version" : None, + "time_offset" : None, + "user_id" : None, + "user_data" : None, + "game_state" : None, + "event_sequence_index" : None + } + + column_names = [elem.Name for elem in column_schemas] + if not isinstance(raw_map, dict): + raw_map = {} + _msg = f"For {name} column map schema, raw_map was not a dict, defaulting to empty dict" + if logger: + logger.warning(_msg) + else: + Logger.Log(_msg, logging.WARN) + # for each item in the map above that we expect... + for key in _column_map.keys(): + # if the item was found in the given "column_map" dictionary... + if key in raw_map: + # parse what was mapped to the item. Could get back a string, or a list, or a dict... 
+ element = cls._parseElement(elem=map[key], name=key) + # then if we got a string, we just find it in list of column names + if isinstance(element, str): + _column_map[key] = column_names.index(element) + # but if it's a list, we need to get index of each item in list of column names + elif isinstance(element, list): + _column_map[key] = [column_names.index(listelem) for listelem in element] + # but if it's a dict, we need to make equivalent dict mapping the key (new name) to the index (in list of column names) + elif isinstance(element, dict): + _column_map[key] = {key : column_names.index(listelem) for key,listelem in element.items()} + else: + Logger.Log(f"Column config does not have a '{key}' element, defaulting to {key} : None", logging.WARN) + _leftovers = { key : val for key,val in raw_map.items() if key not in _column_map.keys() } + + return FeatureTableSchema(name=name, table_type=table_type, column_map=_column_map, columns=column_schemas, other_elements=_leftovers) + # *** PUBLIC STATICS *** # *** PUBLIC METHODS *** - _conversion_warnings = [] + _conversion_warnings = Counter() def RowToFeatureData(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}) -> FeatureData: """Function to convert a row to an Event, based on the loaded schema. In general, columns specified in the schema's column_map are mapped to corresponding elements of the Event. @@ -225,90 +312,90 @@ def RowToFeatureData(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}) # 2) Handle event_data parameter, a special case. # For this case we've got to parse the json, and then fold in whatever other columns were desired. # 3) Assign vals to our arg vars and pass to Event ctor. 
- sess_id = self._getValueFromRow(row=row, indices=self._column_map.SessionID, concatenator=concatenator, fallback=fallbacks.get('session_id')) + sess_id = self._getValueFromRow(row=row, indices=self.SessionIDIndex, concatenator=concatenator, fallback=fallbacks.get('session_id')) if not isinstance(sess_id, str): if "sess_id" not in FeatureTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set session_id as {type(sess_id)}, but session_id should be a string", logging.WARN) - FeatureTableSchema._conversion_warnings.append("sess_id") + Logger.Log(f"{self.Name} {self.TableKind} table schema set session_id as {type(sess_id)}, but session_id should be a string", logging.WARN) + FeatureTableSchema._conversion_warnings["sess_id"] += 1 sess_id = str(sess_id) - app_id = self._getValueFromRow(row=row, indices=self._column_map.AppID, concatenator=concatenator, fallback=fallbacks.get('app_id')) + app_id = self._getValueFromRow(row=row, indices=self.AppIDIndex, concatenator=concatenator, fallback=fallbacks.get('app_id')) if not isinstance(app_id, str): if "app_id" not in FeatureTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set app_id as {type(app_id)}, but app_id should be a string", logging.WARN) - FeatureTableSchema._conversion_warnings.append("app_id") + Logger.Log(f"{self.Name} {self.TableKind} table schema set app_id as {type(app_id)}, but app_id should be a string", logging.WARN) + FeatureTableSchema._conversion_warnings["app_id"] += 1 app_id = str(app_id) - tstamp = self._getValueFromRow(row=row, indices=self._column_map.Timestamp, concatenator=concatenator, fallback=fallbacks.get('timestamp')) + tstamp = self._getValueFromRow(row=row, indices=self.TimestampIndex, concatenator=concatenator, fallback=fallbacks.get('timestamp')) if not isinstance(tstamp, datetime): if "timestamp" not in FeatureTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema parsed timestamp 
as {type(tstamp)}, but timestamp should be a datetime", logging.WARN) - FeatureTableSchema._conversion_warnings.append("timestamp") - tstamp = TableSchema._convertDateTime(tstamp) + Logger.Log(f"{self.Name} {self.TableKind} table schema parsed timestamp as {type(tstamp)}, but timestamp should be a datetime", logging.WARN) + FeatureTableSchema._conversion_warnings["timestamp"] += 1 + tstamp = conversions.DatetimeFromString(tstamp) - ename = self._getValueFromRow(row=row, indices=self._column_map.EventName, concatenator=concatenator, fallback=fallbacks.get('event_name')) + ename = self._getValueFromRow(row=row, indices=self.EventNameIndex, concatenator=concatenator, fallback=fallbacks.get('event_name')) if not isinstance(ename, str): if "ename" not in FeatureTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set event_name as {type(ename)}, but event_name should be a string", logging.WARN) - FeatureTableSchema._conversion_warnings.append("ename") + Logger.Log(f"{self.Name} {self.TableKind} table schema set event_name as {type(ename)}, but event_name should be a string", logging.WARN) + FeatureTableSchema._conversion_warnings["ename"] += 1 ename = str(ename) - datas : Dict[str, Any] = self._getValueFromRow(row=row, indices=self._column_map.EventData, concatenator=concatenator, fallback=fallbacks.get('event_data')) + datas : Dict[str, Any] = self._getValueFromRow(row=row, indices=self.EventDataIndex, concatenator=concatenator, fallback=fallbacks.get('event_data')) # TODO: go bac to isostring function; need 0-padding on ms first, though edata = dict(sorted(datas.items())) # Sort keys alphabetically - esrc = self._getValueFromRow(row=row, indices=self._column_map.EventSource, concatenator=concatenator, fallback=fallbacks.get('event_source', EventSource.GAME)) + esrc = self._getValueFromRow(row=row, indices=self.EventSourceIndex, concatenator=concatenator, fallback=fallbacks.get('event_source', EventSource.GAME)) if not 
isinstance(esrc, EventSource): if "esrc" not in FeatureTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set event_source as {type(esrc)}, but event_source should be an EventSource", logging.WARN) - FeatureTableSchema._conversion_warnings.append("esrc") + Logger.Log(f"{self.Name} {self.TableKind} table schema set event_source as {type(esrc)}, but event_source should be an EventSource", logging.WARN) + FeatureTableSchema._conversion_warnings["esrc"] += 1 esrc = EventSource.GENERATED if esrc == "GENERATED" else EventSource.GAME - app_ver = self._getValueFromRow(row=row, indices=self._column_map.AppVersion, concatenator=concatenator, fallback=fallbacks.get('app_version', "0")) + app_ver = self._getValueFromRow(row=row, indices=self.AppVersionIndex, concatenator=concatenator, fallback=fallbacks.get('app_version', "0")) if not isinstance(app_ver, str): if "app_ver" not in FeatureTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set app_version as {type(app_ver)}, but app_version should be a string", logging.WARN) - FeatureTableSchema._conversion_warnings.append("app_ver") + Logger.Log(f"{self.Name} {self.TableKind} table schema set app_version as {type(app_ver)}, but app_version should be a string", logging.WARN) + FeatureTableSchema._conversion_warnings["app_ver"] += 1 app_ver = str(app_ver) - app_br = self._getValueFromRow(row=row, indices=self._column_map.AppBranch, concatenator=concatenator, fallback=fallbacks.get('app_branch')) + app_br = self._getValueFromRow(row=row, indices=self.AppBranchIndex, concatenator=concatenator, fallback=fallbacks.get('app_branch')) if not isinstance(app_br, str): if "app_br" not in FeatureTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set app_branch as {type(app_br)}, but app_branch should be a string", logging.WARN) - FeatureTableSchema._conversion_warnings.append("app_br") + Logger.Log(f"{self.Name} {self.TableKind} 
table schema set app_branch as {type(app_br)}, but app_branch should be a string", logging.WARN) + FeatureTableSchema._conversion_warnings["app_br"] += 1 app_br = str(app_br) - log_ver = self._getValueFromRow(row=row, indices=self._column_map.LogVersion, concatenator=concatenator, fallback=fallbacks.get('log_version', "0")) + log_ver = self._getValueFromRow(row=row, indices=self.LogVersionIndex, concatenator=concatenator, fallback=fallbacks.get('log_version', "0")) if not isinstance(log_ver, str): if "log_ver" not in FeatureTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set log_version as {type(log_ver)}, but log_version should be a string", logging.WARN) - FeatureTableSchema._conversion_warnings.append("log_ver") + Logger.Log(f"{self.Name} {self.TableKind} table schema set log_version as {type(log_ver)}, but log_version should be a string", logging.WARN) + FeatureTableSchema._conversion_warnings["log_ver"] += 1 log_ver = str(log_ver) - offset = self._getValueFromRow(row=row, indices=self._column_map.TimeOffset, concatenator=concatenator, fallback=fallbacks.get('time_offset')) + offset = self._getValueFromRow(row=row, indices=self.TimeOffsetIndex, concatenator=concatenator, fallback=fallbacks.get('time_offset')) if isinstance(offset, timedelta): if "offset" not in FeatureTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set offset as {type(offset)}, but offset should be a timezone", logging.WARN) - FeatureTableSchema._conversion_warnings.append("offset") + Logger.Log(f"{self.Name} {self.TableKind} table schema set offset as {type(offset)}, but offset should be a timezone", logging.WARN) + FeatureTableSchema._conversion_warnings["offset"] += 1 offset = timezone(offset) - uid = self._getValueFromRow(row=row, indices=self._column_map.UserID, concatenator=concatenator, fallback=fallbacks.get('user_id')) + uid = self._getValueFromRow(row=row, indices=self.UserIDIndex, 
concatenator=concatenator, fallback=fallbacks.get('user_id')) if uid is not None and not isinstance(uid, str): if "uid" not in FeatureTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set user_id as {type(uid)}, but user_id should be a string", logging.WARN) - FeatureTableSchema._conversion_warnings.append("uid") + Logger.Log(f"{self.Name} {self.TableKind} table schema set user_id as {type(uid)}, but user_id should be a string", logging.WARN) + FeatureTableSchema._conversion_warnings["uid"] += 1 uid = str(uid) - udata = self._getValueFromRow(row=row, indices=self._column_map.UserData, concatenator=concatenator, fallback=fallbacks.get('user_data')) + udata = self._getValueFromRow(row=row, indices=self.UserDataIndex, concatenator=concatenator, fallback=fallbacks.get('user_data')) - state = self._getValueFromRow(row=row, indices=self._column_map.GameState, concatenator=concatenator, fallback=fallbacks.get('game_state')) + state = self._getValueFromRow(row=row, indices=self.GameStateIndex, concatenator=concatenator, fallback=fallbacks.get('game_state')) - index = self._getValueFromRow(row=row, indices=self._column_map.EventSequenceIndex, concatenator=concatenator, fallback=fallbacks.get('event_sequence_index')) + index = self._getValueFromRow(row=row, indices=self.EventSequenceIndexIndex, concatenator=concatenator, fallback=fallbacks.get('event_sequence_index')) if index is not None and not isinstance(index, int): if "index" not in FeatureTableSchema._conversion_warnings: - Logger.Log(f"{self._table_format_name} table schema set event_sequence_index as {type(index)}, but event_sequence_index should be an int", logging.WARN) - FeatureTableSchema._conversion_warnings.append("index") + Logger.Log(f"{self.Name} {self.TableKind} table schema set event_sequence_index as {type(index)}, but event_sequence_index should be an int", logging.WARN) + FeatureTableSchema._conversion_warnings["index"] += 1 index = int(index) return 
Event(session_id=sess_id, app_id=app_id, timestamp=tstamp, From 4eb8195c21f5685c963c284631425db43b4d4c97 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Thu, 28 Nov 2024 09:28:49 -0600 Subject: [PATCH 068/124] In GameSourceSchema, rename TableSchema prop -> TableSchemaName This is technically a string indicating which to use, so better to call it a name, and leave to another step the finding of the actual schema from that name. --- .../common/connectors/interfaces/Interface.py | 6 +++--- .../schemas/configs/GameSourceSchema.py | 20 ++++++++++--------- .../schemas/config/t_GameSourceSchema.py | 6 +++--- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index 80ba710..f351310 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -142,7 +142,7 @@ def GetEventCollection(self, id_filter:IDFilterCollection=IDFilterCollection(), _filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict | event_filter.AsDict _events = [] if self.IsOpen: - if isinstance(self.GameSourceSchema.TableSchema, EventTableSchema): + if isinstance(self.GameSourceSchema.TableSchemaName, EventTableSchema): # _date_clause = f" on date(s) {date_filter}" _msg = f"Retrieving event data from {self.ResourceName}." Logger.Log(_msg, logging.INFO, depth=3) @@ -158,7 +158,7 @@ def GetFeatureCollection(self, id_filter:IDFilterCollection=IDFilterCollection() _filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict _features = [] if self.IsOpen: - if isinstance(self.GameSourceSchema.TableSchema, EventTableSchema): + if isinstance(self.GameSourceSchema.TableSchemaName, EventTableSchema): # _date_clause = f" on date(s) {date_filter}" _msg = f"Retrieving event data from {self.ResourceName}." 
Logger.Log(_msg, logging.INFO, depth=3) @@ -180,7 +180,7 @@ def _eventsFromRows(self, rows:List[Tuple]) -> List[Event]: _curr_sess : str = "" _evt_sess_index : int = 1 _fallbacks = {"app_id":self._source_schema.GameID} - _table_schema = self.GameSourceSchema.TableSchema + _table_schema = self.GameSourceSchema.TableSchemaName if isinstance(_table_schema, EventTableSchema): for row in rows: try: diff --git a/src/ogd/common/schemas/configs/GameSourceSchema.py b/src/ogd/common/schemas/configs/GameSourceSchema.py index 18d9383..354ccb2 100644 --- a/src/ogd/common/schemas/configs/GameSourceSchema.py +++ b/src/ogd/common/schemas/configs/GameSourceSchema.py @@ -4,6 +4,7 @@ # import local files from ogd.common.schemas.storage.DataSourceSchema import DataSourceSchema from ogd.common.schemas.Schema import Schema +from ogd.common.schemas.tables.TableSchema import TableSchema from ogd.common.utils.Logger import Logger class GameSourceSchema(Schema): @@ -31,12 +32,13 @@ def __init__(self, name:str, game_id:Optional[str], source_name:str, source_schema:Optional[DataSourceSchema], db_name:str, table_name:str, table_schema:str, other_elements:Dict[str, Any]): - self._game_id : str - self._source_name : str = source_name - self._source_schema : Optional[DataSourceSchema] = source_schema - self._db_name : str = db_name - self._table_name : str = table_name - self._table_schema : str = table_schema + self._game_id : str + self._source_name : str = source_name + self._source_schema : Optional[DataSourceSchema] = source_schema + self._db_name : str = db_name + self._table_name : str = table_name + self._table_schema_name : str = table_schema + self._table_schema : TableSchema = TableSchema.FromFile(schema_name=self._table_schema_name) if game_id is not None: self._game_id = game_id @@ -66,8 +68,8 @@ def TableName(self) -> str: return self._table_name @property - def TableSchema(self) -> str: - return self._table_schema + def TableSchemaName(self) -> str: + return self._table_schema_name 
# *** IMPLEMENT ABSTRACT FUNCTIONS *** @@ -75,7 +77,7 @@ def TableSchema(self) -> str: def AsMarkdown(self) -> str: ret_val : str - ret_val = f"{self.Name}: _{self.TableSchema}_ format, source {self.Source.Name if self.Source else 'None'} : {self.DatabaseName}.{self.TableName}" + ret_val = f"{self.Name}: _{self.TableSchemaName}_ format, source {self.Source.Name if self.Source else 'None'} : {self.DatabaseName}.{self.TableName}" return ret_val @classmethod diff --git a/tests/cases/schemas/config/t_GameSourceSchema.py b/tests/cases/schemas/config/t_GameSourceSchema.py index 498a706..57ab84e 100644 --- a/tests/cases/schemas/config/t_GameSourceSchema.py +++ b/tests/cases/schemas/config/t_GameSourceSchema.py @@ -73,7 +73,7 @@ def test_TableName(self): self.assertEqual(_str, "aqualab_daily") def test_TableSchema(self): - _str = self.test_schema.TableSchema + _str = self.test_schema.TableSchemaName self.assertIsInstance(_str, str) self.assertEqual(_str, "OPENGAMEDATA_BIGQUERY") @@ -118,8 +118,8 @@ def test_FromDict(self): self.assertEqual(_schema.DatabaseName, "aqualab") self.assertIsInstance(_schema.TableName, str) self.assertEqual(_schema.TableName, "aqualab_daily") - self.assertIsInstance(_schema.TableSchema, str) - self.assertEqual(_schema.TableSchema, "OPENGAMEDATA_BIGQUERY") + self.assertIsInstance(_schema.TableSchemaName, str) + self.assertEqual(_schema.TableSchemaName, "OPENGAMEDATA_BIGQUERY") @unittest.skip("Not yet implemented") def test_parseSource(self): From 6b86ac6ec4ccc6fc7e518a0404526a227f19f5ca Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Thu, 28 Nov 2024 09:31:38 -0600 Subject: [PATCH 069/124] We're just integrating the logic of ColumnMapSchema back into TableSchema, so no need for the original file anymore. 
--- .../common/schemas/tables/ColumnMapSchema.py | 205 ------------------ 1 file changed, 205 deletions(-) delete mode 100644 src/ogd/common/schemas/tables/ColumnMapSchema.py diff --git a/src/ogd/common/schemas/tables/ColumnMapSchema.py b/src/ogd/common/schemas/tables/ColumnMapSchema.py deleted file mode 100644 index 5e8f1a2..0000000 --- a/src/ogd/common/schemas/tables/ColumnMapSchema.py +++ /dev/null @@ -1,205 +0,0 @@ -# import standard libraries -import logging -from typing import Any, Dict, List, Optional, TypeAlias -# import local files -from ogd.common.schemas.Schema import Schema -from ogd.common.schemas.tables.ColumnSchema import ColumnSchema -from ogd.common.utils.Logger import Logger - -class ColumnMapSchema(Schema): - ColumnMapIndex : TypeAlias = Optional[int | List[int] | Dict[str,int]] - - # *** BUILT-INS & PROPERTIES *** - - def __init__(self, name:str, map:Dict[str, ColumnMapIndex], column_names:List[str], other_elements:Dict[str, Any]={}): - self._map : Dict[str, ColumnMapSchema.ColumnMapIndex] = map - self._column_names : List[str] = column_names - - super().__init__(name=name, other_elements=other_elements) - - @property - def Map(self) -> Dict[str, ColumnMapIndex]: - """Mapping from Event element names to the indices of the database columns mapped to them. - There may be a single index, indicating a 1-to-1 mapping of a database column to the element; - There may be a list of indices, indicating multiple columns will be concatenated to form the element value; - There may be a further mapping of keys to indicies, indicating multiple columns will be joined into a JSON object, with keys mapped to values found at the columns with given indices. - - :return: The dictionary mapping of element names to indices. 
- :rtype: Dict[str, Union[int, List[int], Dict[str, int], None]] - """ - return self._map - - @property - def SessionID(self) -> ColumnMapIndex: - return self._map['session_id'] - - @property - def AppID(self) -> ColumnMapIndex: - return self._map['app_id'] - - @property - def Timestamp(self) -> ColumnMapIndex: - return self._map['timestamp'] - - @property - def EventName(self) -> ColumnMapIndex: - return self._map['event_name'] - - @property - def EventData(self) -> ColumnMapIndex: - return self._map['event_data'] - - @property - def EventSource(self) -> ColumnMapIndex: - return self._map['event_source'] - - @property - def AppVersion(self) -> ColumnMapIndex: - return self._map['app_version'] - - @property - def AppBranch(self) -> ColumnMapIndex: - return self._map['app_branch'] - - @property - def LogVersion(self) -> ColumnMapIndex: - return self._map['log_version'] - - @property - def TimeOffset(self) -> ColumnMapIndex: - return self._map['time_offset'] - - @property - def UserID(self) -> ColumnMapIndex: - return self._map['user_id'] - - @property - def UserData(self) -> ColumnMapIndex: - return self._map['user_data'] - - @property - def GameState(self) -> ColumnMapIndex: - return self._map['game_state'] - - @property - def EventSequenceIndex(self) -> ColumnMapIndex: - return self._map['event_sequence_index'] - - @property - def Elements(self) -> Dict[str, str]: - return self._other_elements - - @property - def ElementNames(self) -> List[str]: - return list(self._other_elements.keys()) - - # *** IMPLEMENT ABSTRACT FUNCTIONS *** - - @property - def AsMarkdown(self) -> str: - ret_val : str - - event_column_list = [] - for evt_col,row_col in self._map.items(): - if row_col is not None: - if isinstance(row_col, str): - event_column_list.append(f"**{evt_col}** = Column '*{row_col}*' ") - elif isinstance(row_col, list): - mapped_list = ", ".join([f"'*{item}*'" for item in row_col]) - event_column_list.append(f"**{evt_col}** = Columns {mapped_list} ") # figure out how 
to do one string foreach item in list. - elif isinstance(row_col, int): - event_column_list.append(f"**{evt_col}** = Column '*{self._column_names[row_col]}*' (index {row_col}) ") - else: - event_column_list.append(f"**{evt_col}** = Column '*{row_col}*' (DEBUG: Type {type(row_col)}) ") - else: - event_column_list.append(f"**{evt_col}** = null ") - ret_val = "\n".join(event_column_list) - return ret_val - - @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], column_names:List[str], logger:Optional[logging.Logger]=None)-> "ColumnMapSchema": - """Function to generate a ColumnMapSchema from a JSON object - - TODO : find a way around using column_names as a direct parameter. - - :param name: _description_ - :type name: str - :param all_elements: _description_ - :type all_elements: Dict[str, Any] - :param column_names: _description_ - :type column_names: List[str] - :param logger: _description_, defaults to None - :type logger: Optional[logging.Logger], optional - :return: _description_ - :rtype: ColumnMapSchema - """ - _map : Dict[str, ColumnMapSchema.ColumnMapIndex] = { - "session_id" : None, - "app_id" : None, - "timestamp" : None, - "event_name" : None, - "event_data" : None, - "event_source" : None, - "app_version" : None, - "app_branch" : None, - "log_version" : None, - "time_offset" : None, - "user_id" : None, - "user_data" : None, - "game_state" : None, - "event_sequence_index" : None - } - - if not isinstance(all_elements, dict): - all_elements = {} - _msg = f"For {name} column map schema, all_elements was not a dict, defaulting to empty dict" - if logger: - logger.warning(_msg) - else: - Logger.Log(_msg, logging.WARN) - # for each item in the map above that we expect... - for key in _map.keys(): - # if the item was found in the given "column_map" dictionary... - if key in all_elements: - # parse what was mapped to the item. Could get back a string, or a list, or a dict... 
- element = cls._parseElement(elem=map[key], name=key) - # then if we got a string, we just find it in list of column names - if isinstance(element, str): - _map[key] = column_names.index(element) - # but if it's a list, we need to get index of each item in list of column names - elif isinstance(element, list): - _map[key] = [column_names.index(listelem) for listelem in element] - # but if it's a dict, we need to make equivalent dict mapping the key (new name) to the index (in list of column names) - elif isinstance(element, dict): - _map[key] = {key : column_names.index(listelem) for key,listelem in element.items()} - else: - Logger.Log(f"Column config does not have a '{key}' element, defaulting to {key} : None", logging.WARN) - _leftovers = { key : val for key,val in all_elements.items() if key not in _map.keys() } - - return ColumnMapSchema(name=name, map=_map, column_names=column_names, other_elements=_leftovers) - - # *** PUBLIC STATICS *** - - # *** PUBLIC METHODS *** - - # *** PRIVATE STATICS *** - - @staticmethod - def _parseElement(elem:Any, name:str) -> Optional[str | List[str] | Dict[str, str]]: - ret_val : Optional[str | List[str] | Dict[str, str]] - if elem is not None: - if isinstance(elem, str): - ret_val = elem - elif isinstance(elem, list): - ret_val = elem - elif isinstance(elem, dict): - ret_val = elem - else: - ret_val = str(elem) - Logger.Log(f"Column name(s) mapped to {name} was not a string or list, defaulting to str(name) == {ret_val} being mapped to {name}", logging.WARN) - else: - ret_val = None - Logger.Log(f"Column name mapped to {name} was left null, nothing will be mapped to {name}", logging.WARN) - return ret_val - - # *** PRIVATE METHODS *** From 7c2ccaa4c96833b5b08fca8079b55776cb796fee Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Thu, 28 Nov 2024 09:52:01 -0600 Subject: [PATCH 070/124] Some very preliminary tweaks to MySQLInterface, working towards the 'refactor' part of this project. 
--- .../connectors/interfaces/MySQLInterface.py | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/MySQLInterface.py b/src/ogd/common/connectors/interfaces/MySQLInterface.py index 20e4075..a175ea0 100644 --- a/src/ogd/common/connectors/interfaces/MySQLInterface.py +++ b/src/ogd/common/connectors/interfaces/MySQLInterface.py @@ -6,6 +6,7 @@ from datetime import datetime from typing import Dict, Final, List, Tuple, Optional # import locals +from ogd.common.connectors.filters.collections import * from ogd.common.connectors.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema @@ -222,11 +223,11 @@ class MySQLInterface(Interface): # *** BUILT-INS & PROPERTIES *** - def __init__(self, game_id:str, config:GameSourceSchema, fail_fast:bool): + def __init__(self, schema:GameSourceSchema, fail_fast:bool): self._tunnel : Optional[sshtunnel.SSHTunnelForwarder] = None self._db : Optional[connection.MySQLConnection] = None self._db_cursor : Optional[cursor.MySQLCursor] = None - super().__init__(game_id=game_id, config=config, fail_fast=fail_fast) + super().__init__(schema=schema, fail_fast=fail_fast) self.Open() # *** IMPLEMENT ABSTRACT FUNCTIONS *** @@ -237,8 +238,8 @@ def _open(self, force_reopen:bool = False) -> bool: self.Open(force_reopen=False) if not self._is_open: start = datetime.now() - if isinstance(self._config.Source, MySQLSchema): - self._tunnel, self._db = SQL.ConnectDB(schema=self._config) + if isinstance(self.GameSourceSchema.Source, MySQLSchema): + self._tunnel, self._db = SQL.ConnectDB(schema=self.GameSourceSchema) if self._db is not None: self._db_cursor = self._getCursor() self._is_open = True @@ -250,7 +251,7 @@ def _open(self, force_reopen:bool = False) -> bool: SQL.disconnectMySQL(tunnel=self._tunnel, db=self._db) return False else: - Logger.Log(f"Unable to open MySQL 
interface, the schema has invalid type {type(self._config)}", logging.ERROR) + Logger.Log(f"Unable to open MySQL interface, the game source schema has invalid type {type(self.GameSourceSchema)}", logging.ERROR) SQL.disconnectMySQL(tunnel=self._tunnel, db=self._db) return False else: @@ -262,12 +263,12 @@ def _close(self) -> bool: self._is_open = False return True - def _allIDs(self) -> List[str]: - if self._db_cursor is not None and isinstance(self._config.Source, MySQLSchema): - _db_name : str = self._config.DatabaseName - _table_name : str = self._config.TableName + def _availableIDs(self, mode:IDMode, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[str]: + if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): + _db_name : str = self.GameSourceSchema.DatabaseName + _table_name : str = self.GameSourceSchema.TableName - sess_id_col : str = self._TableSchema.SessionIDColumn or "session_id" + sess_id_col : str = self.GameSourceSchema.TableSchemaName.SessionIDColumn or "session_id" filters : List[str] = [] params : List[str] = [] @@ -286,9 +287,9 @@ def _allIDs(self) -> List[str]: def _fullDateRange(self) -> Dict[str,datetime]: ret_val = {'min':datetime.now(), 'max':datetime.now()} - if self._db_cursor is not None and isinstance(self._config.Source, MySQLSchema): - _db_name : str = self._config.DatabaseName - _table_name : str = self._config.TableName + if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): + _db_name : str = self.GameSourceSchema.DatabaseName + _table_name : str = self.GameSourceSchema.TableName # prep filter strings filters = [] @@ -311,10 +312,10 @@ def _fullDateRange(self) -> Dict[str,datetime]: def _rowsFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]]=None, exclude_rows:Optional[List[str]]=None) -> List[Tuple]: ret_val = [] # grab data for the given session range. 
Sort by event time, so - if self._db_cursor is not None and isinstance(self._config.Source, MySQLSchema): + if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): # filt = f"app_id='{self._game_id}' AND (session_id BETWEEN '{next_slice[0]}' AND '{next_slice[-1]}'){ver_filter}" - _db_name : str = self._config.DatabaseName - _table_name : str = self._config.TableName + _db_name : str = self.GameSourceSchema.DatabaseName + _table_name : str = self.GameSourceSchema.TableName sess_id_col = self._TableSchema.SessionIDColumn or 'session_id' play_id_col = self._TableSchema.UserIDColumn or 'player_id' @@ -355,10 +356,10 @@ def _rowsFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, version def _IDsFromDates(self, min:datetime, max:datetime, versions:Optional[List[int]]=None) -> List[str]: ret_val = [] - if self._db_cursor is not None and isinstance(self._config.Source, MySQLSchema): + if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): # alias long setting names. - _db_name : str = self._config.DatabaseName - _table_name : str = self._config.TableName + _db_name : str = self.GameSourceSchema.DatabaseName + _table_name : str = self.GameSourceSchema.TableName # prep filter strings filters = [] @@ -387,10 +388,10 @@ def _IDsFromDates(self, min:datetime, max:datetime, versions:Optional[List[int]] def _datesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]]=None) -> Dict[str, datetime]: ret_val = {'min':datetime.now(), 'max':datetime.now()} - if self._db_cursor is not None and isinstance(self._config.Source, MySQLSchema): + if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): # alias long setting names. 
- _db_name : str = self._config.DatabaseName - _table_name : str = self._config.TableName + _db_name : str = self.GameSourceSchema.DatabaseName + _table_name : str = self.GameSourceSchema.TableName # prep filter strings filters = [] From b7f99b7e56af34580659077101c5be30a644b474 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Fri, 29 Nov 2024 23:08:15 -0600 Subject: [PATCH 071/124] Fix for name of parameter to GameSourceSchema class. --- tests/cases/interfaces/t_CSVInterface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cases/interfaces/t_CSVInterface.py b/tests/cases/interfaces/t_CSVInterface.py index 25155f6..d9905cb 100644 --- a/tests/cases/interfaces/t_CSVInterface.py +++ b/tests/cases/interfaces/t_CSVInterface.py @@ -64,7 +64,7 @@ def test_IDsFromDates(self): def test_DatesFromIDs(self): with self.zipped_file.open(self.zipped_file.namelist()[0]) as f: - _cfg = GameSourceSchema(name="FILE SOURCE", all_elements={"SCHEMA":"OGD_EVENT_FILE", "DB_TYPE":"FILE"}, data_sources={}) + _cfg = GameSourceSchema(name="FILE SOURCE", other_elements={"SCHEMA":"OGD_EVENT_FILE", "DB_TYPE":"FILE"}, data_sources={}) CSVI = CSVInterface(game_id='BACTERIA', config=_cfg, filepath=f, delim='\t', fail_fast=False) if CSVI.Open(): dates = CSVI.DatesFromIDs(self.TEST_SESSION_LIST) From e8d59ddfdebbb56bfb47436f5f37a0799523d703 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Sat, 30 Nov 2024 01:32:21 -0600 Subject: [PATCH 072/124] Fix GameSourceSchema needing to parse out game ID. 
--- .../schemas/configs/GameSourceSchema.py | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/ogd/common/schemas/configs/GameSourceSchema.py b/src/ogd/common/schemas/configs/GameSourceSchema.py index 6674226..d945111 100644 --- a/src/ogd/common/schemas/configs/GameSourceSchema.py +++ b/src/ogd/common/schemas/configs/GameSourceSchema.py @@ -11,6 +11,7 @@ class GameSourceSchema(Schema): + _DEFAULT_GAME_ID = "UNKNOWN GAME" _DEFAULT_SOURCE_NAME = "OPENGAMEDATA_BQ" _DEFAULT_DB_NAME = "UNKNOWN GAME" _DEFAULT_TABLE_NAME = "_daily" @@ -91,6 +92,7 @@ def AsMarkdown(self) -> str: def Default(cls) -> "GameSourceSchema": return GameSourceSchema( name="DefaultGameSourceSchema", + game_id=cls._DEFAULT_GAME_ID, source_name=cls._DEFAULT_SOURCE_NAME, source_schema=BigQuerySchema.Default(), db_name=cls._DEFAULT_DB_NAME, @@ -127,6 +129,11 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging logger.warning(_msg) else: Logger.Log(_msg, logging.WARN) + _game_id = cls.ElementFromDict(all_elements=all_elements, logger=logger, + element_names=["game", "game_id"], + parser_function=cls._parseGameID, + default_value=name + ) _source_name = cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["source"], parser_function=cls._parseSource, @@ -156,7 +163,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _used = {"source", "database", "table", "schema"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return GameSourceSchema(name=name, source_name=_source_name, source_schema=_source_schema, + return GameSourceSchema(name=name, game_id=_game_id, source_name=_source_name, source_schema=_source_schema, db_name=_db_name, table_name=_table_name, table_schema=_table_schema, other_elements=_leftovers) @@ -164,7 +171,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging @staticmethod def EmptySchema() -> 
"GameSourceSchema": - return GameSourceSchema(name="NOT FOUND", source_name="NOT FOUND", source_schema=None, db_name="NOT FOUND", + return GameSourceSchema(name="NOT FOUND", game_id="NOT FOUND", source_name="NOT FOUND", source_schema=None, db_name="NOT FOUND", table_name="NOT FOUND", table_schema="NOT FOUND", other_elements={}) # *** PUBLIC METHODS *** @@ -181,6 +188,16 @@ def _parseSource(source) -> str: Logger.Log(f"Game Source source name was unexpected type {type(source)}, defaulting to str(source)={ret_val}.", logging.WARN) return ret_val + @staticmethod + def _parseGameID(game_id) -> str: + ret_val : str + if isinstance(game_id, str): + ret_val = game_id + else: + ret_val = str(game_id) + Logger.Log(f"Game Source app ID was unexpected type {type(game_id)}, defaulting to str(game_id)={ret_val}.", logging.WARN) + return ret_val + @staticmethod def _parseDBName(db_name) -> str: ret_val : str From da465bd147647a2bc12856243aedf8bf0d757740 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Sat, 30 Nov 2024 01:34:46 -0600 Subject: [PATCH 073/124] Fix import paths, which were presumably overwritten by merged changes or something. 
--- src/ogd/common/schemas/configs/GameSourceSchema.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ogd/common/schemas/configs/GameSourceSchema.py b/src/ogd/common/schemas/configs/GameSourceSchema.py index d945111..bd5e9e2 100644 --- a/src/ogd/common/schemas/configs/GameSourceSchema.py +++ b/src/ogd/common/schemas/configs/GameSourceSchema.py @@ -2,8 +2,8 @@ import logging from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.configs.data_sources.DataSourceSchema import DataSourceSchema -from ogd.common.schemas.configs.data_sources.BigQuerySourceSchema import BigQuerySchema +from ogd.common.schemas.storage.DataSourceSchema import DataSourceSchema +from ogd.common.schemas.storage.BigQuerySourceSchema import BigQuerySchema from ogd.common.schemas.Schema import Schema from ogd.common.schemas.tables.TableSchema import TableSchema from ogd.common.utils.Logger import Logger @@ -33,6 +33,8 @@ class GameSourceSchema(Schema): - `TableName` : The neame of the specific table within the database holding the given game's data - `TableSchema` : A schema indicating the structure of the table containing the given game's data. + TODO : Implement and use a smart Load(...) function of TableSchema to load schema from given name, rather than FromFile. + :param Schema: _description_ :type Schema: _type_ """ From e1ab2c6119f66f0fcd26fbe72aed1a88983ce922 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Sat, 30 Nov 2024 01:37:21 -0600 Subject: [PATCH 074/124] Resolve type error message in GameSourceSchema testbed. 
--- tests/cases/schemas/config/t_GameSourceSchema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cases/schemas/config/t_GameSourceSchema.py b/tests/cases/schemas/config/t_GameSourceSchema.py index 57ab84e..117818e 100644 --- a/tests/cases/schemas/config/t_GameSourceSchema.py +++ b/tests/cases/schemas/config/t_GameSourceSchema.py @@ -106,7 +106,7 @@ def test_FromDict(self): "PROJECT_ID" : "aqualab-project", "PROJECT_KEY": "./key.txt" } - _sources = { "AQUALAB_BQ" : BigQuerySchema.FromDict(name="AQUALAB_BQ", all_elements=source_elems, logger=None) } + _sources : Dict[str, DataSourceSchema] = { "AQUALAB_BQ" : BigQuerySchema.FromDict(name="AQUALAB_BQ", all_elements=source_elems, logger=None) } _schema = GameSourceSchema.FromDict(name="AQUALAB", all_elements=_dict, logger=None, data_sources=_sources) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "AQUALAB") From 45a6dc82b4f8ea617d9dea8233f02c820fa0bc6f Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Sat, 30 Nov 2024 01:38:31 -0600 Subject: [PATCH 075/124] Add property for accessing the TableSchema of GameSourceSchema. --- src/ogd/common/schemas/configs/GameSourceSchema.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ogd/common/schemas/configs/GameSourceSchema.py b/src/ogd/common/schemas/configs/GameSourceSchema.py index bd5e9e2..9df800d 100644 --- a/src/ogd/common/schemas/configs/GameSourceSchema.py +++ b/src/ogd/common/schemas/configs/GameSourceSchema.py @@ -77,6 +77,10 @@ def DatabaseName(self) -> str: def TableName(self) -> str: return self._table_name + @property + def TableSchema(self) -> TableSchema: + return self._table_schema + @property def TableSchemaName(self) -> str: return self._table_schema_name From edab15f0a7fae9ab22c698a70adcde941569888a Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Sat, 30 Nov 2024 01:49:57 -0600 Subject: [PATCH 076/124] Further WIP changes to get this up to new Interface function set. 
--- .../connectors/interfaces/MySQLInterface.py | 170 +++++++++--------- 1 file changed, 90 insertions(+), 80 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/MySQLInterface.py b/src/ogd/common/connectors/interfaces/MySQLInterface.py index a175ea0..2cb2fa2 100644 --- a/src/ogd/common/connectors/interfaces/MySQLInterface.py +++ b/src/ogd/common/connectors/interfaces/MySQLInterface.py @@ -9,6 +9,7 @@ from ogd.common.connectors.filters.collections import * from ogd.common.connectors.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode +from ogd.common.models.enums.VersionType import VersionType from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema from ogd.common.schemas.storage.MySQLSourceSchema import MySQLSchema from ogd.common.utils.Logger import Logger @@ -268,13 +269,19 @@ def _availableIDs(self, mode:IDMode, date_filter:TimingFilterCollection, version _db_name : str = self.GameSourceSchema.DatabaseName _table_name : str = self.GameSourceSchema.TableName - sess_id_col : str = self.GameSourceSchema.TableSchemaName.SessionIDColumn or "session_id" + sess_id_col : str = self.GameSourceSchema.TableSchema.SessionIDColumn or "session_id" filters : List[str] = [] params : List[str] = [] - if _table_name != self._game_id: + # 1. If we're in shared table, then need to filter on game ID + if _table_name != self.GameSourceSchema.GameID: filters.append(f"`app_id`=%s") - params.append(self._game_id) + params.append(self.GameSourceSchema.GameID) + # 2. Sort out filters from date_filter + + # 3. Sort out filters from version_filter + + # 4. 
Combine filters & execute filter_clause = " AND ".join(filters) data = SQL.SELECT(cursor =self._db_cursor, db_name=_db_name, table =_table_name, @@ -285,7 +292,39 @@ def _availableIDs(self, mode:IDMode, date_filter:TimingFilterCollection, version Logger.Log(f"Could not get list of all session ids, MySQL connection is not open.", logging.WARN) return [] - def _fullDateRange(self) -> Dict[str,datetime]: + # def _IDsFromDates(self, min:datetime, max:datetime, versions:Optional[List[int]]=None) -> List[str]: + # ret_val = [] + # if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): + # # alias long setting names. + # _db_name : str = self.GameSourceSchema.DatabaseName + # _table_name : str = self.GameSourceSchema.TableName + + # # prep filter strings + # filters = [] + # params = [] + # if _table_name != self._game_id: + # filters.append(f"`app_id`=%s") + # params.append(self._game_id) + # # if versions is not None and versions is not []: + # # filters.append(f"app_version in ({','.join([str(version) for version in versions])})") + # filters.append(f"`{self._TableSchema.EventSequenceIndexColumn}`='0'") + # filters.append(f"(`server_time` BETWEEN '{min.isoformat()}' AND '{max.isoformat()}')") + # filter_clause = " AND ".join(filters) + + # # run query + # # We grab the ids for all sessions that have 0th move in the proper date range. 
+ # sess_id_col = self._TableSchema.SessionIDColumn or "`session_id`" + # sess_ids_raw = SQL.SELECT(cursor=self._db_cursor, db_name=_db_name, table=_table_name, + # columns=[sess_id_col], filter=filter_clause, + # sort_columns=[sess_id_col], sort_direction="ASC", distinct=True, + # params=tuple(params)) + # if sess_ids_raw is not None: + # ret_val = [str(sess[0]) for sess in sess_ids_raw] + # else: + # Logger.Log(f"Could not get session list for {min.isoformat()}-{max.isoformat()} range, MySQL connection is not open or config was not for MySQL.", logging.WARN) + # return ret_val + + def _availableDates(self, id_filter:IDFilterCollection, version_filter:VersioningFilterCollection) -> Dict[str,datetime]: ret_val = {'min':datetime.now(), 'max':datetime.now()} if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): _db_name : str = self.GameSourceSchema.DatabaseName @@ -294,9 +333,9 @@ def _fullDateRange(self) -> Dict[str,datetime]: # prep filter strings filters = [] params = [] - if _table_name != self._game_id: + if _table_name != self.GameSourceSchema.GameID: filters.append(f"`app_id`=%s") - params.append(self._game_id) + params.append(self.GameSourceSchema.GameID) filter_clause = " AND ".join(filters) # run query @@ -309,7 +348,45 @@ def _fullDateRange(self) -> Dict[str,datetime]: Logger.Log(f"Could not get full date range, MySQL connection is not open or config was not for MySQL.", logging.WARN) return ret_val - def _rowsFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]]=None, exclude_rows:Optional[List[str]]=None) -> List[Tuple]: + # def _datesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]]=None) -> Dict[str, datetime]: + # ret_val = {'min':datetime.now(), 'max':datetime.now()} + # if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): + # # alias long setting names. 
+ # _db_name : str = self.GameSourceSchema.DatabaseName + # _table_name : str = self.GameSourceSchema.TableName + + # # prep filter strings + # filters = [] + # params = tuple() + # if _table_name != self._game_id: + # filters.append(f"`app_id`=%s") + # params = tuple(self._game_id) + # # if versions is not None and versions is not []: + # # filters.append(f"app_version in ({','.join([str(version) for version in versions])})") + # ids_string = ','.join([f"'{x}'" for x in id_list]) + # if id_mode == IDMode.SESSION: + # sess_id_col = self._TableSchema.SessionIDColumn or "session_id" + # filters.append(f"{sess_id_col} IN ({ids_string})") + # elif id_mode == IDMode.USER: + # play_id_col = self._TableSchema.UserIDColumn or "player_id" + # filters.append(f"`{play_id_col}` IN ({ids_string})") + # else: + # raise ValueError("Invalid IDMode in MySQLInterface!") + # filter_clause = " AND ".join(filters) + # # run query + # result = SQL.SELECT(cursor=self._db_cursor, db_name=_db_name, table=_table_name, + # columns=['MIN(server_time)', 'MAX(server_time)'], filter=filter_clause, + # params=params) + # if result is not None: + # ret_val = {'min':result[0][0], 'max':result[0][1]} + # else: + # Logger.Log(f"Could not get date range for {len(id_list)} sessions, MySQL connection is not open.", logging.WARN) + # return ret_val + + def _availableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date_filter:TimingFilterCollection) -> List[SemanticVersion | str]: + return [] + + def _getEventRows(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Tuple]: ret_val = [] # grab data for the given session range. 
Sort by event time, so if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): @@ -317,16 +394,16 @@ def _rowsFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, version _db_name : str = self.GameSourceSchema.DatabaseName _table_name : str = self.GameSourceSchema.TableName - sess_id_col = self._TableSchema.SessionIDColumn or 'session_id' - play_id_col = self._TableSchema.UserIDColumn or 'player_id' - seq_idx_col = self._TableSchema.EventSequenceIndexColumn or 'session_n' - evt_nam_col = self._TableSchema.EventNameColumn or "event_name" + sess_id_col = self.GameSourceSchema.TableSchema.SessionIDColumn or 'session_id' + play_id_col = self.GameSourceSchema.TableSchema.UserIDColumn or 'player_id' + seq_idx_col = self.GameSourceSchema.TableSchema.EventSequenceIndexColumn or 'session_n' + evt_nam_col = self.GameSourceSchema.TableSchema.EventNameColumn or "event_name" filters = [] params = [] - if _table_name != self._game_id: + if _table_name != self.GameSourceSchema.GameID: filters.append(f"`app_id`=%s") - params.append(self._game_id) + params.append(self.GameSourceSchema.GameID) # if versions is not None and versions is not []: # filters.append(f"app_version in ({','.join([str(version) for version in versions])})") id_param_string = ",".join( [f"%s"]*len(id_list) ) @@ -354,73 +431,6 @@ def _rowsFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, version Logger.Log(f"Could not get data for {len(id_list)} sessions, MySQL connection is not open or config was not for MySQL.", logging.WARN) return ret_val - def _IDsFromDates(self, min:datetime, max:datetime, versions:Optional[List[int]]=None) -> List[str]: - ret_val = [] - if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): - # alias long setting names. 
- _db_name : str = self.GameSourceSchema.DatabaseName - _table_name : str = self.GameSourceSchema.TableName - - # prep filter strings - filters = [] - params = [] - if _table_name != self._game_id: - filters.append(f"`app_id`=%s") - params.append(self._game_id) - # if versions is not None and versions is not []: - # filters.append(f"app_version in ({','.join([str(version) for version in versions])})") - filters.append(f"`{self._TableSchema.EventSequenceIndexColumn}`='0'") - filters.append(f"(`server_time` BETWEEN '{min.isoformat()}' AND '{max.isoformat()}')") - filter_clause = " AND ".join(filters) - - # run query - # We grab the ids for all sessions that have 0th move in the proper date range. - sess_id_col = self._TableSchema.SessionIDColumn or "`session_id`" - sess_ids_raw = SQL.SELECT(cursor=self._db_cursor, db_name=_db_name, table=_table_name, - columns=[sess_id_col], filter=filter_clause, - sort_columns=[sess_id_col], sort_direction="ASC", distinct=True, - params=tuple(params)) - if sess_ids_raw is not None: - ret_val = [str(sess[0]) for sess in sess_ids_raw] - else: - Logger.Log(f"Could not get session list for {min.isoformat()}-{max.isoformat()} range, MySQL connection is not open or config was not for MySQL.", logging.WARN) - return ret_val - - def _datesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]]=None) -> Dict[str, datetime]: - ret_val = {'min':datetime.now(), 'max':datetime.now()} - if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): - # alias long setting names. 
- _db_name : str = self.GameSourceSchema.DatabaseName - _table_name : str = self.GameSourceSchema.TableName - - # prep filter strings - filters = [] - params = tuple() - if _table_name != self._game_id: - filters.append(f"`app_id`=%s") - params = tuple(self._game_id) - # if versions is not None and versions is not []: - # filters.append(f"app_version in ({','.join([str(version) for version in versions])})") - ids_string = ','.join([f"'{x}'" for x in id_list]) - if id_mode == IDMode.SESSION: - sess_id_col = self._TableSchema.SessionIDColumn or "session_id" - filters.append(f"{sess_id_col} IN ({ids_string})") - elif id_mode == IDMode.USER: - play_id_col = self._TableSchema.UserIDColumn or "player_id" - filters.append(f"`{play_id_col}` IN ({ids_string})") - else: - raise ValueError("Invalid IDMode in MySQLInterface!") - filter_clause = " AND ".join(filters) - # run query - result = SQL.SELECT(cursor=self._db_cursor, db_name=_db_name, table=_table_name, - columns=['MIN(server_time)', 'MAX(server_time)'], filter=filter_clause, - params=params) - if result is not None: - ret_val = {'min':result[0][0], 'max':result[0][1]} - else: - Logger.Log(f"Could not get date range for {len(id_list)} sessions, MySQL connection is not open.", logging.WARN) - return ret_val - # *** PUBLIC STATICS *** # *** PUBLIC METHODS *** From d7fa2f5639b040e5f2683255a5adbcf75d120c8c Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Sat, 30 Nov 2024 11:08:31 -0600 Subject: [PATCH 077/124] Start outlining a class for getting SQL from filters. 
--- src/ogd/common/connectors/interfaces/MySQLInterface.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/ogd/common/connectors/interfaces/MySQLInterface.py b/src/ogd/common/connectors/interfaces/MySQLInterface.py index 2cb2fa2..5360e25 100644 --- a/src/ogd/common/connectors/interfaces/MySQLInterface.py +++ b/src/ogd/common/connectors/interfaces/MySQLInterface.py @@ -6,6 +6,7 @@ from datetime import datetime from typing import Dict, Final, List, Tuple, Optional # import locals +from ogd.common.connectors.filters import * from ogd.common.connectors.filters.collections import * from ogd.common.connectors.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode @@ -220,6 +221,13 @@ def Query(cursor:cursor.MySQLCursor, query:str, params:Optional[Tuple], fetch_re Logger.Log(f"Query fetch completed, total query time: {time_delta} to get {len(result) if result is not None else 0:d} rows", logging.DEBUG) return result +class MySQLFilters: + @staticmethod + def FilterToMySQL(filter:Filter): + if isinstance(filter, NoFilter): + return + elif isinstance(filter, MinFilter) + class MySQLInterface(Interface): # *** BUILT-INS & PROPERTIES *** From d6a7a653e16e5b4c82e485dcb15093791505f45f Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 4 Dec 2024 17:05:16 -0600 Subject: [PATCH 078/124] Move common XIndex and XColumn props up to TableSchema base class. --- .../common/schemas/tables/EventTableSchema.py | 78 ------------------- .../schemas/tables/FeatureTableSchema.py | 78 ------------------- src/ogd/common/schemas/tables/TableSchema.py | 78 +++++++++++++++++++ 3 files changed, 78 insertions(+), 156 deletions(-) diff --git a/src/ogd/common/schemas/tables/EventTableSchema.py b/src/ogd/common/schemas/tables/EventTableSchema.py index 999d131..326084e 100644 --- a/src/ogd/common/schemas/tables/EventTableSchema.py +++ b/src/ogd/common/schemas/tables/EventTableSchema.py @@ -38,14 +38,6 @@ class variables. 
""" super().__init__(name=name, table_type=table_type, column_map=column_map, columns=columns, other_elements=other_elements) - @property - def SessionIDIndex(self) -> ColumnMapIndex: - return self._column_map['session_id'] - - @property - def AppIDIndex(self) -> ColumnMapIndex: - return self._column_map['app_id'] - @property def TimestampIndex(self) -> ColumnMapIndex: return self._column_map['timestamp'] @@ -62,26 +54,10 @@ def EventDataIndex(self) -> ColumnMapIndex: def EventSourceIndex(self) -> ColumnMapIndex: return self._column_map['event_source'] - @property - def AppVersionIndex(self) -> ColumnMapIndex: - return self._column_map['app_version'] - - @property - def AppBranchIndex(self) -> ColumnMapIndex: - return self._column_map['app_branch'] - - @property - def LogVersionIndex(self) -> ColumnMapIndex: - return self._column_map['log_version'] - @property def TimeOffsetIndex(self) -> ColumnMapIndex: return self._column_map['time_offset'] - @property - def UserIDIndex(self) -> ColumnMapIndex: - return self._column_map['user_id'] - @property def UserDataIndex(self) -> ColumnMapIndex: return self._column_map['user_data'] @@ -94,24 +70,6 @@ def GameStateIndex(self) -> ColumnMapIndex: def EventSequenceIndexIndex(self) -> ColumnMapIndex: return self._column_map['event_sequence_index'] - @property - def SessionIDColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self.SessionIDIndex, int): - ret_val = self.ColumnNames[self.SessionIDIndex] - elif isinstance(self.SessionIDIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self.SessionIDIndex]) - return ret_val - - @property - def AppIDColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self.AppIDIndex, int): - ret_val = self.ColumnNames[self.AppIDIndex] - elif isinstance(self.AppIDIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppIDIndex]) - return ret_val - @property def TimestampColumn(self) -> Optional[str]: ret_val = None @@ -148,33 +106,6 @@ 
def EventSourceColumn(self) -> Optional[str]: ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventSourceIndex]) return ret_val - @property - def AppVersionColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self.AppVersionIndex, int): - ret_val = self.ColumnNames[self.AppVersionIndex] - elif isinstance(self.AppVersionIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppVersionIndex]) - return ret_val - - @property - def AppBranchColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self.AppBranchIndex, int): - ret_val = self.ColumnNames[self.AppBranchIndex] - elif isinstance(self.AppBranchIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppBranchIndex]) - return ret_val - - @property - def LogVersionColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self.LogVersionIndex, int): - ret_val = self.ColumnNames[self.LogVersionIndex] - elif isinstance(self.LogVersionIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self.LogVersionIndex]) - return ret_val - @property def TimeOffsetColumn(self) -> Optional[str]: ret_val = None @@ -184,15 +115,6 @@ def TimeOffsetColumn(self) -> Optional[str]: ret_val = ", ".join([self.ColumnNames[idx] for idx in self.TimeOffsetIndex]) return ret_val - @property - def UserIDColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self.UserIDIndex, int): - ret_val = self.ColumnNames[self.UserIDIndex] - elif isinstance(self.UserIDIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self.UserIDIndex]) - return ret_val - @property def UserDataColumn(self) -> Optional[str]: ret_val = None diff --git a/src/ogd/common/schemas/tables/FeatureTableSchema.py b/src/ogd/common/schemas/tables/FeatureTableSchema.py index d94d13b..b57a676 100644 --- a/src/ogd/common/schemas/tables/FeatureTableSchema.py +++ b/src/ogd/common/schemas/tables/FeatureTableSchema.py @@ -42,14 +42,6 @@ class variables. 
""" super().__init__(name=name, table_type=table_type, column_map=column_map, columns=columns, other_elements=other_elements) - @property - def SessionIDIndex(self) -> ColumnMapIndex: - return self._column_map['session_id'] - - @property - def AppIDIndex(self) -> ColumnMapIndex: - return self._column_map['app_id'] - @property def TimestampIndex(self) -> ColumnMapIndex: return self._column_map['timestamp'] @@ -66,26 +58,10 @@ def EventDataIndex(self) -> ColumnMapIndex: def EventSourceIndex(self) -> ColumnMapIndex: return self._column_map['event_source'] - @property - def AppVersionIndex(self) -> ColumnMapIndex: - return self._column_map['app_version'] - - @property - def AppBranchIndex(self) -> ColumnMapIndex: - return self._column_map['app_branch'] - - @property - def LogVersionIndex(self) -> ColumnMapIndex: - return self._column_map['log_version'] - @property def TimeOffsetIndex(self) -> ColumnMapIndex: return self._column_map['time_offset'] - @property - def UserIDIndex(self) -> ColumnMapIndex: - return self._column_map['user_id'] - @property def UserDataIndex(self) -> ColumnMapIndex: return self._column_map['user_data'] @@ -98,24 +74,6 @@ def GameStateIndex(self) -> ColumnMapIndex: def EventSequenceIndexIndex(self) -> ColumnMapIndex: return self._column_map['event_sequence_index'] - @property - def SessionIDColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self.SessionIDIndex, int): - ret_val = self.ColumnNames[self.SessionIDIndex] - elif isinstance(self.SessionIDIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self.SessionIDIndex]) - return ret_val - - @property - def AppIDColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self.AppIDIndex, int): - ret_val = self.ColumnNames[self.AppIDIndex] - elif isinstance(self.AppIDIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppIDIndex]) - return ret_val - @property def TimestampColumn(self) -> Optional[str]: ret_val = None @@ -152,33 +110,6 @@ 
def EventSourceColumn(self) -> Optional[str]: ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventSourceIndex]) return ret_val - @property - def AppVersionColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self.AppVersionIndex, int): - ret_val = self.ColumnNames[self.AppVersionIndex] - elif isinstance(self.AppVersionIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppVersionIndex]) - return ret_val - - @property - def AppBranchColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self.AppBranchIndex, int): - ret_val = self.ColumnNames[self.AppBranchIndex] - elif isinstance(self.AppBranchIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppBranchIndex]) - return ret_val - - @property - def LogVersionColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self.LogVersionIndex, int): - ret_val = self.ColumnNames[self.LogVersionIndex] - elif isinstance(self.LogVersionIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self.LogVersionIndex]) - return ret_val - @property def TimeOffsetColumn(self) -> Optional[str]: ret_val = None @@ -188,15 +119,6 @@ def TimeOffsetColumn(self) -> Optional[str]: ret_val = ", ".join([self.ColumnNames[idx] for idx in self.TimeOffsetIndex]) return ret_val - @property - def UserIDColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self.UserIDIndex, int): - ret_val = self.ColumnNames[self.UserIDIndex] - elif isinstance(self.UserIDIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self.UserIDIndex]) - return ret_val - @property def UserDataColumn(self) -> Optional[str]: ret_val = None diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index ee769ba..a9a60a3 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -92,6 +92,84 @@ def ColumnMap(self) -> Dict[str, ColumnMapIndex]: """ return 
self._column_map + @property + def AppIDIndex(self) -> ColumnMapIndex: + return self._column_map['app_id'] + + @property + def AppIDColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.AppIDIndex, int): + ret_val = self.ColumnNames[self.AppIDIndex] + elif isinstance(self.AppIDIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppIDIndex]) + return ret_val + + @property + def UserIDIndex(self) -> ColumnMapIndex: + return self._column_map['user_id'] + + @property + def UserIDColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.UserIDIndex, int): + ret_val = self.ColumnNames[self.UserIDIndex] + elif isinstance(self.UserIDIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.UserIDIndex]) + return ret_val + + @property + def SessionIDIndex(self) -> ColumnMapIndex: + return self._column_map['session_id'] + + @property + def SessionIDColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.SessionIDIndex, int): + ret_val = self.ColumnNames[self.SessionIDIndex] + elif isinstance(self.SessionIDIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.SessionIDIndex]) + return ret_val + + @property + def AppVersionIndex(self) -> ColumnMapIndex: + return self._column_map['app_version'] + + @property + def AppVersionColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.AppVersionIndex, int): + ret_val = self.ColumnNames[self.AppVersionIndex] + elif isinstance(self.AppVersionIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppVersionIndex]) + return ret_val + + @property + def AppBranchIndex(self) -> ColumnMapIndex: + return self._column_map['app_branch'] + + @property + def AppBranchColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.AppBranchIndex, int): + ret_val = self.ColumnNames[self.AppBranchIndex] + elif isinstance(self.AppBranchIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in 
self.AppBranchIndex]) + return ret_val + + @property + def LogVersionIndex(self) -> ColumnMapIndex: + return self._column_map['log_version'] + + @property + def LogVersionColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.LogVersionIndex, int): + ret_val = self.ColumnNames[self.LogVersionIndex] + elif isinstance(self.LogVersionIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.LogVersionIndex]) + return ret_val + # *** IMPLEMENT ABSTRACT FUNCTIONS *** @property From 7e3ad087090c9a6bc22ae3fdacc3a3932590d386 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 4 Dec 2024 22:27:27 -0600 Subject: [PATCH 079/124] Don't specify a type for contents of the params Tuple. --- src/ogd/common/connectors/interfaces/MySQLInterface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ogd/common/connectors/interfaces/MySQLInterface.py b/src/ogd/common/connectors/interfaces/MySQLInterface.py index 5360e25..06c1e1a 100644 --- a/src/ogd/common/connectors/interfaces/MySQLInterface.py +++ b/src/ogd/common/connectors/interfaces/MySQLInterface.py @@ -164,7 +164,7 @@ def SELECT(cursor :cursor.MySQLCursor, db_name : str, columns :List[str] = [], filter : Optional[str] = None, sort_columns :Optional[List[str]] = None, sort_direction : str = "ASC", grouping : Optional[str] = None, distinct :bool = False, offset : int = 0, limit : int = -1, - fetch_results :bool = True, params : Tuple[str] = tuple()) -> Optional[List[Tuple]]: + fetch_results :bool = True, params : Tuple = tuple()) -> Optional[List[Tuple]]: """Function to build and execute SELECT statements on a database connection. :param cursor: A database cursor, retrieved from the active connection. 
From 828ecd6f2e0cc0c7abc248df4279dc5956e1197b Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 4 Dec 2024 23:55:00 -0600 Subject: [PATCH 080/124] Finish implementation of filter converter function --- .../connectors/interfaces/MySQLInterface.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/ogd/common/connectors/interfaces/MySQLInterface.py b/src/ogd/common/connectors/interfaces/MySQLInterface.py index 06c1e1a..cb5b42f 100644 --- a/src/ogd/common/connectors/interfaces/MySQLInterface.py +++ b/src/ogd/common/connectors/interfaces/MySQLInterface.py @@ -9,6 +9,7 @@ from ogd.common.connectors.filters import * from ogd.common.connectors.filters.collections import * from ogd.common.connectors.interfaces.Interface import Interface +from ogd.common.models.enums.FilterMode import FilterMode from ogd.common.models.enums.IDMode import IDMode from ogd.common.models.enums.VersionType import VersionType from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema @@ -223,10 +224,18 @@ def Query(cursor:cursor.MySQLCursor, query:str, params:Optional[Tuple], fetch_re class MySQLFilters: @staticmethod - def FilterToMySQL(filter:Filter): + def FilterToMySQL(filter:Filter, column_name:str): if isinstance(filter, NoFilter): - return - elif isinstance(filter, MinFilter) + return "" + elif isinstance(filter, MinFilter): + return f"{column_name} > {filter.Min}" if filter.FilterMode == FilterMode.INCLUDE else f"{column_name} < {filter.Min}" + elif isinstance(filter, MaxFilter): + return f"{column_name} < {filter.Max}" if filter.FilterMode == FilterMode.INCLUDE else f"{column_name} > {filter.Max}" + elif isinstance(filter, MinMaxFilter): + return f"{filter.Min} < {column_name} AND {column_name} < {filter.Max}" if filter.FilterMode == FilterMode.INCLUDE else f"{filter.Min} > {column_name} AND {column_name} > {filter.Max}" + elif isinstance(filter, SetFilter): + set_str = ','.join(filter.Set) + return f"{column_name} IN ({set_str})" if 
filter.FilterMode == FilterMode.INCLUDE else f"{column_name} NOT IN ({set_str})" class MySQLInterface(Interface): From f9227e0b960c8bbd72dca52eda6c97ec3562bc66 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Sun, 8 Dec 2024 23:09:41 -0600 Subject: [PATCH 081/124] Start converting old ColumnMapSchema over to newer ElementMappingSchema approach. --- ...mnMapSchema.py => ElementMappingSchema.py} | 33 +++++++++++++++---- 1 file changed, 26 insertions(+), 7 deletions(-) rename src/ogd/common/schemas/tables/{ColumnMapSchema.py => ElementMappingSchema.py} (87%) diff --git a/src/ogd/common/schemas/tables/ColumnMapSchema.py b/src/ogd/common/schemas/tables/ElementMappingSchema.py similarity index 87% rename from src/ogd/common/schemas/tables/ColumnMapSchema.py rename to src/ogd/common/schemas/tables/ElementMappingSchema.py index 2ae1fb3..8a9ef00 100644 --- a/src/ogd/common/schemas/tables/ColumnMapSchema.py +++ b/src/ogd/common/schemas/tables/ElementMappingSchema.py @@ -6,7 +6,26 @@ from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class ColumnMapSchema(Schema): +class ElementMappingSchema(Schema): + """Simple struct-like class to define a mapping of one or more data table columns to a single GameData element. 
+ + For example, the following JSON-style mapping definition for the EventData element of an Event: + ```json + "event_data" : { "item1":"someColumn", "item2":"someOtherColumn" } + ``` + would result in an ElementMappingSchema with name "EventData", mapping type "DICT" and mapping definition like: + ```python + { + "item1" : , + "item2" : + } + ``` + + :param Schema: _description_ + :type Schema: _type_ + :return: _description_ + :rtype: _type_ + """ ColumnMapIndex : TypeAlias = Optional[int | List[int] | Dict[str,int]] _DEFAULT_MAP = {} @@ -15,7 +34,7 @@ class ColumnMapSchema(Schema): # *** BUILT-INS & PROPERTIES *** def __init__(self, name:str, map:Dict[str, ColumnMapIndex], column_names:List[str], other_elements:Optional[Map]=None): - self._map : Dict[str, ColumnMapSchema.ColumnMapIndex] = map + self._map : Dict[str, ElementMappingSchema.ColumnMapIndex] = map self._column_names : List[str] = column_names super().__init__(name=name, other_elements=other_elements) @@ -120,7 +139,7 @@ def AsMarkdown(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], column_names:List[str], logger:Optional[logging.Logger]=None)-> "ColumnMapSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], column_names:List[str], logger:Optional[logging.Logger]=None)-> "ElementMappingSchema": """Function to generate a ColumnMapSchema from a JSON object TODO : find a way around using column_names as a direct parameter. 
@@ -136,7 +155,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], column_names:List[str], :return: _description_ :rtype: ColumnMapSchema """ - _map : Dict[str, ColumnMapSchema.ColumnMapIndex] = { + _map : Dict[str, ElementMappingSchema.ColumnMapIndex] = { "session_id" : None, "app_id" : None, "timestamp" : None, @@ -179,11 +198,11 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], column_names:List[str], Logger.Log(f"Column config does not have a '{key}' element, defaulting to {key} : None", logging.WARN) _leftovers = { key : val for key,val in all_elements.items() if key not in _map.keys() } - return ColumnMapSchema(name=name, map=_map, column_names=column_names, other_elements=_leftovers) + return ElementMappingSchema(name=name, map=_map, column_names=column_names, other_elements=_leftovers) @classmethod - def Default(cls) -> "ColumnMapSchema": - return ColumnMapSchema( + def Default(cls) -> "ElementMappingSchema": + return ElementMappingSchema( name="DefaultColumnMapSchema", map=cls._DEFAULT_MAP, column_names=cls._DEFAULT_COLUMN_NAMES, From 5ee1444734504d0a8a8fc4180d49b4be468eb5f3 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Sun, 8 Dec 2024 23:10:12 -0600 Subject: [PATCH 082/124] Add an enum for type of ElementMapping, i.e. single, list or dict. --- .../common/models/enums/ElementMappingType.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 src/ogd/common/models/enums/ElementMappingType.py diff --git a/src/ogd/common/models/enums/ElementMappingType.py b/src/ogd/common/models/enums/ElementMappingType.py new file mode 100644 index 0000000..e548280 --- /dev/null +++ b/src/ogd/common/models/enums/ElementMappingType.py @@ -0,0 +1,33 @@ +"""ElementMappingType Module +""" + +# import standard libraries +from enum import IntEnum + +class ElementMappingType(IntEnum): + """Enum representing the different kinds of column-element mappings in TableSchemas. 
+ + Namely: + + * Dictionary + * List + * Single + """ + SINGLE = 1 + LIST = 2 + DICT = 3 + + def __str__(self): + return self.name + + @classmethod + def FromString(cls, string:str) -> "ElementMappingType": + match string.upper(): + case "SINGLE": + return cls.SINGLE + case "LIST": + return cls.LIST + case "DICT" | "DICTIONARY": + return cls.DICT + case _: + raise ValueError(f"Unrecognized element mapping type {string}!") From 73a5b921ab3143909782818c1fc7087ae9a17bcb Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 16 Dec 2024 22:19:35 -0600 Subject: [PATCH 083/124] Move configs subfolder of schemas out to be own folder. --- src/ogd/common/{schemas => }/configs/GameSourceSchema.py | 0 src/ogd/common/{schemas => }/configs/IndexingSchema.py | 0 src/ogd/common/{schemas => }/configs/TestConfigSchema.py | 0 src/ogd/common/{schemas => }/configs/__init__.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename src/ogd/common/{schemas => }/configs/GameSourceSchema.py (100%) rename src/ogd/common/{schemas => }/configs/IndexingSchema.py (100%) rename src/ogd/common/{schemas => }/configs/TestConfigSchema.py (100%) rename src/ogd/common/{schemas => }/configs/__init__.py (100%) diff --git a/src/ogd/common/schemas/configs/GameSourceSchema.py b/src/ogd/common/configs/GameSourceSchema.py similarity index 100% rename from src/ogd/common/schemas/configs/GameSourceSchema.py rename to src/ogd/common/configs/GameSourceSchema.py diff --git a/src/ogd/common/schemas/configs/IndexingSchema.py b/src/ogd/common/configs/IndexingSchema.py similarity index 100% rename from src/ogd/common/schemas/configs/IndexingSchema.py rename to src/ogd/common/configs/IndexingSchema.py diff --git a/src/ogd/common/schemas/configs/TestConfigSchema.py b/src/ogd/common/configs/TestConfigSchema.py similarity index 100% rename from src/ogd/common/schemas/configs/TestConfigSchema.py rename to src/ogd/common/configs/TestConfigSchema.py diff --git a/src/ogd/common/schemas/configs/__init__.py 
b/src/ogd/common/configs/__init__.py similarity index 100% rename from src/ogd/common/schemas/configs/__init__.py rename to src/ogd/common/configs/__init__.py From f90ac8f927473f3ace146dd1f8ce37df7da6688e Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 16 Dec 2024 23:33:21 -0600 Subject: [PATCH 084/124] Move scemas/storage to configs, and rename DataSourceSchema -> DataStoreSchema. --- .../common/{schemas => configs}/storage/BigQuerySourceSchema.py | 0 src/ogd/common/{schemas => configs}/storage/CredentialSchema.py | 0 .../DataSourceSchema.py => configs/storage/DataStoreSchema.py} | 0 src/ogd/common/{schemas => configs}/storage/FileSourceSchema.py | 0 src/ogd/common/{schemas => configs}/storage/MySQLSourceSchema.py | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename src/ogd/common/{schemas => configs}/storage/BigQuerySourceSchema.py (100%) rename src/ogd/common/{schemas => configs}/storage/CredentialSchema.py (100%) rename src/ogd/common/{schemas/storage/DataSourceSchema.py => configs/storage/DataStoreSchema.py} (100%) rename src/ogd/common/{schemas => configs}/storage/FileSourceSchema.py (100%) rename src/ogd/common/{schemas => configs}/storage/MySQLSourceSchema.py (100%) diff --git a/src/ogd/common/schemas/storage/BigQuerySourceSchema.py b/src/ogd/common/configs/storage/BigQuerySourceSchema.py similarity index 100% rename from src/ogd/common/schemas/storage/BigQuerySourceSchema.py rename to src/ogd/common/configs/storage/BigQuerySourceSchema.py diff --git a/src/ogd/common/schemas/storage/CredentialSchema.py b/src/ogd/common/configs/storage/CredentialSchema.py similarity index 100% rename from src/ogd/common/schemas/storage/CredentialSchema.py rename to src/ogd/common/configs/storage/CredentialSchema.py diff --git a/src/ogd/common/schemas/storage/DataSourceSchema.py b/src/ogd/common/configs/storage/DataStoreSchema.py similarity index 100% rename from src/ogd/common/schemas/storage/DataSourceSchema.py rename to 
src/ogd/common/configs/storage/DataStoreSchema.py diff --git a/src/ogd/common/schemas/storage/FileSourceSchema.py b/src/ogd/common/configs/storage/FileSourceSchema.py similarity index 100% rename from src/ogd/common/schemas/storage/FileSourceSchema.py rename to src/ogd/common/configs/storage/FileSourceSchema.py diff --git a/src/ogd/common/schemas/storage/MySQLSourceSchema.py b/src/ogd/common/configs/storage/MySQLSourceSchema.py similarity index 100% rename from src/ogd/common/schemas/storage/MySQLSourceSchema.py rename to src/ogd/common/configs/storage/MySQLSourceSchema.py From 19fc955716e927a69d8c2d20e461faeb5ec26afd Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 16 Dec 2024 23:34:59 -0600 Subject: [PATCH 085/124] Move feature-related schemas to configs, since they exist mostly for config. --- src/ogd/common/{schemas => configs}/games/AggregateSchema.py | 0 src/ogd/common/{schemas => configs}/games/DetectorMapSchema.py | 0 src/ogd/common/{schemas => configs}/games/DetectorSchema.py | 0 src/ogd/common/{schemas => configs}/games/FeatureMapSchema.py | 0 src/ogd/common/{schemas => configs}/games/FeatureSchema.py | 0 src/ogd/common/{schemas => configs}/games/GeneratorSchema.py | 0 src/ogd/common/{schemas => configs}/games/PerCountSchema.py | 0 7 files changed, 0 insertions(+), 0 deletions(-) rename src/ogd/common/{schemas => configs}/games/AggregateSchema.py (100%) rename src/ogd/common/{schemas => configs}/games/DetectorMapSchema.py (100%) rename src/ogd/common/{schemas => configs}/games/DetectorSchema.py (100%) rename src/ogd/common/{schemas => configs}/games/FeatureMapSchema.py (100%) rename src/ogd/common/{schemas => configs}/games/FeatureSchema.py (100%) rename src/ogd/common/{schemas => configs}/games/GeneratorSchema.py (100%) rename src/ogd/common/{schemas => configs}/games/PerCountSchema.py (100%) diff --git a/src/ogd/common/schemas/games/AggregateSchema.py b/src/ogd/common/configs/games/AggregateSchema.py similarity index 100% rename from 
src/ogd/common/schemas/games/AggregateSchema.py rename to src/ogd/common/configs/games/AggregateSchema.py diff --git a/src/ogd/common/schemas/games/DetectorMapSchema.py b/src/ogd/common/configs/games/DetectorMapSchema.py similarity index 100% rename from src/ogd/common/schemas/games/DetectorMapSchema.py rename to src/ogd/common/configs/games/DetectorMapSchema.py diff --git a/src/ogd/common/schemas/games/DetectorSchema.py b/src/ogd/common/configs/games/DetectorSchema.py similarity index 100% rename from src/ogd/common/schemas/games/DetectorSchema.py rename to src/ogd/common/configs/games/DetectorSchema.py diff --git a/src/ogd/common/schemas/games/FeatureMapSchema.py b/src/ogd/common/configs/games/FeatureMapSchema.py similarity index 100% rename from src/ogd/common/schemas/games/FeatureMapSchema.py rename to src/ogd/common/configs/games/FeatureMapSchema.py diff --git a/src/ogd/common/schemas/games/FeatureSchema.py b/src/ogd/common/configs/games/FeatureSchema.py similarity index 100% rename from src/ogd/common/schemas/games/FeatureSchema.py rename to src/ogd/common/configs/games/FeatureSchema.py diff --git a/src/ogd/common/schemas/games/GeneratorSchema.py b/src/ogd/common/configs/games/GeneratorSchema.py similarity index 100% rename from src/ogd/common/schemas/games/GeneratorSchema.py rename to src/ogd/common/configs/games/GeneratorSchema.py diff --git a/src/ogd/common/schemas/games/PerCountSchema.py b/src/ogd/common/configs/games/PerCountSchema.py similarity index 100% rename from src/ogd/common/schemas/games/PerCountSchema.py rename to src/ogd/common/configs/games/PerCountSchema.py From b20bc4054f011e418951feb848184cd1e3b672fa Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 16 Dec 2024 23:55:09 -0600 Subject: [PATCH 086/124] Add a stupidly thin wrapper around Schema, named Config, as base for the other config classes. 
--- src/ogd/common/configs/Config.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 src/ogd/common/configs/Config.py diff --git a/src/ogd/common/configs/Config.py b/src/ogd/common/configs/Config.py new file mode 100644 index 0000000..e274957 --- /dev/null +++ b/src/ogd/common/configs/Config.py @@ -0,0 +1,13 @@ +"""Config Class Module +""" +## import standard libraries +from typing import Any, Dict +# import local files +from ogd.common.schemas.Schema import Schema + +class Config(Schema): + """Thin layer over Schema base class to act as a base for all our Config-type classes. + """ + + def __init__(self, name: str, other_elements: Dict[str, Any] | None = None): + super().__init__(name, other_elements) \ No newline at end of file From 43404653a0100a22f264f8ffca765b04b8634925 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 16 Dec 2024 23:56:08 -0600 Subject: [PATCH 087/124] Rename TestConfigSchema -> TestConfig. --- src/ogd/common/configs/{TestConfigSchema.py => TestConfig.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/ogd/common/configs/{TestConfigSchema.py => TestConfig.py} (100%) diff --git a/src/ogd/common/configs/TestConfigSchema.py b/src/ogd/common/configs/TestConfig.py similarity index 100% rename from src/ogd/common/configs/TestConfigSchema.py rename to src/ogd/common/configs/TestConfig.py From e83e3e4c388497a197d6bb18aebd1b5a6591589c Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 16 Dec 2024 23:56:30 -0600 Subject: [PATCH 088/124] Missed changes in last commit. 
--- src/ogd/common/configs/TestConfig.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ogd/common/configs/TestConfig.py b/src/ogd/common/configs/TestConfig.py index 516bc27..62f6a46 100644 --- a/src/ogd/common/configs/TestConfig.py +++ b/src/ogd/common/configs/TestConfig.py @@ -13,13 +13,13 @@ # import 3rd-party libraries # import OGD libraries -from ogd.common.schemas.Schema import Schema +from ogd.common.configs.Config import Config from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map # import local files -class TestConfigSchema(Schema): +class TestConfig(Config): _DEFAULT_VERBOSE = False _DEFAULT_ENABLED_TESTS = {} @@ -48,8 +48,8 @@ def AsMarkdown(self) -> str: # *** IMPLEMENT ABSTRACT FUNCTIONS *** @classmethod - def Default(cls) -> "TestConfigSchema": - return TestConfigSchema( + def Default(cls) -> "TestConfig": + return TestConfig( name = "DefaultTestConfig", verbose = cls._DEFAULT_VERBOSE, enabled_tests = cls._DEFAULT_ENABLED_TESTS @@ -58,7 +58,7 @@ def Default(cls) -> "TestConfigSchema": # *** PUBLIC STATICS *** @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "TestConfigSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "TestConfig": _verbose : bool _enabled_tests : Dict[str, bool] @@ -82,7 +82,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _used = {"VERBOSE", "ENABLED"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return TestConfigSchema(name=name, verbose=_verbose, enabled_tests=_enabled_tests, other_elements=_leftovers) + return TestConfig(name=name, verbose=_verbose, enabled_tests=_enabled_tests, other_elements=_leftovers) # *** PUBLIC METHODS *** @@ -114,7 +114,7 @@ def _parseEnabledTests(enabled, logger:Optional[logging.Logger]=None) -> Dict[st if isinstance(enabled, dict): ret_val = { 
str(key) : bool(val) for key, val in enabled.items() } else: - ret_val = TestConfigSchema.Default().EnabledTests + ret_val = TestConfig.Default().EnabledTests _msg = f"Config 'enabled tests' setting was unexpected type {type(enabled)}, defaulting to class default = {ret_val}." if logger: logger.warn(_msg, logging.WARN) From 3f41bdb2e5b70d99c3bcfc2691f8d70a7fea6c48 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 16 Dec 2024 23:57:34 -0600 Subject: [PATCH 089/124] Rename IndexingSchema -> IndexingConfig. --- .../{IndexingSchema.py => IndexingConfig.py} | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) rename src/ogd/common/configs/{IndexingSchema.py => IndexingConfig.py} (89%) diff --git a/src/ogd/common/configs/IndexingSchema.py b/src/ogd/common/configs/IndexingConfig.py similarity index 89% rename from src/ogd/common/configs/IndexingSchema.py rename to src/ogd/common/configs/IndexingConfig.py index 3c50224..c6b1131 100644 --- a/src/ogd/common/configs/IndexingSchema.py +++ b/src/ogd/common/configs/IndexingConfig.py @@ -3,11 +3,11 @@ from pathlib import Path from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.Schema import Schema +from ogd.common.configs.Config import Config from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class FileIndexingSchema(Schema): +class FileIndexingConfig(Config): _DEFAULT_LOCAL_DIR = Path("./data/") _DEFAULT_REMOTE_URL = "https://fieldday-web.ad.education.wisc.edu/opengamedata/" _DEFAULT_TEMPLATE_URL = "https://github.com/opengamedata/opengamedata-samples" @@ -35,8 +35,8 @@ def TemplatesURL(self) -> str: # *** IMPLEMENT ABSTRACT FUNCTIONS *** @classmethod - def Default(cls) -> "FileIndexingSchema": - return FileIndexingSchema( + def Default(cls) -> "FileIndexingConfig": + return FileIndexingConfig( name = "DefaultFileIndexingSchema", local_dir = cls._DEFAULT_LOCAL_DIR, remote_url = cls._DEFAULT_REMOTE_URL, @@ -45,7 +45,7 @@ def Default(cls) -> 
"FileIndexingSchema": ) @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "FileIndexingSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "FileIndexingConfig": _local_dir : Path _remote_url : Optional[str] _templates_url : str @@ -60,22 +60,22 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _local_dir = cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["LOCAL_DIR"], parser_function=cls._parseLocalDir, - default_value=FileIndexingSchema._DEFAULT_LOCAL_DIR + default_value=FileIndexingConfig._DEFAULT_LOCAL_DIR ) _remote_url = cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["REMOTE_URL"], parser_function=cls._parseRemoteURL, - default_value=FileIndexingSchema._DEFAULT_REMOTE_URL + default_value=FileIndexingConfig._DEFAULT_REMOTE_URL ) _templates_url = cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["TEMPLATES_URL"], parser_function=cls._parseTemplatesURL, - default_value=FileIndexingSchema._DEFAULT_TEMPLATE_URL + default_value=FileIndexingConfig._DEFAULT_TEMPLATE_URL ) _used = {"LOCAL_DIR", "REMOTE_URL", "TEMPLATES_URL"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return FileIndexingSchema(name=name, local_dir=_local_dir, remote_url=_remote_url, templates_url=_templates_url, other_elements=_leftovers) + return FileIndexingConfig(name=name, local_dir=_local_dir, remote_url=_remote_url, templates_url=_templates_url, other_elements=_leftovers) @property From 3df0459e0265ad6a1d0ab3c95ef0cd8eb655bbc9 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Mon, 16 Dec 2024 23:59:41 -0600 Subject: [PATCH 090/124] Rename DataStoreSchema -> DataStoreConfig. 
--- .../storage/{DataStoreSchema.py => DataStoreConfig.py} | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) rename src/ogd/common/configs/storage/{DataStoreSchema.py => DataStoreConfig.py} (88%) diff --git a/src/ogd/common/configs/storage/DataStoreSchema.py b/src/ogd/common/configs/storage/DataStoreConfig.py similarity index 88% rename from src/ogd/common/configs/storage/DataStoreSchema.py rename to src/ogd/common/configs/storage/DataStoreConfig.py index 372cc0f..cea4524 100644 --- a/src/ogd/common/configs/storage/DataStoreSchema.py +++ b/src/ogd/common/configs/storage/DataStoreConfig.py @@ -4,12 +4,12 @@ from pathlib import Path from typing import Any, Dict # , overload # import local files -from ogd.common.schemas.Schema import Schema -from ogd.common.schemas.storage.CredentialSchema import CredentialSchema +from ogd.common.configs.Config import Config +from ogd.common.configs.storage.CredentialSchema import CredentialSchema from ogd.common.utils.Logger import Logger -class DataSourceSchema(Schema): +class DataStoreConfig(Config): """Dumb struct to contain data pertaining to a data source, which a StorageConnector can connect to. Every source has: @@ -28,10 +28,10 @@ def __init__(self, name:str, other_elements:Dict[str, Any] | Any): Logger.Log(f"For {name} Data Source config, other_elements was not a dict, defaulting to empty dict", logging.WARN) # 2. Parse standard elements, with legacy elements nested under "else" case. 
if "SOURCE_TYPE" in other_elements.keys(): - self._source_type = DataSourceSchema._parseSourceType(other_elements["SOURCE_TYPE"]) + self._source_type = DataStoreConfig._parseSourceType(other_elements["SOURCE_TYPE"]) else: if "DB_TYPE" in other_elements.keys(): - self._source_type = DataSourceSchema._parseSourceType(other_elements["DB_TYPE"]) + self._source_type = DataStoreConfig._parseSourceType(other_elements["DB_TYPE"]) else: self._source_type = "UNKNOWN" Logger.Log(f"{name} config does not have a 'SOURCE_TYPE' element; defaulting to db_name={self._source_type}", logging.WARN) From 1696cafb168ea36f258cd050716ba8ea5a9abc3e Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 17 Dec 2024 00:01:09 -0600 Subject: [PATCH 091/124] Move CredentialSchema to credentials folder, and rename to CredentialConfig. --- .../{CredentialSchema.py => credentials/CredentialConfig.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/ogd/common/configs/storage/{CredentialSchema.py => credentials/CredentialConfig.py} (92%) diff --git a/src/ogd/common/configs/storage/CredentialSchema.py b/src/ogd/common/configs/storage/credentials/CredentialConfig.py similarity index 92% rename from src/ogd/common/configs/storage/CredentialSchema.py rename to src/ogd/common/configs/storage/credentials/CredentialConfig.py index 9ed4211..7aea633 100644 --- a/src/ogd/common/configs/storage/CredentialSchema.py +++ b/src/ogd/common/configs/storage/credentials/CredentialConfig.py @@ -1,7 +1,7 @@ # import standard libraries from typing import Any, Dict # , overload # import local files -from ogd.common.schemas.Schema import Schema +from ogd.common.configs.Config import Config class CredentialSchema(Schema): From 16fe01c286ec3b9198e7197fdb9e1bd7912c28a7 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 17 Dec 2024 00:02:29 -0600 Subject: [PATCH 092/124] Missed changes in last commit. 
--- .../common/configs/storage/credentials/CredentialConfig.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ogd/common/configs/storage/credentials/CredentialConfig.py b/src/ogd/common/configs/storage/credentials/CredentialConfig.py index 7aea633..ded8297 100644 --- a/src/ogd/common/configs/storage/credentials/CredentialConfig.py +++ b/src/ogd/common/configs/storage/credentials/CredentialConfig.py @@ -4,10 +4,10 @@ from ogd.common.configs.Config import Config -class CredentialSchema(Schema): +class CredentialConfig(Config): """Dumb struct to contain data pertaining to credentials for accessing a data source. - In general, a credential can have a key, or a user-password combination + In general, a credential can have a key, or a user-password combination. """ # @overload # def __init__(self, name:str, other_elements:Dict[str, Any]): ... From 87886988252f07b76c40424bff4686309cc9d30b Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 17 Dec 2024 00:17:54 -0600 Subject: [PATCH 093/124] Rename unparsed_elements -> other_elements, to match convention. --- .../common/configs/storage/credentials/CredentialConfig.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ogd/common/configs/storage/credentials/CredentialConfig.py b/src/ogd/common/configs/storage/credentials/CredentialConfig.py index ded8297..487aafa 100644 --- a/src/ogd/common/configs/storage/credentials/CredentialConfig.py +++ b/src/ogd/common/configs/storage/credentials/CredentialConfig.py @@ -12,5 +12,5 @@ class CredentialConfig(Config): # @overload # def __init__(self, name:str, other_elements:Dict[str, Any]): ... 
- def __init__(self, name:str, unparsed_elements:Dict[str, Any] | Any): - super().__init__(name=name, other_elements=unparsed_elements) + def __init__(self, name:str, other_elements:Dict[str, Any] | Any): + super().__init__(name=name, other_elements=other_elements) From 33fc6cb17afb508ef902934b1ec77c8f66c0c9ab Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 17 Dec 2024 00:18:11 -0600 Subject: [PATCH 094/124] Create a config class for storing password credential. --- .../credentials/PasswordCredentialConfig.py | 101 ++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 src/ogd/common/configs/storage/credentials/PasswordCredentialConfig.py diff --git a/src/ogd/common/configs/storage/credentials/PasswordCredentialConfig.py b/src/ogd/common/configs/storage/credentials/PasswordCredentialConfig.py new file mode 100644 index 0000000..1e34b8a --- /dev/null +++ b/src/ogd/common/configs/storage/credentials/PasswordCredentialConfig.py @@ -0,0 +1,101 @@ +# import standard libraries +import logging +from typing import Any, Dict, Optional +# import local files +from ogd.common.configs.storage.credentials.CredentialConfig import CredentialConfig +from ogd.common.utils.Logger import Logger + + +class PasswordCredential(CredentialConfig): + """Dumb struct to contain data pertaining to credentials for accessing a data source. + + In general, a credential can have a key, or a user-password combination. 
+ """ + _DEFAULT_USER = "DEFAULT USER" + _DEFAULT_PASS = None + + def __init__(self, name:str, username:str, password:Optional[str], other_elements:Dict[str, Any] | Any): + super().__init__(name=name, other_elements=other_elements) + self._user = username + self._pass = password + + @property + def User(self) -> Optional[str]: + return self._user + + @property + def Pass(self) -> Optional[str]: + return self._pass + + # *** IMPLEMENT ABSTRACT FUNCTIONS *** + + @property + def AsMarkdown(self) -> str: + ret_val : str + + ret_val = f"User : `{self.User}`\nPass: `****`" + return ret_val + + @classmethod + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "PasswordCredential": + _user : Optional[str] + _pass : Optional[str] + + if not isinstance(all_elements, dict): + all_elements = {} + _msg = f"For {name} password credential config, all_elements was not a dict, defaulting to empty dict" + if logger: + logger.warning(_msg) + else: + Logger.Log(_msg, logging.WARN) + _user = cls.ElementFromDict(all_elements=all_elements, logger=logger, + element_names=["USER"], + parser_function=cls._parseUser, + default_value=cls._DEFAULT_USER + ) + _pass = cls.ElementFromDict(all_elements=all_elements, logger=logger, + element_names=["PASS"], + parser_function=cls._parsePass, + default_value=cls._DEFAULT_PASS + ) + + _used = {"USER", "PASS"} + _leftovers = { key : val for key,val in all_elements.items() if key not in _used } + return PasswordCredential(name=name, username=_user, password=_pass, other_elements=_leftovers) + + @classmethod + def Default(cls) -> "PasswordCredential": + return PasswordCredential( + name="DefaultPasswordCredential", + username=cls._DEFAULT_USER, + password=cls._DEFAULT_PASS, + other_elements={} + ) + + # *** PUBLIC STATICS *** + + # *** PUBLIC METHODS *** + + # *** PRIVATE STATICS *** + + @staticmethod + def _parseUser(user) -> Optional[str]: + ret_val : Optional[str] + if isinstance(user, str): + ret_val = 
user + else: + ret_val = str(user) + Logger.Log(f"SSH config for user was unexpected type {type(user)}, defaulting to str(user)={ret_val}.", logging.WARN) + return ret_val + + @staticmethod + def _parsePass(pw) -> Optional[str]: + ret_val : Optional[str] + if isinstance(pw, str): + ret_val = pw + else: + ret_val = str(pw) + Logger.Log(f"SSH config for password was unexpected type {type(pw)}, defaulting to str(pw)=***.", logging.WARN) + return ret_val + + # *** PRIVATE METHODS *** From 37353f32645f45fccd293a469d505ca15f87279f Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 17 Dec 2024 09:39:04 -0600 Subject: [PATCH 095/124] Add a class for handling key credentials. --- .../credentials/KeyCredentialConfig.py | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 src/ogd/common/configs/storage/credentials/KeyCredentialConfig.py diff --git a/src/ogd/common/configs/storage/credentials/KeyCredentialConfig.py b/src/ogd/common/configs/storage/credentials/KeyCredentialConfig.py new file mode 100644 index 0000000..663dcb4 --- /dev/null +++ b/src/ogd/common/configs/storage/credentials/KeyCredentialConfig.py @@ -0,0 +1,125 @@ +# import standard libraries +import logging +import os +from pathlib import Path +from typing import Any, Dict, Optional +# import local files +from ogd.common.configs.storage.credentials.CredentialConfig import CredentialConfig +from ogd.common.utils.Logger import Logger + + +class KeyCredential(CredentialConfig): + """Dumb struct to contain data pertaining to loading a key credential + """ + _DEFAULT_PATH = "./" + _DEFAULT_FILE = "key.txt" + + def __init__(self, name:str, filename:str, path:Path | str, other_elements:Dict[str, Any] | Any): + super().__init__(name=name, other_elements=other_elements) + if isinstance(path, str): + path = Path(path) + self._path : Path = path + self._file : str = filename + + @property + def File(self) -> str: + return self._file + + @property + def Folder(self) -> Path: + """The path to the 
folder containing the key credential file. + + :return: The path to the folder containing the key credential file. + :rtype: Path + """ + return self._path + + @property + def Filepath(self) -> Path: + """The full path to the key credential file. + + :return: The full path to the key credential file. + :rtype: Path + """ + return self.Folder / self.File + + # *** IMPLEMENT ABSTRACT FUNCTIONS *** + + @property + def AsMarkdown(self) -> str: + ret_val : str + + ret_val = f"Key: {self.Filepath}" + return ret_val + + @classmethod + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "KeyCredential": + _file : Optional[str] + _path : Optional[Path] + + if not isinstance(all_elements, dict): + all_elements = {} + _msg = f"For {name} key credential config, all_elements was not a dict, defaulting to empty dict" + if logger: + logger.warning(_msg) + else: + Logger.Log(_msg, logging.WARN) + _file = cls.ElementFromDict(all_elements=all_elements, logger=logger, + element_names=["FILE", "KEY"], + parser_function=cls._parseFile, + default_value=cls._DEFAULT_FILE + ) + _path = cls.ElementFromDict(all_elements=all_elements, logger=logger, + element_names=["PATH"], + parser_function=cls._parsePath, + default_value=cls._DEFAULT_PATH + ) + # if we didn't find a PATH, but the FILE has a '/' in it, + # we should be able to get file separate from path. 
+ if _path is None and _file is not None and "/" in _file: + _full_path = Path(_file) + _path = _full_path.parent + _file = _full_path.name + + _used = {"FILE", "KEY", "PATH"} + _leftovers = { key : val for key,val in all_elements.items() if key not in _used } + return KeyCredential(name=name, filename=_file, path=_path, other_elements=_leftovers) + + @classmethod + def Default(cls) -> "KeyCredential": + return KeyCredential( + name="DefaultKeyCredential", + filename=cls._DEFAULT_FILE, + path=cls._DEFAULT_PATH, + other_elements={} + ) + + # *** PUBLIC STATICS *** + + # *** PUBLIC METHODS *** + + # *** PRIVATE STATICS *** + + @staticmethod + def _parseFile(file) -> str: + ret_val : Optional[str] + if isinstance(file, str): + ret_val = file + else: + ret_val = str(file) + Logger.Log(f"Filename for key credential was unexpected type {type(file)}, defaulting to str(file)={ret_val}.", logging.WARN) + return ret_val + + @staticmethod + def _parsePath(folder) -> Path: + ret_val : Path + if isinstance(folder, Path): + ret_val = folder + if isinstance(folder, str): + ret_val = Path(folder) + else: + ret_val = Path(str(folder)) + Logger.Log(f"Folder for key credential was unexpected type {type(folder)}, defaulting to Path(str(folder))={ret_val}.", logging.WARN) + return ret_val + + # *** PRIVATE METHODS *** From 7271648b0a6adc751645c572dc654fb12ef04522 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 17 Dec 2024 09:51:55 -0600 Subject: [PATCH 096/124] Fix import paths for DataStoreConfig. 
--- src/ogd/common/configs/GameSourceSchema.py | 2 +- ...QuerySourceSchema.py => BigQueryConfig.py} | 2 +- .../configs/storage/FileSourceSchema.py | 2 +- .../configs/storage/MySQLSourceSchema.py | 24 +--- .../credentials/PasswordCredentialConfig.py | 4 +- .../schemas/tables/ElementMappingSchema.py | 103 ++++-------------- .../schemas/config/t_GameSourceSchema.py | 2 +- 7 files changed, 28 insertions(+), 111 deletions(-) rename src/ogd/common/configs/storage/{BigQuerySourceSchema.py => BigQueryConfig.py} (97%) diff --git a/src/ogd/common/configs/GameSourceSchema.py b/src/ogd/common/configs/GameSourceSchema.py index 9df800d..464b60b 100644 --- a/src/ogd/common/configs/GameSourceSchema.py +++ b/src/ogd/common/configs/GameSourceSchema.py @@ -2,7 +2,7 @@ import logging from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.storage.DataSourceSchema import DataSourceSchema +from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.schemas.storage.BigQuerySourceSchema import BigQuerySchema from ogd.common.schemas.Schema import Schema from ogd.common.schemas.tables.TableSchema import TableSchema diff --git a/src/ogd/common/configs/storage/BigQuerySourceSchema.py b/src/ogd/common/configs/storage/BigQueryConfig.py similarity index 97% rename from src/ogd/common/configs/storage/BigQuerySourceSchema.py rename to src/ogd/common/configs/storage/BigQueryConfig.py index ec083a9..3845c39 100644 --- a/src/ogd/common/configs/storage/BigQuerySourceSchema.py +++ b/src/ogd/common/configs/storage/BigQueryConfig.py @@ -3,7 +3,7 @@ import logging from typing import Any, Dict, Optional, Type # import local files -from ogd.common.schemas.storage.DataSourceSchema import DataSourceSchema +from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.utils.Logger import Logger class BigQuerySchema(DataSourceSchema): diff --git a/src/ogd/common/configs/storage/FileSourceSchema.py 
b/src/ogd/common/configs/storage/FileSourceSchema.py index a17e46d..8718f3b 100644 --- a/src/ogd/common/configs/storage/FileSourceSchema.py +++ b/src/ogd/common/configs/storage/FileSourceSchema.py @@ -3,7 +3,7 @@ from typing import Any, Dict, Optional from pathlib import Path # import local files -from ogd.common.schemas.storage.DataSourceSchema import DataSourceSchema +from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.utils.Logger import Logger class FileSourceSchema(DataSourceSchema): diff --git a/src/ogd/common/configs/storage/MySQLSourceSchema.py b/src/ogd/common/configs/storage/MySQLSourceSchema.py index 8808dc9..15c8663 100644 --- a/src/ogd/common/configs/storage/MySQLSourceSchema.py +++ b/src/ogd/common/configs/storage/MySQLSourceSchema.py @@ -3,13 +3,11 @@ from typing import Any, Dict, Optional, Type # import local files from ogd.common.schemas.Schema import Schema -from ogd.common.schemas.storage.DataSourceSchema import DataSourceSchema +from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.utils.Logger import Logger class SSHSchema(Schema): _DEFAULT_HOST = "127.0.0.1" - _DEFAULT_USER = "DEFAULT USER" - _DEFAULT_PASS = None _DEFAULT_PORT = 22 # *** BUILT-INS & PROPERTIES *** @@ -119,26 +117,6 @@ def _parseHost(host) -> Optional[str]: Logger.Log(f"SSH config for host was unexpected type {type(host)}, defaulting to str(host)={ret_val}.", logging.WARN) return ret_val - @staticmethod - def _parseUser(user) -> Optional[str]: - ret_val : Optional[str] - if isinstance(user, str): - ret_val = user - else: - ret_val = str(user) - Logger.Log(f"SSH config for user was unexpected type {type(user)}, defaulting to str(user)={ret_val}.", logging.WARN) - return ret_val - - @staticmethod - def _parsePass(pw) -> Optional[str]: - ret_val : Optional[str] - if isinstance(pw, str): - ret_val = pw - else: - ret_val = str(pw) - Logger.Log(f"SSH config for password was unexpected type {type(pw)}, defaulting to 
str(pw)=***.", logging.WARN) - return ret_val - @staticmethod def _parsePort(port) -> int: ret_val : int diff --git a/src/ogd/common/configs/storage/credentials/PasswordCredentialConfig.py b/src/ogd/common/configs/storage/credentials/PasswordCredentialConfig.py index 1e34b8a..33f7f0a 100644 --- a/src/ogd/common/configs/storage/credentials/PasswordCredentialConfig.py +++ b/src/ogd/common/configs/storage/credentials/PasswordCredentialConfig.py @@ -85,7 +85,7 @@ def _parseUser(user) -> Optional[str]: ret_val = user else: ret_val = str(user) - Logger.Log(f"SSH config for user was unexpected type {type(user)}, defaulting to str(user)={ret_val}.", logging.WARN) + Logger.Log(f"User for password credential was unexpected type {type(user)}, defaulting to str(user)={ret_val}.", logging.WARN) return ret_val @staticmethod @@ -95,7 +95,7 @@ def _parsePass(pw) -> Optional[str]: ret_val = pw else: ret_val = str(pw) - Logger.Log(f"SSH config for password was unexpected type {type(pw)}, defaulting to str(pw)=***.", logging.WARN) + Logger.Log(f"Password for password credential was unexpected type {type(pw)}, defaulting to str(pw)=***.", logging.WARN) return ret_val # *** PRIVATE METHODS *** diff --git a/src/ogd/common/schemas/tables/ElementMappingSchema.py b/src/ogd/common/schemas/tables/ElementMappingSchema.py index 8a9ef00..ce6b2b1 100644 --- a/src/ogd/common/schemas/tables/ElementMappingSchema.py +++ b/src/ogd/common/schemas/tables/ElementMappingSchema.py @@ -2,10 +2,14 @@ import logging from typing import Any, Dict, List, Optional, TypeAlias # import local files +from ogd.common.models.enums.ElementMappingType import ElementMappingType +from ogd.common.schemas.tables.ColumnSchema import ColumnSchema from ogd.common.schemas.Schema import Schema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map +ElementMap: TypeAlias = ColumnSchema | List[ColumnSchema] | Dict[str,ColumnSchema] + class ElementMappingSchema(Schema): """Simple struct-like class to 
define a mapping of one or more data table columns to a single GameData element. @@ -20,100 +24,35 @@ class ElementMappingSchema(Schema): "item2" : } ``` - - :param Schema: _description_ - :type Schema: _type_ - :return: _description_ - :rtype: _type_ """ - ColumnMapIndex : TypeAlias = Optional[int | List[int] | Dict[str,int]] _DEFAULT_MAP = {} _DEFAULT_COLUMN_NAMES = [] # *** BUILT-INS & PROPERTIES *** - def __init__(self, name:str, map:Dict[str, ColumnMapIndex], column_names:List[str], other_elements:Optional[Map]=None): - self._map : Dict[str, ElementMappingSchema.ColumnMapIndex] = map - self._column_names : List[str] = column_names - + def __init__(self, name:str, map:ElementMap, other_elements:Optional[Map]=None): + self._map : ElementMap = map + self._map_type : ElementMappingType + if isinstance(map, ColumnSchema): + self._map_type = ElementMappingType.SINGLE + elif isinstance(map, list): + self._map_type = ElementMappingType.LIST + elif isinstance(map, dict): + self._map_type = ElementMappingType.DICT + else: + raise TypeError(f"The map passed to ElementMappingSchema had invalide type {type(map)}") super().__init__(name=name, other_elements=other_elements) @property - def Map(self) -> Dict[str, ColumnMapIndex]: - """Mapping from Event element names to the indices of the database columns mapped to them. - There may be a single index, indicating a 1-to-1 mapping of a database column to the element; - There may be a list of indices, indicating multiple columns will be concatenated to form the element value; - There may be a further mapping of keys to indicies, indicating multiple columns will be joined into a JSON object, with keys mapped to values found at the columns with given indices. - - :return: The dictionary mapping of element names to indices. 
- :rtype: Dict[str, Union[int, List[int], Dict[str, int], None]] - """ + def Map(self) -> ElementMap: return self._map @property - def SessionID(self) -> ColumnMapIndex: - return self._map['session_id'] - - @property - def AppID(self) -> ColumnMapIndex: - return self._map['app_id'] - - @property - def Timestamp(self) -> ColumnMapIndex: - return self._map['timestamp'] - - @property - def EventName(self) -> ColumnMapIndex: - return self._map['event_name'] - - @property - def EventData(self) -> ColumnMapIndex: - return self._map['event_data'] - - @property - def EventSource(self) -> ColumnMapIndex: - return self._map['event_source'] - - @property - def AppVersion(self) -> ColumnMapIndex: - return self._map['app_version'] - - @property - def AppBranch(self) -> ColumnMapIndex: - return self._map['app_branch'] - - @property - def LogVersion(self) -> ColumnMapIndex: - return self._map['log_version'] - - @property - def TimeOffset(self) -> ColumnMapIndex: - return self._map['time_offset'] - - @property - def UserID(self) -> ColumnMapIndex: - return self._map['user_id'] - - @property - def UserData(self) -> ColumnMapIndex: - return self._map['user_data'] - - @property - def GameState(self) -> ColumnMapIndex: - return self._map['game_state'] - - @property - def EventSequenceIndex(self) -> ColumnMapIndex: - return self._map['event_sequence_index'] - - @property - def Elements(self) -> Dict[str, str]: - return self._other_elements - - @property - def ElementNames(self) -> List[str]: - return list(self._other_elements.keys()) + def ColumnNames(self) -> List[str]: + match self._map_type: + case ElementMappingType.SINGLE: + # *** IMPLEMENT ABSTRACT FUNCTIONS *** @@ -155,7 +94,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], column_names:List[str], :return: _description_ :rtype: ColumnMapSchema """ - _map : Dict[str, ElementMappingSchema.ColumnMapIndex] = { + _map : Dict[str, ElementMappingSchema.ElementMapIndex] = { "session_id" : None, "app_id" : None, "timestamp" : 
None, diff --git a/tests/cases/schemas/config/t_GameSourceSchema.py b/tests/cases/schemas/config/t_GameSourceSchema.py index 117818e..c7ca9b0 100644 --- a/tests/cases/schemas/config/t_GameSourceSchema.py +++ b/tests/cases/schemas/config/t_GameSourceSchema.py @@ -5,7 +5,7 @@ from typing import Any, Dict, Optional from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.storage.DataSourceSchema import DataSourceSchema +from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.schemas.storage.BigQuerySourceSchema import BigQuerySchema from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger From ab49b82345d6ade5a45e2510aba70bf1600aa515 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 17 Dec 2024 09:52:56 -0600 Subject: [PATCH 097/124] Rename FileSourceSchema -> FileStoreConfig.py --- .../{FileSourceSchema.py => FileStoreConfig.py} | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) rename src/ogd/common/configs/storage/{FileSourceSchema.py => FileStoreConfig.py} (88%) diff --git a/src/ogd/common/configs/storage/FileSourceSchema.py b/src/ogd/common/configs/storage/FileStoreConfig.py similarity index 88% rename from src/ogd/common/configs/storage/FileSourceSchema.py rename to src/ogd/common/configs/storage/FileStoreConfig.py index 8718f3b..00b42ae 100644 --- a/src/ogd/common/configs/storage/FileSourceSchema.py +++ b/src/ogd/common/configs/storage/FileStoreConfig.py @@ -6,7 +6,7 @@ from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.utils.Logger import Logger -class FileSourceSchema(DataSourceSchema): +class FileStoreConfig(DataSourceSchema): _DEFAULT_FOLDER_PATH = Path('./data') _DEFAULT_FILE_NAME = "UNKNOWN.tsv" @@ -49,7 +49,7 @@ def AsConnectionInfo(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> 
"FileSourceSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "FileStoreConfig": _folder_path : Path _file_name : str @@ -63,21 +63,21 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _folder_path = cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["PATH"], parser_function=cls._parseFolder, - default_value=FileSourceSchema._DEFAULT_FOLDER_PATH + default_value=FileStoreConfig._DEFAULT_FOLDER_PATH ) _file_name = cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["FILENAME"], parser_function=cls._parseFilename, - default_value=FileSourceSchema._DEFAULT_FILE_NAME + default_value=FileStoreConfig._DEFAULT_FILE_NAME ) _used = {"PATH", "FILENAME"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return FileSourceSchema(name=name, folder_path=_folder_path, file_name=_file_name, other_elements=_leftovers) + return FileStoreConfig(name=name, folder_path=_folder_path, file_name=_file_name, other_elements=_leftovers) @classmethod - def Default(cls) -> "FileSourceSchema": - return FileSourceSchema( + def Default(cls) -> "FileStoreConfig": + return FileStoreConfig( name="DefaultFileSourceSchema", folder_path=cls._DEFAULT_FOLDER_PATH, file_name=cls._DEFAULT_FILE_NAME, From 8c963906cf379000420952ac898e26e0a9f6505a Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 17 Dec 2024 09:54:10 -0600 Subject: [PATCH 098/124] Update all the DataStoreConfig subclasses to use DataStoreConfig instead of DataSourceSchema as parent. 
--- src/ogd/common/configs/storage/BigQueryConfig.py | 2 +- src/ogd/common/configs/storage/FileStoreConfig.py | 2 +- src/ogd/common/configs/storage/MySQLSourceSchema.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ogd/common/configs/storage/BigQueryConfig.py b/src/ogd/common/configs/storage/BigQueryConfig.py index 3845c39..b754c10 100644 --- a/src/ogd/common/configs/storage/BigQueryConfig.py +++ b/src/ogd/common/configs/storage/BigQueryConfig.py @@ -6,7 +6,7 @@ from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.utils.Logger import Logger -class BigQuerySchema(DataSourceSchema): +class BigQuerySchema(DataStoreConfig): _DEFAULT_PROJECT_ID = "wcer-field-day-ogd-1798" _DEFAULT_CREDENTIAL = "./config/ogd.json" diff --git a/src/ogd/common/configs/storage/FileStoreConfig.py b/src/ogd/common/configs/storage/FileStoreConfig.py index 00b42ae..243f3ef 100644 --- a/src/ogd/common/configs/storage/FileStoreConfig.py +++ b/src/ogd/common/configs/storage/FileStoreConfig.py @@ -6,7 +6,7 @@ from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.utils.Logger import Logger -class FileStoreConfig(DataSourceSchema): +class FileStoreConfig(DataStoreConfig): _DEFAULT_FOLDER_PATH = Path('./data') _DEFAULT_FILE_NAME = "UNKNOWN.tsv" diff --git a/src/ogd/common/configs/storage/MySQLSourceSchema.py b/src/ogd/common/configs/storage/MySQLSourceSchema.py index 15c8663..e9a1eb2 100644 --- a/src/ogd/common/configs/storage/MySQLSourceSchema.py +++ b/src/ogd/common/configs/storage/MySQLSourceSchema.py @@ -131,7 +131,7 @@ def _parsePort(port) -> int: # *** PRIVATE METHODS *** -class MySQLSchema(DataSourceSchema): +class MySQLSchema(DataStoreConfig): _DEFAULT_HOST = "127.0.0.1" _DEFAULT_PORT = 22 _DEFAULT_USER = "DEFAULT USER" From a2a7465c2221efab48781ef7e943638df7fccee7 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 17 Dec 2024 09:55:07 -0600 Subject: [PATCH 099/124] Rename MySQLSourceSchema -> 
MySQLConfig, similar for SSHSchema. --- .../{MySQLSourceSchema.py => MySQLConfig.py} | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) rename src/ogd/common/configs/storage/{MySQLSourceSchema.py => MySQLConfig.py} (93%) diff --git a/src/ogd/common/configs/storage/MySQLSourceSchema.py b/src/ogd/common/configs/storage/MySQLConfig.py similarity index 93% rename from src/ogd/common/configs/storage/MySQLSourceSchema.py rename to src/ogd/common/configs/storage/MySQLConfig.py index e9a1eb2..dd3384c 100644 --- a/src/ogd/common/configs/storage/MySQLSourceSchema.py +++ b/src/ogd/common/configs/storage/MySQLConfig.py @@ -6,7 +6,7 @@ from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.utils.Logger import Logger -class SSHSchema(Schema): +class SSHConfig(Schema): _DEFAULT_HOST = "127.0.0.1" _DEFAULT_PORT = 22 @@ -52,7 +52,7 @@ def AsConnectionInfo(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "SSHSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "SSHConfig": _host : Optional[str] _user : Optional[str] _pass : Optional[str] @@ -88,11 +88,11 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _used = {"SSH_HOST", "SSH_USER", "SSH_PW", "SSH_PASS", "SSH_PORT"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return SSHSchema(name=name, ssh_host=_host, ssh_user=_user, ssh_pass=_pass, ssh_port=_port, other_elements=_leftovers) + return SSHConfig(name=name, ssh_host=_host, ssh_user=_user, ssh_pass=_pass, ssh_port=_port, other_elements=_leftovers) @classmethod - def Default(cls) -> "SSHSchema": - return SSHSchema( + def Default(cls) -> "SSHConfig": + return SSHConfig( name="DefaultMySQLSchema", ssh_host=cls._DEFAULT_HOST, ssh_user=cls._DEFAULT_USER, @@ -131,7 +131,7 @@ def _parsePort(port) -> int: # *** 
PRIVATE METHODS *** -class MySQLSchema(DataStoreConfig): +class MySQLConfig(DataStoreConfig): _DEFAULT_HOST = "127.0.0.1" _DEFAULT_PORT = 22 _DEFAULT_USER = "DEFAULT USER" @@ -139,12 +139,12 @@ class MySQLSchema(DataStoreConfig): # *** BUILT-INS & PROPERTIES *** - def __init__(self, name:str, db_host:str, db_port:int, db_user:str, db_pass:Optional[str], ssh_cfg:SSHSchema, other_elements:Dict[str, Any]): + def __init__(self, name:str, db_host:str, db_port:int, db_user:str, db_pass:Optional[str], ssh_cfg:SSHConfig, other_elements:Dict[str, Any]): self._db_host : str = db_host self._db_port : int = db_port self._db_user : str = db_user self._db_pass : Optional[str] = db_pass - self._ssh_cfg : SSHSchema = ssh_cfg + self._ssh_cfg : SSHConfig = ssh_cfg super().__init__(name=name, other_elements=other_elements) @property @@ -164,11 +164,11 @@ def DBPass(self) -> Optional[str]: return self._db_pass @property - def SSHConfig(self) -> SSHSchema: + def SSHConfig(self) -> SSHConfig: return self._ssh_cfg @property - def SSH(self) -> SSHSchema: + def SSH(self) -> SSHConfig: """Shortened alias for SSHConfig, convenient when using sub-elements of the SSHConfig. :return: The schema describing the configuration for an SSH connection to a data source. @@ -201,12 +201,12 @@ def AsConnectionInfo(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "MySQLSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "MySQLConfig": _db_host : str _db_port : int _db_user : str _db_pass : Optional[str] - _ssh_cfg : SSHSchema + _ssh_cfg : SSHConfig if not isinstance(all_elements, dict): all_elements = {} @@ -240,21 +240,21 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging # TODO : probably shouldn't have keys expected for SSH be hardcoded here, maybe need a way to get back what stuff it didn't use? 
_ssh_keys = {"SSH_HOST", "SSH_PORT", "SSH_USER", "SSH_PW", "SSH_PASS"} _ssh_elems = { key : all_elements.get(key) for key in _ssh_keys.intersection(all_elements.keys()) } - _ssh_cfg = SSHSchema.FromDict(name=f"{name}-SSH", all_elements=_ssh_elems, logger=logger) + _ssh_cfg = SSHConfig.FromDict(name=f"{name}-SSH", all_elements=_ssh_elems, logger=logger) _used = {"DB_HOST", "DB_PORT", "DB_USER", "DB_PW", "DB_PASS"}.union(_ssh_keys) _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return MySQLSchema(name=name, db_host=_db_host, db_port=_db_port, db_user=_db_user, db_pass=_db_pass, ssh_cfg=_ssh_cfg, other_elements=_leftovers) + return MySQLConfig(name=name, db_host=_db_host, db_port=_db_port, db_user=_db_user, db_pass=_db_pass, ssh_cfg=_ssh_cfg, other_elements=_leftovers) @classmethod - def Default(cls) -> "MySQLSchema": - return MySQLSchema( + def Default(cls) -> "MySQLConfig": + return MySQLConfig( name="DefaultMySQLSchema", db_host=cls._DEFAULT_HOST, db_port=cls._DEFAULT_PORT, db_user=cls._DEFAULT_USER, db_pass=cls._DEFAULT_PASS, - ssh_cfg=SSHSchema.Default(), + ssh_cfg=SSHConfig.Default(), other_elements={} ) From c9b060fc0d9f761a9128c57ffe51784d65377434 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 17 Dec 2024 10:33:57 -0600 Subject: [PATCH 100/124] Replace other uses of DataSourceSchema. 
--- src/ogd/common/configs/GameSourceSchema.py | 12 ++++++------ tests/cases/schemas/config/t_GameSourceSchema.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/ogd/common/configs/GameSourceSchema.py b/src/ogd/common/configs/GameSourceSchema.py index 464b60b..18fcbdc 100644 --- a/src/ogd/common/configs/GameSourceSchema.py +++ b/src/ogd/common/configs/GameSourceSchema.py @@ -39,12 +39,12 @@ class GameSourceSchema(Schema): :type Schema: _type_ """ def __init__(self, name:str, game_id:Optional[str], - source_name:str, source_schema:Optional[DataSourceSchema], + source_name:str, source_schema:Optional[DataStoreConfig], db_name:str, table_name:str, table_schema:str, other_elements:Dict[str, Any]): self._game_id : str self._source_name : str = source_name - self._source_schema : Optional[DataSourceSchema] = source_schema + self._source_schema : Optional[DataStoreConfig] = source_schema self._db_name : str = db_name self._table_name : str = table_name self._table_schema_name : str = table_schema @@ -66,7 +66,7 @@ def SourceName(self) -> str: return self._source_name @property - def Source(self) -> Optional[DataSourceSchema]: + def Source(self) -> Optional[DataStoreConfig]: return self._source_schema @property @@ -108,7 +108,7 @@ def Default(cls) -> "GameSourceSchema": ) @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger], data_sources:Dict[str, DataSourceSchema]) -> "GameSourceSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger], data_sources:Dict[str, DataStoreConfig]) -> "GameSourceSchema": """Create a GameSourceSchema from a given dictionary :param name: _description_ @@ -118,12 +118,12 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging :param logger: _description_ :type logger: Optional[logging.Logger] :param data_sources: _description_ - :type data_sources: Dict[str, DataSourceSchema] + :type data_sources: 
Dict[str, DataStoreConfig] :return: _description_ :rtype: GameSourceSchema """ _source_name : str - _source_schema : Optional[DataSourceSchema] + _source_schema : Optional[DataStoreConfig] _db_name : str _table_schema : str _table_name : str diff --git a/tests/cases/schemas/config/t_GameSourceSchema.py b/tests/cases/schemas/config/t_GameSourceSchema.py index c7ca9b0..8a31e9f 100644 --- a/tests/cases/schemas/config/t_GameSourceSchema.py +++ b/tests/cases/schemas/config/t_GameSourceSchema.py @@ -92,7 +92,7 @@ def test_NonStandardElementNames(self): def test_FromDict(self): """Test case for whether the FromDict function is working properly. - TODO : Include assertion(s) for DataSourceSchema, as in implementation of test_Source (whenever that gets implemented) + TODO : Include assertion(s) for DataStoreConfig, as in implementation of test_Source (whenever that gets implemented) TODO : Possibly do additional cases where we check that default replacements for missing elements are correct. """ _dict = { @@ -106,13 +106,13 @@ def test_FromDict(self): "PROJECT_ID" : "aqualab-project", "PROJECT_KEY": "./key.txt" } - _sources : Dict[str, DataSourceSchema] = { "AQUALAB_BQ" : BigQuerySchema.FromDict(name="AQUALAB_BQ", all_elements=source_elems, logger=None) } + _sources : Dict[str, DataStoreConfig] = { "AQUALAB_BQ" : BigQuerySchema.FromDict(name="AQUALAB_BQ", all_elements=source_elems, logger=None) } _schema = GameSourceSchema.FromDict(name="AQUALAB", all_elements=_dict, logger=None, data_sources=_sources) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "AQUALAB") self.assertIsInstance(_schema.SourceName, str) self.assertEqual(_schema.SourceName, "AQUALAB_BQ") - # self.assertIsInstance(_schema.Source, DataSourceSchema) + # self.assertIsInstance(_schema.Source, DataStoreConfig) # self.assertEqual(_schema.Source, "AQUALAB") self.assertIsInstance(_schema.DatabaseName, str) self.assertEqual(_schema.DatabaseName, "aqualab") From 
fd797e893fba4a086bf040d5b9876a2e0e79f1fe Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 17 Dec 2024 10:47:10 -0600 Subject: [PATCH 101/124] Update all uses of BigQuerySchema to BigQueryConfig. --- src/ogd/common/configs/GameSourceSchema.py | 4 ++-- src/ogd/common/configs/storage/BigQueryConfig.py | 16 ++++++++-------- .../connectors/interfaces/BigQueryInterface.py | 6 +++--- tests/cases/schemas/config/t_GameSourceSchema.py | 6 +++--- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/ogd/common/configs/GameSourceSchema.py b/src/ogd/common/configs/GameSourceSchema.py index 18fcbdc..6f4886e 100644 --- a/src/ogd/common/configs/GameSourceSchema.py +++ b/src/ogd/common/configs/GameSourceSchema.py @@ -3,7 +3,7 @@ from typing import Any, Dict, Optional # import local files from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig -from ogd.common.schemas.storage.BigQuerySourceSchema import BigQuerySchema +from ogd.common.configs.storage.BigQueryConfig import BigQueryConfig from ogd.common.schemas.Schema import Schema from ogd.common.schemas.tables.TableSchema import TableSchema from ogd.common.utils.Logger import Logger @@ -100,7 +100,7 @@ def Default(cls) -> "GameSourceSchema": name="DefaultGameSourceSchema", game_id=cls._DEFAULT_GAME_ID, source_name=cls._DEFAULT_SOURCE_NAME, - source_schema=BigQuerySchema.Default(), + source_schema=BigQueryConfig.Default(), db_name=cls._DEFAULT_DB_NAME, table_name=cls._DEFAULT_TABLE_NAME, table_schema=cls._DEFAULT_TABLE_SCHEMA, diff --git a/src/ogd/common/configs/storage/BigQueryConfig.py b/src/ogd/common/configs/storage/BigQueryConfig.py index b754c10..e963a74 100644 --- a/src/ogd/common/configs/storage/BigQueryConfig.py +++ b/src/ogd/common/configs/storage/BigQueryConfig.py @@ -6,7 +6,7 @@ from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.utils.Logger import Logger -class BigQuerySchema(DataStoreConfig): +class BigQueryConfig(DataStoreConfig): 
_DEFAULT_PROJECT_ID = "wcer-field-day-ogd-1798" _DEFAULT_CREDENTIAL = "./config/ogd.json" @@ -41,16 +41,16 @@ def AsMarkdown(self) -> str: return ret_val @classmethod - def Default(cls) -> "BigQuerySchema": - return BigQuerySchema( - name="DefaultBigQuerySchema", + def Default(cls) -> "BigQueryConfig": + return BigQueryConfig( + name="DefaultBigQueryConfig", project_id=cls._DEFAULT_PROJECT_ID, credential=cls._DEFAULT_CREDENTIAL, other_elements={} ) @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]) -> "BigQuerySchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]) -> "BigQueryConfig": _project_id : str _credential : Optional[str] @@ -60,17 +60,17 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _project_id = cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["PROJECT_ID", "DATASET_ID"], parser_function=cls._parseProjectID, - default_value=BigQuerySchema._DEFAULT_PROJECT_ID + default_value=BigQueryConfig._DEFAULT_PROJECT_ID ) _credential = cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["PROJECT_KEY"], parser_function=cls._parseCredential, - default_value=BigQuerySchema._DEFAULT_CREDENTIAL + default_value=BigQueryConfig._DEFAULT_CREDENTIAL ) _used = {"PROJECT_ID", "DATASET_ID", "PROJECT_KEY"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return BigQuerySchema(name=name, project_id=_project_id, credential=_credential, other_elements=_leftovers) + return BigQueryConfig(name=name, project_id=_project_id, credential=_credential, other_elements=_leftovers) # *** PUBLIC STATICS *** diff --git a/src/ogd/common/connectors/interfaces/BigQueryInterface.py b/src/ogd/common/connectors/interfaces/BigQueryInterface.py index b48010e..a258c35 100644 --- a/src/ogd/common/connectors/interfaces/BigQueryInterface.py +++ 
b/src/ogd/common/connectors/interfaces/BigQueryInterface.py @@ -9,7 +9,7 @@ from ogd.common.connectors.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema -from ogd.common.schemas.storage.BigQuerySourceSchema import BigQuerySchema +from ogd.common.configs.storage.BigQueryConfig import BigQueryConfig from ogd.common.utils.Logger import Logger AQUALAB_MIN_VERSION : Final[float] = 6.2 @@ -31,7 +31,7 @@ def _open(self, force_reopen: bool = False) -> bool: if not self._is_open: if "GITHUB_ACTIONS" in os.environ: self._client = bigquery.Client() - elif isinstance(self._config.Source, BigQuerySchema): + elif isinstance(self._config.Source, BigQueryConfig): os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self._config.Source.Credential or "NO CREDENTIAL CONFIGURED!" or f"./{self._game_id}.json" self._client = bigquery.Client() else: @@ -190,7 +190,7 @@ def DBPath(self, min_date:Optional[date]=None, max_date:Optional[date]=None) -> :return: The full path from project ID to table name, if properly set in configuration, else the literal string "INVALID SOURCE SCHEMA". :rtype: str """ - if isinstance(self._config.Source, BigQuerySchema): + if isinstance(self._config.Source, BigQueryConfig): # _current_date = datetime.now().date() date_wildcard = "*" # if min_date is not None and max_date is not None: diff --git a/tests/cases/schemas/config/t_GameSourceSchema.py b/tests/cases/schemas/config/t_GameSourceSchema.py index 8a31e9f..9fdde2b 100644 --- a/tests/cases/schemas/config/t_GameSourceSchema.py +++ b/tests/cases/schemas/config/t_GameSourceSchema.py @@ -6,7 +6,7 @@ from unittest import TestCase # import ogd libraries. 
from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig -from ogd.common.schemas.storage.BigQuerySourceSchema import BigQuerySchema +from ogd.common.configs.storage.BigQueryConfig import BigQueryConfig from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals @@ -34,7 +34,7 @@ def setUpClass(cls) -> None: name="Game Source Schema", game_id="AQUALAB", source_name="AQUALAB_BQ", - source_schema=BigQuerySchema.FromDict(name="AQUALAB_BQ", all_elements=source_elems, logger=None), + source_schema=BigQueryConfig.FromDict(name="AQUALAB_BQ", all_elements=source_elems, logger=None), db_name="aqualab", table_name="aqualab_daily", table_schema="OPENGAMEDATA_BIGQUERY", @@ -106,7 +106,7 @@ def test_FromDict(self): "PROJECT_ID" : "aqualab-project", "PROJECT_KEY": "./key.txt" } - _sources : Dict[str, DataStoreConfig] = { "AQUALAB_BQ" : BigQuerySchema.FromDict(name="AQUALAB_BQ", all_elements=source_elems, logger=None) } + _sources : Dict[str, DataStoreConfig] = { "AQUALAB_BQ" : BigQueryConfig.FromDict(name="AQUALAB_BQ", all_elements=source_elems, logger=None) } _schema = GameSourceSchema.FromDict(name="AQUALAB", all_elements=_dict, logger=None, data_sources=_sources) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "AQUALAB") From 1c8a4cc5492fb8ae2b6b41be8e94e3a08a0db6c0 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 17 Dec 2024 10:56:49 -0600 Subject: [PATCH 102/124] Various places where MySQLSchema needed to be replaced by MySQLConfig. 
--- .../common/configs/storage/FileStoreConfig.py | 2 +- src/ogd/common/configs/storage/MySQLConfig.py | 4 ++-- .../connectors/interfaces/MySQLInterface.py | 22 +++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/ogd/common/configs/storage/FileStoreConfig.py b/src/ogd/common/configs/storage/FileStoreConfig.py index 243f3ef..35736d4 100644 --- a/src/ogd/common/configs/storage/FileStoreConfig.py +++ b/src/ogd/common/configs/storage/FileStoreConfig.py @@ -78,7 +78,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging @classmethod def Default(cls) -> "FileStoreConfig": return FileStoreConfig( - name="DefaultFileSourceSchema", + name="DefaultFileStoreConfig", folder_path=cls._DEFAULT_FOLDER_PATH, file_name=cls._DEFAULT_FILE_NAME, other_elements={} diff --git a/src/ogd/common/configs/storage/MySQLConfig.py b/src/ogd/common/configs/storage/MySQLConfig.py index dd3384c..9e07b60 100644 --- a/src/ogd/common/configs/storage/MySQLConfig.py +++ b/src/ogd/common/configs/storage/MySQLConfig.py @@ -93,7 +93,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging @classmethod def Default(cls) -> "SSHConfig": return SSHConfig( - name="DefaultMySQLSchema", + name="DefaultSSHConfig", ssh_host=cls._DEFAULT_HOST, ssh_user=cls._DEFAULT_USER, ssh_pass=cls._DEFAULT_PASS, @@ -249,7 +249,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging @classmethod def Default(cls) -> "MySQLConfig": return MySQLConfig( - name="DefaultMySQLSchema", + name="DefaultMySQLConfig", db_host=cls._DEFAULT_HOST, db_port=cls._DEFAULT_PORT, db_user=cls._DEFAULT_USER, diff --git a/src/ogd/common/connectors/interfaces/MySQLInterface.py b/src/ogd/common/connectors/interfaces/MySQLInterface.py index cb5b42f..c086b8f 100644 --- a/src/ogd/common/connectors/interfaces/MySQLInterface.py +++ b/src/ogd/common/connectors/interfaces/MySQLInterface.py @@ -12,8 +12,8 @@ from 
ogd.common.models.enums.FilterMode import FilterMode from ogd.common.models.enums.IDMode import IDMode from ogd.common.models.enums.VersionType import VersionType -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema -from ogd.common.schemas.storage.MySQLSourceSchema import MySQLSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.storage.MySQLConfig import MySQLConfig from ogd.common.utils.Logger import Logger @@ -41,7 +41,7 @@ def ConnectDB(schema:GameSourceSchema) -> Tuple[Optional[sshtunnel.SSHTunnelForw tunnel : Optional[sshtunnel.SSHTunnelForwarder] = None db_conn : Optional[connection.MySQLConnection] = None # Logger.Log("Preparing database connection...", logging.INFO) - if schema.Source is not None and isinstance(schema.Source, MySQLSchema): + if schema.Source is not None and isinstance(schema.Source, MySQLConfig): if schema.Source.HasSSH: Logger.Log(f"Preparing to connect to MySQL via SSH, on host {schema.Source.SSH.Host}", level=logging.DEBUG) if (schema.Source.SSH.Host != "" and schema.Source.SSH.User != "" and schema.Source.SSH.Pass != ""): @@ -63,7 +63,7 @@ def ConnectDB(schema:GameSourceSchema) -> Tuple[Optional[sshtunnel.SSHTunnelForw # Function to help connect to a mySQL server. @staticmethod - def _connectToMySQL(login:MySQLSchema, db:str) -> Optional[connection.MySQLConnection]: + def _connectToMySQL(login:MySQLConfig, db:str) -> Optional[connection.MySQLConnection]: """Function to help connect to a mySQL server. Simply tries to make a connection, and prints an error in case of failure. @@ -90,7 +90,7 @@ def _connectToMySQL(login:MySQLSchema, db:str) -> Optional[connection.MySQLConne ## Function to help connect to a mySQL server over SSH. 
@staticmethod - def _connectToMySQLviaSSH(sql:MySQLSchema, db:str) -> Tuple[Optional[sshtunnel.SSHTunnelForwarder], Optional[connection.MySQLConnection]]: + def _connectToMySQLviaSSH(sql:MySQLConfig, db:str) -> Tuple[Optional[sshtunnel.SSHTunnelForwarder], Optional[connection.MySQLConnection]]: """Function to help connect to a mySQL server over SSH. Simply tries to make a connection, and prints an error in case of failure. @@ -256,7 +256,7 @@ def _open(self, force_reopen:bool = False) -> bool: self.Open(force_reopen=False) if not self._is_open: start = datetime.now() - if isinstance(self.GameSourceSchema.Source, MySQLSchema): + if isinstance(self.GameSourceSchema.Source, MySQLConfig): self._tunnel, self._db = SQL.ConnectDB(schema=self.GameSourceSchema) if self._db is not None: self._db_cursor = self._getCursor() @@ -282,7 +282,7 @@ def _close(self) -> bool: return True def _availableIDs(self, mode:IDMode, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[str]: - if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): + if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLConfig): _db_name : str = self.GameSourceSchema.DatabaseName _table_name : str = self.GameSourceSchema.TableName @@ -311,7 +311,7 @@ def _availableIDs(self, mode:IDMode, date_filter:TimingFilterCollection, version # def _IDsFromDates(self, min:datetime, max:datetime, versions:Optional[List[int]]=None) -> List[str]: # ret_val = [] - # if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): + # if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLConfig): # # alias long setting names. 
# _db_name : str = self.GameSourceSchema.DatabaseName # _table_name : str = self.GameSourceSchema.TableName @@ -343,7 +343,7 @@ def _availableIDs(self, mode:IDMode, date_filter:TimingFilterCollection, version def _availableDates(self, id_filter:IDFilterCollection, version_filter:VersioningFilterCollection) -> Dict[str,datetime]: ret_val = {'min':datetime.now(), 'max':datetime.now()} - if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): + if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLConfig): _db_name : str = self.GameSourceSchema.DatabaseName _table_name : str = self.GameSourceSchema.TableName @@ -367,7 +367,7 @@ def _availableDates(self, id_filter:IDFilterCollection, version_filter:Versionin # def _datesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]]=None) -> Dict[str, datetime]: # ret_val = {'min':datetime.now(), 'max':datetime.now()} - # if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): + # if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLConfig): # # alias long setting names. # _db_name : str = self.GameSourceSchema.DatabaseName # _table_name : str = self.GameSourceSchema.TableName @@ -406,7 +406,7 @@ def _availableVersions(self, mode:VersionType, id_filter:IDFilterCollection, dat def _getEventRows(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Tuple]: ret_val = [] # grab data for the given session range. 
Sort by event time, so - if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLSchema): + if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLConfig): # filt = f"app_id='{self._game_id}' AND (session_id BETWEEN '{next_slice[0]}' AND '{next_slice[-1]}'){ver_filter}" _db_name : str = self.GameSourceSchema.DatabaseName _table_name : str = self.GameSourceSchema.TableName From f96721c5bf062a6534805c7162fdfdcfb79e470d Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Tue, 17 Dec 2024 11:15:51 -0600 Subject: [PATCH 103/124] Rename generator-related schema files to configs. --- .../configs/games/{AggregateSchema.py => AggregateConfig.py} | 0 .../common/configs/games/{DetectorSchema.py => DetectorConfig.py} | 0 .../configs/games/{DetectorMapSchema.py => DetectorMapConfig.py} | 0 .../common/configs/games/{FeatureSchema.py => FeatureConfig.py} | 0 .../configs/games/{FeatureMapSchema.py => FeatureMapConfig.py} | 0 .../configs/games/{GeneratorSchema.py => GeneratorConfig.py} | 0 .../common/configs/games/{PerCountSchema.py => PerCountConfig.py} | 0 7 files changed, 0 insertions(+), 0 deletions(-) rename src/ogd/common/configs/games/{AggregateSchema.py => AggregateConfig.py} (100%) rename src/ogd/common/configs/games/{DetectorSchema.py => DetectorConfig.py} (100%) rename src/ogd/common/configs/games/{DetectorMapSchema.py => DetectorMapConfig.py} (100%) rename src/ogd/common/configs/games/{FeatureSchema.py => FeatureConfig.py} (100%) rename src/ogd/common/configs/games/{FeatureMapSchema.py => FeatureMapConfig.py} (100%) rename src/ogd/common/configs/games/{GeneratorSchema.py => GeneratorConfig.py} (100%) rename src/ogd/common/configs/games/{PerCountSchema.py => PerCountConfig.py} (100%) diff --git a/src/ogd/common/configs/games/AggregateSchema.py b/src/ogd/common/configs/games/AggregateConfig.py similarity index 100% rename from src/ogd/common/configs/games/AggregateSchema.py rename to 
src/ogd/common/configs/games/AggregateConfig.py diff --git a/src/ogd/common/configs/games/DetectorSchema.py b/src/ogd/common/configs/games/DetectorConfig.py similarity index 100% rename from src/ogd/common/configs/games/DetectorSchema.py rename to src/ogd/common/configs/games/DetectorConfig.py diff --git a/src/ogd/common/configs/games/DetectorMapSchema.py b/src/ogd/common/configs/games/DetectorMapConfig.py similarity index 100% rename from src/ogd/common/configs/games/DetectorMapSchema.py rename to src/ogd/common/configs/games/DetectorMapConfig.py diff --git a/src/ogd/common/configs/games/FeatureSchema.py b/src/ogd/common/configs/games/FeatureConfig.py similarity index 100% rename from src/ogd/common/configs/games/FeatureSchema.py rename to src/ogd/common/configs/games/FeatureConfig.py diff --git a/src/ogd/common/configs/games/FeatureMapSchema.py b/src/ogd/common/configs/games/FeatureMapConfig.py similarity index 100% rename from src/ogd/common/configs/games/FeatureMapSchema.py rename to src/ogd/common/configs/games/FeatureMapConfig.py diff --git a/src/ogd/common/configs/games/GeneratorSchema.py b/src/ogd/common/configs/games/GeneratorConfig.py similarity index 100% rename from src/ogd/common/configs/games/GeneratorSchema.py rename to src/ogd/common/configs/games/GeneratorConfig.py diff --git a/src/ogd/common/configs/games/PerCountSchema.py b/src/ogd/common/configs/games/PerCountConfig.py similarity index 100% rename from src/ogd/common/configs/games/PerCountSchema.py rename to src/ogd/common/configs/games/PerCountConfig.py From e2fe9d2832dea9e249919cb263bd27471c82fafc Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 18:23:28 -0600 Subject: [PATCH 104/124] Rename most of the feature-related schemas to config. 
--- .github/workflows/TEST_GameSchemas.yml | 2 +- .../common/configs/games/AggregateConfig.py | 14 ++++---- .../common/configs/games/DetectorConfig.py | 12 +++---- .../common/configs/games/DetectorMapConfig.py | 12 +++---- src/ogd/common/configs/games/FeatureConfig.py | 32 +++++++++---------- .../common/configs/games/FeatureMapConfig.py | 14 ++++---- .../common/configs/games/GeneratorConfig.py | 8 ++--- .../common/configs/games/PerCountConfig.py | 12 +++---- src/ogd/common/schemas/games/GameSchema.py | 12 +++---- 9 files changed, 59 insertions(+), 59 deletions(-) diff --git a/.github/workflows/TEST_GameSchemas.yml b/.github/workflows/TEST_GameSchemas.yml index 560006f..9c57b7a 100644 --- a/.github/workflows/TEST_GameSchemas.yml +++ b/.github/workflows/TEST_GameSchemas.yml @@ -24,7 +24,7 @@ jobs: strategy: matrix: testbed: [ - t_AggregateSchema, + t_AggregateConfig, t_DataElementSchema, t_DetectorMapSchema, t_DetectorSchema, diff --git a/src/ogd/common/configs/games/AggregateConfig.py b/src/ogd/common/configs/games/AggregateConfig.py index a4e32e0..65a378c 100644 --- a/src/ogd/common/configs/games/AggregateConfig.py +++ b/src/ogd/common/configs/games/AggregateConfig.py @@ -2,10 +2,10 @@ import logging from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.games.FeatureSchema import FeatureSchema +from ogd.common.configs.games.FeatureConfig import FeatureConfig from ogd.common.utils.typing import Map -class AggregateSchema(FeatureSchema): +class AggregateConfig(FeatureSchema): def __init__(self, name:str, other_elements:Optional[Map]=None): super().__init__(name=name, other_elements=other_elements) @@ -21,12 +21,12 @@ def AsMarkdown(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "AggregateSchema": - return AggregateSchema(name=name, other_elements=all_elements) + def FromDict(cls, name:str, all_elements:Dict[str, Any], 
logger:Optional[logging.Logger]=None)-> "AggregateConfig": + return AggregateConfig(name=name, other_elements=all_elements) @classmethod - def Default(cls) -> "AggregateSchema": - return AggregateSchema( - name="DefaultAggregateSchema", + def Default(cls) -> "AggregateConfig": + return AggregateConfig( + name="DefaultAggregateConfig", other_elements={} ) diff --git a/src/ogd/common/configs/games/DetectorConfig.py b/src/ogd/common/configs/games/DetectorConfig.py index 33adab5..0d4d8cf 100644 --- a/src/ogd/common/configs/games/DetectorConfig.py +++ b/src/ogd/common/configs/games/DetectorConfig.py @@ -3,10 +3,10 @@ from typing import Any, Dict, Optional # import local files from ogd.common.models.enums.ExtractionMode import ExtractionMode -from ogd.common.schemas.games.GeneratorSchema import GeneratorSchema +from ogd.common.configs.games.GeneratorConfig import GeneratorConfig from ogd.common.utils.typing import Map -class DetectorSchema(GeneratorSchema): +class DetectorConfig(GeneratorSchema): def __init__(self, name:str, other_elements:Optional[Map]=None): super().__init__(name=name, other_elements=other_elements) @@ -20,9 +20,9 @@ def AsMarkdown(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "DetectorSchema": - return DetectorSchema(name=name, other_elements=all_elements) + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "DetectorConfig": + return DetectorConfig(name=name, other_elements=all_elements) @classmethod - def Default(cls) -> "DetectorSchema": - return DetectorSchema(name="DefaultDetectorSchema", other_elements={}) + def Default(cls) -> "DetectorConfig": + return DetectorConfig(name="DefaultDetectorSchema", other_elements={}) diff --git a/src/ogd/common/configs/games/DetectorMapConfig.py b/src/ogd/common/configs/games/DetectorMapConfig.py index 440012e..bb7042c 100644 --- 
a/src/ogd/common/configs/games/DetectorMapConfig.py +++ b/src/ogd/common/configs/games/DetectorMapConfig.py @@ -2,12 +2,12 @@ import logging from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.games.DetectorSchema import DetectorSchema +from ogd.common.configs.games.DetectorConfig import DetectorConfig from ogd.common.schemas.Schema import Schema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class DetectorMapSchema(Schema): +class DetectorMapConfig(Schema): _DEFAULT_PERLEVEL_DETECTORS = {} _DEFAULT_PERCOUNT_DETECTORS = {} _DEFAULT_AGGREGATE_DETECTORS = {} @@ -59,7 +59,7 @@ def AsDict(self) -> Dict[str, Dict[str, DetectorSchema]]: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "DetectorMapSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "DetectorMapConfig": _perlevel_detectors : Dict[str, DetectorSchema] _percount_detectors : Dict[str, DetectorSchema] _aggregate_detectors : Dict[str, DetectorSchema] @@ -85,13 +85,13 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _used = {"perlevel", "per_level", "per_count", "percount", "aggregate"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return DetectorMapSchema(name=name, perlevel_detectors=_perlevel_detectors, + return DetectorMapConfig(name=name, perlevel_detectors=_perlevel_detectors, percount_detectors=_percount_detectors, aggregate_detectors=_aggregate_detectors, other_elements=_leftovers) @classmethod - def Default(cls) -> "DetectorMapSchema": - return DetectorMapSchema( + def Default(cls) -> "DetectorMapConfig": + return DetectorMapConfig( name="DefaultDetectorMapSchema", perlevel_detectors=cls._DEFAULT_PERLEVEL_DETECTORS, percount_detectors=cls._DEFAULT_PERCOUNT_DETECTORS, diff --git a/src/ogd/common/configs/games/FeatureConfig.py 
b/src/ogd/common/configs/games/FeatureConfig.py index db25f73..2ae3556 100644 --- a/src/ogd/common/configs/games/FeatureConfig.py +++ b/src/ogd/common/configs/games/FeatureConfig.py @@ -2,12 +2,12 @@ import logging from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.games.GeneratorSchema import GeneratorSchema +from ogd.common.configs.games.GeneratorConfig import GeneratorConfig from ogd.common.schemas.Schema import Schema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class SubfeatureSchema(Schema): +class SubfeatureConfig(Schema): _DEFAULT_RETURN_TYPE = "str" _DEFAULT_DESCRIPTION = "Default Subfeature schema object. Does not correspond to any actual data." @@ -37,7 +37,7 @@ def AsMarkdown(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "SubfeatureSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "SubfeatureConfig": _return_type : str _description : str @@ -58,11 +58,11 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _used = {"return_type", "description"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return SubfeatureSchema(name=name, return_type=_return_type, description=_description, other_elements=_leftovers) + return SubfeatureConfig(name=name, return_type=_return_type, description=_description, other_elements=_leftovers) @classmethod - def Default(cls) -> "SubfeatureSchema": - return SubfeatureSchema( + def Default(cls) -> "SubfeatureConfig": + return SubfeatureConfig( name="DefaultSubfeatureSchema", return_type=cls._DEFAULT_RETURN_TYPE, description=cls._DEFAULT_DESCRIPTION, @@ -97,28 +97,28 @@ def _parseDescription(description): # *** PRIVATE METHODS *** -class FeatureSchema(GeneratorSchema): +class FeatureConfig(GeneratorSchema): """Base class for all schemas related 
to defining feature Extractor configurations. """ # *** BUILT-INS & PROPERTIES *** def __init__(self, name:str, other_elements:Optional[Map]=None): - self._subfeatures : Dict[str, SubfeatureSchema] + self._subfeatures : Dict[str, SubfeatureConfig] self._return_type : str if not isinstance(other_elements, dict): other_elements = {} Logger.Log(f"For {name} Feature config, all_elements was not a dict, defaulting to empty dict", logging.WARN) - self._return_type = FeatureSchema.ElementFromDict(all_elements=other_elements, + self._return_type = FeatureConfig.ElementFromDict(all_elements=other_elements, element_names=["return_type"], - parser_function=FeatureSchema._parseReturnType, + parser_function=FeatureConfig._parseReturnType, default_value="UNKNOWN" ) - self._subfeatures = FeatureSchema.ElementFromDict(all_elements=other_elements, + self._subfeatures = FeatureConfig.ElementFromDict(all_elements=other_elements, element_names=["subfeatures"], - parser_function=FeatureSchema._parseSubfeatures, + parser_function=FeatureConfig._parseSubfeatures, default_value={} ) @@ -132,7 +132,7 @@ def ReturnType(self) -> str: return self._return_type @property - def Subfeatures(self) -> Dict[str, SubfeatureSchema]: + def Subfeatures(self) -> Dict[str, SubfeatureConfig]: return self._subfeatures # *** IMPLEMENT ABSTRACT FUNCTIONS *** @@ -154,10 +154,10 @@ def _parseReturnType(return_type, feature_name:str=""): return ret_val @staticmethod - def _parseSubfeatures(subfeatures) -> Dict[str, SubfeatureSchema]: - ret_val : Dict[str, SubfeatureSchema] + def _parseSubfeatures(subfeatures) -> Dict[str, SubfeatureConfig]: + ret_val : Dict[str, SubfeatureConfig] if isinstance(subfeatures, dict): - ret_val = {name:SubfeatureSchema.FromDict(name=name, all_elements=elems) for name,elems in subfeatures.items()} + ret_val = {name:SubfeatureConfig.FromDict(name=name, all_elements=elems) for name,elems in subfeatures.items()} else: ret_val = {} Logger.Log(f"Extractor subfeatures was unexpected type 
{type(subfeatures)}, defaulting to empty list.", logging.WARN) diff --git a/src/ogd/common/configs/games/FeatureMapConfig.py b/src/ogd/common/configs/games/FeatureMapConfig.py index e0c56b2..66d2f71 100644 --- a/src/ogd/common/configs/games/FeatureMapConfig.py +++ b/src/ogd/common/configs/games/FeatureMapConfig.py @@ -2,13 +2,13 @@ import logging from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.games.AggregateSchema import AggregateSchema -from ogd.common.schemas.games.PerCountSchema import PerCountSchema +from ogd.common.configs.games.AggregateConfig import AggregateConfig +from ogd.common.configs.games.PerCountConfig import PerCountConfig from ogd.common.schemas.Schema import Schema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class FeatureMapSchema(Schema): +class FeatureMapConfig(Schema): """ Dumb struct to contain the specification and config of a set of features for a game. """ @@ -58,7 +58,7 @@ def AsMarkdown(self) -> str: return " \n\n".join(feature_summary + feature_list) @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "FeatureMapSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "FeatureMapConfig": _legacy_mode : bool _legacy_perlevel_feats : Dict[str, PerCountSchema] _percount_feats : Dict[str, PerCountSchema] @@ -90,13 +90,13 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _used = {"legacy", "perlevel", "per_count", "aggregate"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return FeatureMapSchema(name=name, legacy_mode=_legacy_mode, legacy_perlevel_feats=_legacy_perlevel_feats, + return FeatureMapConfig(name=name, legacy_mode=_legacy_mode, legacy_perlevel_feats=_legacy_perlevel_feats, percount_feats=_percount_feats, aggregate_feats=_aggregate_feats, other_elements=_leftovers) 
@classmethod - def Default(cls) -> "FeatureMapSchema": - return FeatureMapSchema( + def Default(cls) -> "FeatureMapConfig": + return FeatureMapConfig( name="DefaultFeatureMapSchema", legacy_mode=cls._DEFAULT_LEGACY_MODE, legacy_perlevel_feats=cls._DEFAULT_LEGACY_FEATS, diff --git a/src/ogd/common/configs/games/GeneratorConfig.py b/src/ogd/common/configs/games/GeneratorConfig.py index b9d2652..185f4c2 100644 --- a/src/ogd/common/configs/games/GeneratorConfig.py +++ b/src/ogd/common/configs/games/GeneratorConfig.py @@ -7,7 +7,7 @@ from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class GeneratorSchema(Schema): +class GeneratorConfig(Schema): def __init__(self, name:str, other_elements:Optional[Map]=None): self._enabled : Set[ExtractionMode] self._type_name : str @@ -19,16 +19,16 @@ def __init__(self, name:str, other_elements:Optional[Map]=None): Logger.Log(f"For {name} Extractor config, all_elements was not a dict, defaulting to empty dict", logging.WARN) if "type" in _other_elements.keys(): - self._type_name = GeneratorSchema._parseType(_other_elements['type']) + self._type_name = GeneratorConfig._parseType(_other_elements['type']) else: self._type_name = name if "enabled" in _other_elements.keys(): - self._enabled = GeneratorSchema._parseEnabled(_other_elements['enabled']) + self._enabled = GeneratorConfig._parseEnabled(_other_elements['enabled']) else: self._enabled = {ExtractionMode.DETECTOR, ExtractionMode.SESSION, ExtractionMode.PLAYER, ExtractionMode.POPULATION} Logger.Log(f"{name} config does not have an 'enabled' element; defaulting to enabled=True", logging.WARN) if "description" in _other_elements.keys(): - self._description = GeneratorSchema._parseDescription(_other_elements['description']) + self._description = GeneratorConfig._parseDescription(_other_elements['description']) else: self._description = "No Description" Logger.Log(f"{name} config does not have an 'description' element; defaulting to 
description='{self._description}'", logging.WARN) diff --git a/src/ogd/common/configs/games/PerCountConfig.py b/src/ogd/common/configs/games/PerCountConfig.py index 374e9f0..458009e 100644 --- a/src/ogd/common/configs/games/PerCountConfig.py +++ b/src/ogd/common/configs/games/PerCountConfig.py @@ -2,11 +2,11 @@ import logging from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.games.FeatureSchema import FeatureSchema +from ogd.common.configs.games.FeatureConfig import FeatureConfig from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class PerCountSchema(FeatureSchema): +class PerCountConfig(FeatureSchema): _DEFAULT_COUNT = 1 _DEFAULT_PREFIX = "pre" @@ -41,7 +41,7 @@ def AsMarkdown(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "PerCountSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "PerCountConfig": _count : int | str _prefix : str @@ -61,11 +61,11 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _used = {"count", "prefix"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return PerCountSchema(name=name, count=_count, prefix=_prefix, other_elements=_leftovers) + return PerCountConfig(name=name, count=_count, prefix=_prefix, other_elements=_leftovers) @classmethod - def Default(cls) -> "PerCountSchema": - return PerCountSchema( + def Default(cls) -> "PerCountConfig": + return PerCountConfig( name="DefaultPerCountSchema", count=cls._DEFAULT_COUNT, prefix=cls._DEFAULT_PREFIX, diff --git a/src/ogd/common/schemas/games/GameSchema.py b/src/ogd/common/schemas/games/GameSchema.py index 8361a6e..eee33e3 100644 --- a/src/ogd/common/schemas/games/GameSchema.py +++ b/src/ogd/common/schemas/games/GameSchema.py @@ -6,14 +6,14 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union # 
import local files from ogd.common.schemas.Schema import Schema -from ogd.common.schemas.games.AggregateSchema import AggregateSchema -from ogd.common.schemas.games.DetectorSchema import DetectorSchema -from ogd.common.schemas.games.DetectorMapSchema import DetectorMapSchema +from ogd.common.configs.games.AggregateConfig import AggregateConfig +from ogd.common.configs.games.DetectorConfig import DetectorConfig +from ogd.common.configs.games.DetectorMapConfig import DetectorMapConfig from ogd.common.schemas.games.DataElementSchema import DataElementSchema from ogd.common.schemas.games.EventSchema import EventSchema -from ogd.common.schemas.games.PerCountSchema import PerCountSchema -from ogd.common.schemas.games.FeatureSchema import FeatureSchema -from ogd.common.schemas.games.FeatureMapSchema import FeatureMapSchema +from ogd.common.configs.games.PerCountConfig import PerCountConfig +from ogd.common.configs.games.FeatureConfig import FeatureConfig +from ogd.common.configs.games.FeatureMapConfig import FeatureMapConfig from ogd.common.models.enums.IterationMode import IterationMode from ogd.common.models.enums.ExtractionMode import ExtractionMode from ogd.common.utils import fileio From 444f580588d91597d49d20eef630e52a35ef9e90 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 18:27:04 -0600 Subject: [PATCH 105/124] Search-and-replace occurrences of most of the game schemas to configs. 
--- .github/workflows/TEST_GameSchemas.yml | 8 +-- .../common/configs/games/AggregateConfig.py | 2 +- .../common/configs/games/DetectorConfig.py | 4 +- .../common/configs/games/DetectorMapConfig.py | 40 ++++++------- src/ogd/common/configs/games/FeatureConfig.py | 4 +- .../common/configs/games/FeatureMapConfig.py | 40 ++++++------- .../common/configs/games/PerCountConfig.py | 4 +- src/ogd/common/schemas/games/GameSchema.py | 58 +++++++++---------- src/ogd/common/schemas/games/__init__.py | 20 +++---- .../cases/schemas/games/t_AggregateSchema.py | 8 +-- tests/cases/schemas/games/t_DetectorSchema.py | 10 ++-- tests/cases/schemas/games/t_FeatureSchema.py | 10 ++-- .../cases/schemas/games/t_GeneratorSchema.py | 10 ++-- tests/cases/schemas/games/t_PerCountSchema.py | 10 ++-- 14 files changed, 114 insertions(+), 114 deletions(-) diff --git a/.github/workflows/TEST_GameSchemas.yml b/.github/workflows/TEST_GameSchemas.yml index 9c57b7a..a124fba 100644 --- a/.github/workflows/TEST_GameSchemas.yml +++ b/.github/workflows/TEST_GameSchemas.yml @@ -27,14 +27,14 @@ jobs: t_AggregateConfig, t_DataElementSchema, t_DetectorMapSchema, - t_DetectorSchema, + t_DetectorConfig, t_EventSchema, t_FeatureMapSchema, - t_FeatureSchema, + t_FeatureConfig, t_GameSchema, t_GameStateSchema, - t_GeneratorSchema, - t_PerCountSchema, + t_GeneratorConfig, + t_PerCountConfig, ] fail-fast: false # we don't want to cancel just because one testbed fails. 
max-parallel: 20 diff --git a/src/ogd/common/configs/games/AggregateConfig.py b/src/ogd/common/configs/games/AggregateConfig.py index 65a378c..79a0ce7 100644 --- a/src/ogd/common/configs/games/AggregateConfig.py +++ b/src/ogd/common/configs/games/AggregateConfig.py @@ -5,7 +5,7 @@ from ogd.common.configs.games.FeatureConfig import FeatureConfig from ogd.common.utils.typing import Map -class AggregateConfig(FeatureSchema): +class AggregateConfig(FeatureConfig): def __init__(self, name:str, other_elements:Optional[Map]=None): super().__init__(name=name, other_elements=other_elements) diff --git a/src/ogd/common/configs/games/DetectorConfig.py b/src/ogd/common/configs/games/DetectorConfig.py index 0d4d8cf..2350f0a 100644 --- a/src/ogd/common/configs/games/DetectorConfig.py +++ b/src/ogd/common/configs/games/DetectorConfig.py @@ -6,7 +6,7 @@ from ogd.common.configs.games.GeneratorConfig import GeneratorConfig from ogd.common.utils.typing import Map -class DetectorConfig(GeneratorSchema): +class DetectorConfig(GeneratorConfig): def __init__(self, name:str, other_elements:Optional[Map]=None): super().__init__(name=name, other_elements=other_elements) @@ -25,4 +25,4 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging @classmethod def Default(cls) -> "DetectorConfig": - return DetectorConfig(name="DefaultDetectorSchema", other_elements={}) + return DetectorConfig(name="DefaultDetectorConfig", other_elements={}) diff --git a/src/ogd/common/configs/games/DetectorMapConfig.py b/src/ogd/common/configs/games/DetectorMapConfig.py index bb7042c..b6332bf 100644 --- a/src/ogd/common/configs/games/DetectorMapConfig.py +++ b/src/ogd/common/configs/games/DetectorMapConfig.py @@ -18,24 +18,24 @@ class DetectorMapConfig(Schema): Dumb struct to contain the specification and config of a set of features for a game. 
""" def __init__(self, name:str, - perlevel_detectors:Dict[str, DetectorSchema], percount_detectors:Dict[str, DetectorSchema], aggregate_detectors:Dict[str, DetectorSchema], + perlevel_detectors:Dict[str, DetectorConfig], percount_detectors:Dict[str, DetectorConfig], aggregate_detectors:Dict[str, DetectorConfig], other_elements:Optional[Map]=None): - self._perlevel_detectors : Dict[str, DetectorSchema] = perlevel_detectors - self._percount_detectors : Dict[str, DetectorSchema] = percount_detectors - self._aggregate_detectors : Dict[str, DetectorSchema] = aggregate_detectors + self._perlevel_detectors : Dict[str, DetectorConfig] = perlevel_detectors + self._percount_detectors : Dict[str, DetectorConfig] = percount_detectors + self._aggregate_detectors : Dict[str, DetectorConfig] = aggregate_detectors super().__init__(name=name, other_elements=other_elements) @property - def PerLevelDetectors(self) -> Dict[str, DetectorSchema]: + def PerLevelDetectors(self) -> Dict[str, DetectorConfig]: return self._perlevel_detectors @property - def PerCountDetectors(self) -> Dict[str, DetectorSchema]: + def PerCountDetectors(self) -> Dict[str, DetectorConfig]: return self._percount_detectors @property - def AggregateDetectors(self) -> Dict[str, DetectorSchema]: + def AggregateDetectors(self) -> Dict[str, DetectorConfig]: return self._aggregate_detectors # *** IMPLEMENT ABSTRACT FUNCTIONS *** @@ -50,7 +50,7 @@ def AsMarkdown(self) -> str: return " \n\n".join(feature_summary + feature_list) @property - def AsDict(self) -> Dict[str, Dict[str, DetectorSchema]]: + def AsDict(self) -> Dict[str, Dict[str, DetectorConfig]]: ret_val = { "perlevel" : self.PerLevelDetectors, "per_count" : self.PerCountDetectors, @@ -60,9 +60,9 @@ def AsDict(self) -> Dict[str, Dict[str, DetectorSchema]]: @classmethod def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "DetectorMapConfig": - _perlevel_detectors : Dict[str, DetectorSchema] - _percount_detectors : 
Dict[str, DetectorSchema] - _aggregate_detectors : Dict[str, DetectorSchema] + _perlevel_detectors : Dict[str, DetectorConfig] + _percount_detectors : Dict[str, DetectorConfig] + _aggregate_detectors : Dict[str, DetectorConfig] if not isinstance(all_elements, dict): all_elements = {} @@ -115,30 +115,30 @@ def Default(cls) -> "DetectorMapConfig": # *** PRIVATE STATICS *** @staticmethod - def _parsePerLevelDetectors(perlevels) -> Dict[str, DetectorSchema]: - ret_val : Dict[str, DetectorSchema] + def _parsePerLevelDetectors(perlevels) -> Dict[str, DetectorConfig]: + ret_val : Dict[str, DetectorConfig] if isinstance(perlevels, dict): - ret_val = { key : DetectorSchema(name=key, all_elements=val) for key,val in perlevels.items() } + ret_val = { key : DetectorConfig(name=key, all_elements=val) for key,val in perlevels.items() } else: ret_val = {} Logger.Log("Per-level detectors map was not a dict, defaulting to empty dict", logging.WARN) return ret_val @staticmethod - def _parsePerCountDetectors(percounts) -> Dict[str, DetectorSchema]: - ret_val : Dict[str, DetectorSchema] + def _parsePerCountDetectors(percounts) -> Dict[str, DetectorConfig]: + ret_val : Dict[str, DetectorConfig] if isinstance(percounts, dict): - ret_val = { key : DetectorSchema(name=key, all_elements=val) for key,val in percounts.items() } + ret_val = { key : DetectorConfig(name=key, all_elements=val) for key,val in percounts.items() } else: ret_val = {} Logger.Log("Per-count detectors map was not a dict, defaulting to empty dict", logging.WARN) return ret_val @staticmethod - def _parseAggregateDetectors(aggregates) -> Dict[str, DetectorSchema]: - ret_val : Dict[str, DetectorSchema] + def _parseAggregateDetectors(aggregates) -> Dict[str, DetectorConfig]: + ret_val : Dict[str, DetectorConfig] if isinstance(aggregates, dict): - ret_val = {key : DetectorSchema(name=key, all_elements=val) for key,val in aggregates.items()} + ret_val = {key : DetectorConfig(name=key, all_elements=val) for key,val in 
aggregates.items()} else: ret_val = {} Logger.Log("Per-count detectors map was not a dict, defaulting to empty dict", logging.WARN) diff --git a/src/ogd/common/configs/games/FeatureConfig.py b/src/ogd/common/configs/games/FeatureConfig.py index 2ae3556..abd63d8 100644 --- a/src/ogd/common/configs/games/FeatureConfig.py +++ b/src/ogd/common/configs/games/FeatureConfig.py @@ -63,7 +63,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging @classmethod def Default(cls) -> "SubfeatureConfig": return SubfeatureConfig( - name="DefaultSubfeatureSchema", + name="DefaultSubFeatureConfig", return_type=cls._DEFAULT_RETURN_TYPE, description=cls._DEFAULT_DESCRIPTION, other_elements={} @@ -97,7 +97,7 @@ def _parseDescription(description): # *** PRIVATE METHODS *** -class FeatureConfig(GeneratorSchema): +class FeatureConfig(GeneratorConfig): """Base class for all schemas related to defining feature Extractor configurations. """ diff --git a/src/ogd/common/configs/games/FeatureMapConfig.py b/src/ogd/common/configs/games/FeatureMapConfig.py index 66d2f71..762e627 100644 --- a/src/ogd/common/configs/games/FeatureMapConfig.py +++ b/src/ogd/common/configs/games/FeatureMapConfig.py @@ -20,13 +20,13 @@ class FeatureMapConfig(Schema): # *** BUILT-INS & PROPERTIES *** - def __init__(self, name:str, legacy_mode: bool, legacy_perlevel_feats:Dict[str, PerCountSchema], - percount_feats:Dict[str, PerCountSchema], aggregate_feats:Dict[str, AggregateSchema], + def __init__(self, name:str, legacy_mode: bool, legacy_perlevel_feats:Dict[str, PerCountConfig], + percount_feats:Dict[str, PerCountConfig], aggregate_feats:Dict[str, AggregateConfig], other_elements:Optional[Map]=None): self._legacy_mode : bool = legacy_mode - self._legacy_perlevel_feats : Dict[str, PerCountSchema] = legacy_perlevel_feats - self._percount_feats : Dict[str, PerCountSchema] = percount_feats - self._aggregate_feats : Dict[str, AggregateSchema] = aggregate_feats + self._legacy_perlevel_feats : 
Dict[str, PerCountConfig] = legacy_perlevel_feats + self._percount_feats : Dict[str, PerCountConfig] = percount_feats + self._aggregate_feats : Dict[str, AggregateConfig] = aggregate_feats super().__init__(name=name, other_elements=other_elements) @@ -35,15 +35,15 @@ def LegacyMode(self) -> bool: return self._legacy_mode @property - def LegacyPerLevelFeatures(self) -> Dict[str, PerCountSchema]: + def LegacyPerLevelFeatures(self) -> Dict[str, PerCountConfig]: return self._legacy_perlevel_feats @property - def PerCountFeatures(self) -> Dict[str, PerCountSchema]: + def PerCountFeatures(self) -> Dict[str, PerCountConfig]: return self._percount_feats @property - def AggregateFeatures(self) -> Dict[str, AggregateSchema]: + def AggregateFeatures(self) -> Dict[str, AggregateConfig]: return self._aggregate_feats # *** IMPLEMENT ABSTRACT FUNCTIONS *** @@ -60,9 +60,9 @@ def AsMarkdown(self) -> str: @classmethod def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "FeatureMapConfig": _legacy_mode : bool - _legacy_perlevel_feats : Dict[str, PerCountSchema] - _percount_feats : Dict[str, PerCountSchema] - _aggregate_feats : Dict[str, AggregateSchema] + _legacy_perlevel_feats : Dict[str, PerCountConfig] + _percount_feats : Dict[str, PerCountConfig] + _aggregate_feats : Dict[str, AggregateConfig] if not isinstance(all_elements, dict): all_elements = {} @@ -131,30 +131,30 @@ def _parseLegacyMode(legacy_element) -> bool: return ret_val @staticmethod - def _parsePerLevelFeatures(perlevels) -> Dict[str, PerCountSchema]: - ret_val : Dict[str, PerCountSchema] + def _parsePerLevelFeatures(perlevels) -> Dict[str, PerCountConfig]: + ret_val : Dict[str, PerCountConfig] if isinstance(perlevels, dict): - ret_val = { key : PerCountSchema.FromDict(name=key, all_elements=val) for key,val in perlevels.items() } + ret_val = { key : PerCountConfig.FromDict(name=key, all_elements=val) for key,val in perlevels.items() } else: ret_val = {} 
Logger.Log("Per-level features map was not a dict, defaulting to empty dict", logging.WARN) return ret_val @staticmethod - def _parsePerCountFeatures(percounts) -> Dict[str, PerCountSchema]: - ret_val : Dict[str, PerCountSchema] + def _parsePerCountFeatures(percounts) -> Dict[str, PerCountConfig]: + ret_val : Dict[str, PerCountConfig] if isinstance(percounts, dict): - ret_val = { key : PerCountSchema.FromDict(name=key, all_elements=val) for key,val in percounts.items() } + ret_val = { key : PerCountConfig.FromDict(name=key, all_elements=val) for key,val in percounts.items() } else: ret_val = {} Logger.Log("Per-count features map was not a dict, defaulting to empty dict", logging.WARN) return ret_val @staticmethod - def _parseAggregateFeatures(aggregates) -> Dict[str, AggregateSchema]: - ret_val : Dict[str, AggregateSchema] + def _parseAggregateFeatures(aggregates) -> Dict[str, AggregateConfig]: + ret_val : Dict[str, AggregateConfig] if isinstance(aggregates, dict): - ret_val = {key : AggregateSchema(name=key, other_elements=val) for key,val in aggregates.items()} + ret_val = {key : AggregateConfig(name=key, other_elements=val) for key,val in aggregates.items()} else: ret_val = {} Logger.Log("Per-count features map was not a dict, defaulting to empty dict", logging.WARN) diff --git a/src/ogd/common/configs/games/PerCountConfig.py b/src/ogd/common/configs/games/PerCountConfig.py index 458009e..0e39108 100644 --- a/src/ogd/common/configs/games/PerCountConfig.py +++ b/src/ogd/common/configs/games/PerCountConfig.py @@ -6,7 +6,7 @@ from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class PerCountConfig(FeatureSchema): +class PerCountConfig(FeatureConfig): _DEFAULT_COUNT = 1 _DEFAULT_PREFIX = "pre" @@ -66,7 +66,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging @classmethod def Default(cls) -> "PerCountConfig": return PerCountConfig( - name="DefaultPerCountSchema", + name="DefaultPerCountConfig", 
count=cls._DEFAULT_COUNT, prefix=cls._DEFAULT_PREFIX, other_elements={} diff --git a/src/ogd/common/schemas/games/GameSchema.py b/src/ogd/common/schemas/games/GameSchema.py index eee33e3..3d7b8bb 100644 --- a/src/ogd/common/schemas/games/GameSchema.py +++ b/src/ogd/common/schemas/games/GameSchema.py @@ -47,9 +47,9 @@ class GameSchema(Schema): def __init__(self, name:str, game_id:str, enum_defs:Dict[str, List[str]], game_state:Map, user_data:Map, event_list:List[EventSchema], - detector_map:Dict[str, Dict[str, DetectorSchema]], - aggregate_feats: Dict[str, AggregateSchema], percount_feats:Dict[str, PerCountSchema], - legacy_perlevel_feats: Dict[str, PerCountSchema], use_legacy_mode:bool, + detector_map:Dict[str, Dict[str, DetectorConfig]], + aggregate_feats: Dict[str, AggregateConfig], percount_feats:Dict[str, PerCountConfig], + legacy_perlevel_feats: Dict[str, PerCountConfig], use_legacy_mode:bool, config:Map, min_level:Optional[int], max_level:Optional[int], other_ranges:Dict[str, range], supported_vers:Optional[List[int]], other_elements:Optional[Map]=None): """Constructor for the GameSchema class. 
@@ -74,13 +74,13 @@ def __init__(self, name:str, game_id:str, enum_defs:Dict[str, List[str]], :param event_list: _description_ :type event_list: List[EventSchema] :param detector_map: _description_ - :type detector_map: Dict[str, Dict[str, DetectorSchema]] + :type detector_map: Dict[str, Dict[str, DetectorConfig]] :param aggregate_feats: _description_ - :type aggregate_feats: Dict[str, AggregateSchema] + :type aggregate_feats: Dict[str, AggregateConfig] :param percount_feats: _description_ :type percount_feats: Dict[str, PerCountFeatures] :param legacy_perlevel_feats: _description_ - :type legacy_perlevel_feats: Dict[str, PerCountSchema] + :type legacy_perlevel_feats: Dict[str, PerCountConfig] :param use_legacy_mode: _description_ :type use_legacy_mode: bool :param config: _description_ @@ -104,10 +104,10 @@ def __init__(self, name:str, game_id:str, enum_defs:Dict[str, List[str]], self._game_state : Map = game_state self._user_data : Map = user_data self._event_list : List[EventSchema] = event_list - self._detector_map : Dict[str, Dict[str, DetectorSchema]] = detector_map - self._aggregate_feats : Dict[str, AggregateSchema] = aggregate_feats - self._percount_feats : Dict[str, PerCountSchema] = percount_feats - self._legacy_perlevel_feats : Dict[str, PerCountSchema] = legacy_perlevel_feats + self._detector_map : Dict[str, Dict[str, DetectorConfig]] = detector_map + self._aggregate_feats : Dict[str, AggregateConfig] = aggregate_feats + self._percount_feats : Dict[str, PerCountConfig] = percount_feats + self._legacy_perlevel_feats : Dict[str, PerCountConfig] = legacy_perlevel_feats self._legacy_mode : bool = use_legacy_mode self._config : Map = config self._min_level : Optional[int] = min_level @@ -157,7 +157,7 @@ def EventTypes(self) -> List[str]: return [event.Name for event in self.Events] @property - def Detectors(self) -> Dict[str, Dict[str, DetectorSchema]]: + def Detectors(self) -> Dict[str, Dict[str, DetectorConfig]]: """Property for the dictionary of 
categorized detectors to extract. """ return self._detector_map @@ -172,19 +172,19 @@ def DetectorNames(self) -> List[str]: return ret_val @property - def PerCountDetectors(self) -> Dict[str, DetectorSchema]: + def PerCountDetectors(self) -> Dict[str, DetectorConfig]: """Property for the dictionary of per-custom-count detectors. """ return self.Detectors.get("per_count", {}) @property - def AggregateDetectors(self) -> Dict[str, DetectorSchema]: + def AggregateDetectors(self) -> Dict[str, DetectorConfig]: """Property for the dictionary of aggregate detectors. """ return self.Detectors.get("aggregate", {}) @property - def Features(self) -> Dict[str, Union[Dict[str, AggregateSchema], Dict[str, PerCountSchema]]]: + def Features(self) -> Dict[str, Union[Dict[str, AggregateConfig], Dict[str, PerCountConfig]]]: """Property for the dictionary of categorized features to extract. """ return { 'aggregate' : self._aggregate_feats, 'per_count' : self._percount_feats, 'perlevel' : self._legacy_perlevel_feats } @@ -199,19 +199,19 @@ def FeatureNames(self) -> List[str]: return ret_val @property - def LegacyPerLevelFeatures(self) -> Dict[str,PerCountSchema]: + def LegacyPerLevelFeatures(self) -> Dict[str,PerCountConfig]: """Property for the dictionary of legacy per-level features """ return self._legacy_perlevel_feats @property - def PerCountFeatures(self) -> Dict[str,PerCountSchema]: + def PerCountFeatures(self) -> Dict[str,PerCountConfig]: """Property for the dictionary of per-custom-count features. """ return self._percount_feats @property - def AggregateFeatures(self) -> Dict[str,AggregateSchema]: + def AggregateFeatures(self) -> Dict[str,AggregateConfig]: """Property for the dictionary of aggregate features. 
""" return self._aggregate_feats @@ -305,10 +305,10 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _game_state : Dict[str, Any] _user_data : Dict[str, Any] _event_list : List[EventSchema] - _detector_map : Dict[str, Dict[str, DetectorSchema]] - _aggregate_feats : Dict[str, AggregateSchema] = {} - _percount_feats : Dict[str, PerCountSchema] = {} - _legacy_perlevel_feats : Dict[str, PerCountSchema] = {} + _detector_map : Dict[str, Dict[str, DetectorConfig]] + _aggregate_feats : Dict[str, AggregateConfig] = {} + _percount_feats : Dict[str, PerCountConfig] = {} + _legacy_perlevel_feats : Dict[str, PerCountConfig] = {} _legacy_mode : bool _config : Dict[str, Any] _min_level : Optional[int] @@ -349,7 +349,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging parser_function=cls._parseDetectorMap, default_value=cls._DEFAULT_DETECTOR_MAP ) - _detector_map = _detector_map.AsDict # TODO : investigate weird Dict[str, Dict[str, DetectorSchema]] type inference + _detector_map = _detector_map.AsDict # TODO : investigate weird Dict[str, Dict[str, DetectorConfig]] type inference # 4. 
Get feature information _feat_map = cls.ElementFromDict(all_elements=all_elements, logger=logger, @@ -465,7 +465,7 @@ def DetectorEnabled(self, detector_name:str, iter_mode:IterationMode, extract_mo return False ret_val : bool - _detector_schema : Optional[DetectorSchema] + _detector_schema : Optional[DetectorConfig] match iter_mode: case IterationMode.AGGREGATE: _detector_schema = self.Detectors['aggregate'].get(detector_name) @@ -485,7 +485,7 @@ def FeatureEnabled(self, feature_name:str, iter_mode:IterationMode, extract_mode return feature_name == "legacy" ret_val : bool - _feature_schema : Optional[FeatureSchema] + _feature_schema : Optional[FeatureConfig] match iter_mode: case IterationMode.AGGREGATE: _feature_schema = self.AggregateFeatures.get(feature_name) @@ -500,10 +500,10 @@ def FeatureEnabled(self, feature_name:str, iter_mode:IterationMode, extract_mode ret_val = False return ret_val - def EnabledDetectors(self, iter_modes:Set[IterationMode], extract_modes:Set[ExtractionMode]=set()) -> Dict[str, DetectorSchema]: + def EnabledDetectors(self, iter_modes:Set[IterationMode], extract_modes:Set[ExtractionMode]=set()) -> Dict[str, DetectorConfig]: if self._legacy_mode: return {} - ret_val : Dict[str, DetectorSchema] = {} + ret_val : Dict[str, DetectorConfig] = {} if IterationMode.AGGREGATE in iter_modes: ret_val.update({key:val for key,val in self.AggregateDetectors.items() if val.Enabled.issuperset(extract_modes)}) @@ -511,10 +511,10 @@ def EnabledDetectors(self, iter_modes:Set[IterationMode], extract_modes:Set[Extr ret_val.update({key:val for key,val in self.PerCountDetectors.items() if val.Enabled.issuperset(extract_modes)}) return ret_val - def EnabledFeatures(self, iter_modes:Set[IterationMode]={IterationMode.AGGREGATE, IterationMode.PERCOUNT}, extract_modes:Set[ExtractionMode]=set()) -> Dict[str, FeatureSchema]: + def EnabledFeatures(self, iter_modes:Set[IterationMode]={IterationMode.AGGREGATE, IterationMode.PERCOUNT}, 
extract_modes:Set[ExtractionMode]=set()) -> Dict[str, FeatureConfig]: if self._legacy_mode: - return {"legacy" : AggregateSchema("legacy", {"type":"legacy", "return_type":None, "description":"", "enabled":True})} if IterationMode.AGGREGATE in iter_modes else {} - ret_val : Dict[str, FeatureSchema] = {} + return {"legacy" : AggregateConfig("legacy", {"type":"legacy", "return_type":None, "description":"", "enabled":True})} if IterationMode.AGGREGATE in iter_modes else {} + ret_val : Dict[str, FeatureConfig] = {} if IterationMode.AGGREGATE in iter_modes: ret_val.update({key:val for key,val in self.AggregateFeatures.items() if val.Enabled.issuperset(extract_modes)}) diff --git a/src/ogd/common/schemas/games/__init__.py b/src/ogd/common/schemas/games/__init__.py index c4f4cdb..83851af 100644 --- a/src/ogd/common/schemas/games/__init__.py +++ b/src/ogd/common/schemas/games/__init__.py @@ -1,17 +1,17 @@ __all__ = [ "EventSchema", - "GeneratorSchema", - "DetectorSchema", - "FeatureSchema", - "AggregateSchema", - "PerCountSchema", + "GeneratorConfig", + "DetectorConfig", + "FeatureConfig", + "AggregateConfig", + "PerCountConfig", "GameSchema" ] from . import EventSchema -from . import GeneratorSchema -from . import DetectorSchema -from . import FeatureSchema -from . import AggregateSchema -from . import PerCountSchema +from . import GeneratorConfig +from . import DetectorConfig +from . import FeatureConfig +from . import AggregateConfig +from . import PerCountConfig from . 
import GameSchema diff --git a/tests/cases/schemas/games/t_AggregateSchema.py b/tests/cases/schemas/games/t_AggregateSchema.py index 9aa71c8..a4c667e 100644 --- a/tests/cases/schemas/games/t_AggregateSchema.py +++ b/tests/cases/schemas/games/t_AggregateSchema.py @@ -7,10 +7,10 @@ from ogd.common.models.enums.ExtractionMode import ExtractionMode from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.AggregateSchema import AggregateSchema +from src.ogd.common.configs.games.AggregateConfig import AggregateConfig from tests.config.t_config import settings -class t_AggregateSchema(TestCase): +class t_AggregateConfig(TestCase): """Testbed for the GameSourceSchema class. TODO : Test more 'enabled' options/combinations. @@ -37,7 +37,7 @@ def setUpClass(cls) -> None: } } } - cls.test_schema = AggregateSchema( + cls.test_schema = AggregateConfig( name="ActiveTime Schema", other_elements=_elems ) @@ -113,7 +113,7 @@ def test_FromDict(self): } } _modes = { ExtractionMode.SESSION, ExtractionMode.PLAYER, ExtractionMode.POPULATION, ExtractionMode.DETECTOR } - _schema = AggregateSchema.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) + _schema = AggregateConfig.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "ActiveTime Schema") self.assertIsInstance(_schema.TypeName, str) diff --git a/tests/cases/schemas/games/t_DetectorSchema.py b/tests/cases/schemas/games/t_DetectorSchema.py index 3c83edd..b4a8772 100644 --- a/tests/cases/schemas/games/t_DetectorSchema.py +++ b/tests/cases/schemas/games/t_DetectorSchema.py @@ -6,12 +6,12 @@ from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.DetectorSchema import DetectorSchema +from src.ogd.common.schemas.games.DetectorConfig import DetectorConfig from tests.config.t_config import 
settings @unittest.skip("Not implemented") -class t_DetectorSchema(TestCase): - """Testbed for the DetectorSchema class. +class t_DetectorConfig(TestCase): + """Testbed for the DetectorConfig class. TODO : Implement tests """ @@ -37,7 +37,7 @@ def setUpClass(cls) -> None: } } } - cls.test_schema = DetectorSchema( + cls.test_schema = DetectorConfig( name="ActiveTime Schema", other_elements=_elems ) @@ -101,7 +101,7 @@ def test_FromDict(self): } } } - _schema = DetectorSchema.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) + _schema = DetectorConfig.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "ActiveTime Schema") self.assertIsInstance(_schema.TypeName, str) diff --git a/tests/cases/schemas/games/t_FeatureSchema.py b/tests/cases/schemas/games/t_FeatureSchema.py index 2c0bb45..7ede35d 100644 --- a/tests/cases/schemas/games/t_FeatureSchema.py +++ b/tests/cases/schemas/games/t_FeatureSchema.py @@ -6,12 +6,12 @@ from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.FeatureSchema import FeatureSchema +from src.ogd.common.schemas.games.FeatureConfig import FeatureConfig from tests.config.t_config import settings @unittest.skip("Not implemented") -class t_FeatureSchema(TestCase): - """Testbed for the DetectorSchema class. +class t_FeatureConfig(TestCase): + """Testbed for the DetectorConfig class. 
TODO : Implement tests """ @@ -37,7 +37,7 @@ def setUpClass(cls) -> None: } } } - # cls.test_schema = FeatureSchema( + # cls.test_schema = FeatureConfig( # name="ActiveTime Schema", # all_elements=_elems # ) @@ -128,7 +128,7 @@ def test_FromDict(self): } } } - # _schema = DetectorSchema.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) + # _schema = DetectorConfig.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) # self.assertIsInstance(_schema.Name, str) # self.assertEqual(_schema.Name, "ActiveTime Schema") # self.assertIsInstance(_schema.TypeName, str) diff --git a/tests/cases/schemas/games/t_GeneratorSchema.py b/tests/cases/schemas/games/t_GeneratorSchema.py index e0574b2..19f00c0 100644 --- a/tests/cases/schemas/games/t_GeneratorSchema.py +++ b/tests/cases/schemas/games/t_GeneratorSchema.py @@ -6,12 +6,12 @@ from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.GeneratorSchema import GeneratorSchema +from src.ogd.common.schemas.games.GeneratorConfig import GeneratorConfig from tests.config.t_config import settings @unittest.skip("Not implemented") -class t_GeneratorSchema(TestCase): - """Testbed for the DetectorSchema class. +class t_GeneratorConfig(TestCase): + """Testbed for the DetectorConfig class. 
TODO : Implement tests TODO : Create a basic testing implementation @@ -38,7 +38,7 @@ def setUpClass(cls) -> None: } } } - # cls.test_schema = GeneratorSchema( + # cls.test_schema = GeneratorConfig( # name="ActiveTime Schema", # all_elements=_elems # ) @@ -108,7 +108,7 @@ def test_FromDict(self): } } } - # _schema = GeneratorSchema.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) + # _schema = GeneratorConfig.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) # self.assertIsInstance(_schema.Name, str) # self.assertEqual(_schema.Name, "ActiveTime Schema") # self.assertIsInstance(_schema.TypeName, str) diff --git a/tests/cases/schemas/games/t_PerCountSchema.py b/tests/cases/schemas/games/t_PerCountSchema.py index 472e5dd..755576a 100644 --- a/tests/cases/schemas/games/t_PerCountSchema.py +++ b/tests/cases/schemas/games/t_PerCountSchema.py @@ -7,11 +7,11 @@ from ogd.common.models.enums.ExtractionMode import ExtractionMode from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.PerCountSchema import PerCountSchema +from src.ogd.common.schemas.games.PerCountConfig import PerCountConfig from tests.config.t_config import settings -class t_PerCountSchema(TestCase): - """Testbed for the PerCountSchema class. +class t_PerCountConfig(TestCase): + """Testbed for the PerCountConfig class. TODO : Test more 'enabled' options/combinations. 
""" @@ -37,7 +37,7 @@ def setUpClass(cls) -> None: } } } - cls.test_schema = PerCountSchema( + cls.test_schema = PerCountConfig( name="ActiveTime Schema", count=5, prefix="lvl", @@ -115,7 +115,7 @@ def test_FromDict(self): } } _modes = { ExtractionMode.SESSION, ExtractionMode.PLAYER, ExtractionMode.POPULATION, ExtractionMode.DETECTOR } - _schema = PerCountSchema.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) + _schema = PerCountConfig.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "ActiveTime Schema") self.assertIsInstance(_schema.TypeName, str) From 92380dfa45b4b580fc40e6ab16e61fc8be6b7e4e Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 18:30:45 -0600 Subject: [PATCH 106/124] Do search-and-replace on more schemas-turned-configs. --- .github/workflows/TEST_GameSchemas.yml | 4 ++-- .../common/configs/games/DetectorMapConfig.py | 2 +- .../common/configs/games/FeatureMapConfig.py | 2 +- src/ogd/common/schemas/games/GameSchema.py | 18 +++++++++--------- .../cases/schemas/games/t_DetectorMapSchema.py | 8 ++++---- .../cases/schemas/games/t_FeatureMapSchema.py | 8 ++++---- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/.github/workflows/TEST_GameSchemas.yml b/.github/workflows/TEST_GameSchemas.yml index a124fba..412e899 100644 --- a/.github/workflows/TEST_GameSchemas.yml +++ b/.github/workflows/TEST_GameSchemas.yml @@ -26,10 +26,10 @@ jobs: testbed: [ t_AggregateConfig, t_DataElementSchema, - t_DetectorMapSchema, + t_DetectorMapConfig, t_DetectorConfig, t_EventSchema, - t_FeatureMapSchema, + t_FeatureMapConfig, t_FeatureConfig, t_GameSchema, t_GameStateSchema, diff --git a/src/ogd/common/configs/games/DetectorMapConfig.py b/src/ogd/common/configs/games/DetectorMapConfig.py index b6332bf..f496fb3 100644 --- a/src/ogd/common/configs/games/DetectorMapConfig.py +++ b/src/ogd/common/configs/games/DetectorMapConfig.py @@ -92,7 
+92,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging @classmethod def Default(cls) -> "DetectorMapConfig": return DetectorMapConfig( - name="DefaultDetectorMapSchema", + name="DefaultDetectorMapConfig", perlevel_detectors=cls._DEFAULT_PERLEVEL_DETECTORS, percount_detectors=cls._DEFAULT_PERCOUNT_DETECTORS, aggregate_detectors=cls._DEFAULT_AGGREGATE_DETECTORS, diff --git a/src/ogd/common/configs/games/FeatureMapConfig.py b/src/ogd/common/configs/games/FeatureMapConfig.py index 762e627..c06c93d 100644 --- a/src/ogd/common/configs/games/FeatureMapConfig.py +++ b/src/ogd/common/configs/games/FeatureMapConfig.py @@ -97,7 +97,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging @classmethod def Default(cls) -> "FeatureMapConfig": return FeatureMapConfig( - name="DefaultFeatureMapSchema", + name="DefaultFeatureMapConfig", legacy_mode=cls._DEFAULT_LEGACY_MODE, legacy_perlevel_feats=cls._DEFAULT_LEGACY_FEATS, percount_feats=cls._DEFAULT_PERCOUNT_FEATS, diff --git a/src/ogd/common/schemas/games/GameSchema.py b/src/ogd/common/schemas/games/GameSchema.py index 3d7b8bb..ed86123 100644 --- a/src/ogd/common/schemas/games/GameSchema.py +++ b/src/ogd/common/schemas/games/GameSchema.py @@ -58,7 +58,7 @@ def __init__(self, name:str, game_id:str, enum_defs:Dict[str, List[str]], all features to be extracted. TODO: need to get game_state from schema file, and use a GameStateSchema instead of general Map. - TODO: Use DetectorMapSchema and FeatureMapSchema instead of just dicts... I think. Depending how these all work together. + TODO: Use DetectorMapConfig and FeatureMapConfig instead of just dicts... I think. Depending how these all work together. TODO : make parser functions for config and versions, so we can do ElementFromDict for them as well. 
:param name: _description_ @@ -609,22 +609,22 @@ def _parseEventList(events_list:Dict[str, Any]) -> List[EventSchema]: return ret_val @staticmethod - def _parseDetectorMap(detector_map:Dict[str, Any]) -> DetectorMapSchema: - ret_val : DetectorMapSchema + def _parseDetectorMap(detector_map:Dict[str, Any]) -> DetectorMapConfig: + ret_val : DetectorMapConfig if isinstance(detector_map, dict): - ret_val = DetectorMapSchema.FromDict(name=f"Detectors", all_elements=detector_map) + ret_val = DetectorMapConfig.FromDict(name=f"Detectors", all_elements=detector_map) else: - ret_val = DetectorMapSchema.FromDict(name="Empty Features", all_elements={}) + ret_val = DetectorMapConfig.FromDict(name="Empty Features", all_elements={}) Logger.Log(f"detector_map was unexpected type {type(detector_map)}, defaulting to empty map.", logging.WARN) return ret_val @staticmethod - def _parseFeatureMap(feature_map:Dict[str, Any]) -> FeatureMapSchema: - ret_val : FeatureMapSchema + def _parseFeatureMap(feature_map:Dict[str, Any]) -> FeatureMapConfig: + ret_val : FeatureMapConfig if isinstance(feature_map, dict): - ret_val = FeatureMapSchema.FromDict(name=f"Features", all_elements=feature_map) + ret_val = FeatureMapConfig.FromDict(name=f"Features", all_elements=feature_map) else: - ret_val = FeatureMapSchema.FromDict(name="Empty Features", all_elements={}) + ret_val = FeatureMapConfig.FromDict(name="Empty Features", all_elements={}) Logger.Log(f"feature_map was unexpected type {type(feature_map)}, defaulting to empty map.", logging.WARN) return ret_val diff --git a/tests/cases/schemas/games/t_DetectorMapSchema.py b/tests/cases/schemas/games/t_DetectorMapSchema.py index e040cb6..5544f72 100644 --- a/tests/cases/schemas/games/t_DetectorMapSchema.py +++ b/tests/cases/schemas/games/t_DetectorMapSchema.py @@ -6,11 +6,11 @@ from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals -from 
src.ogd.common.schemas.games.DetectorMapSchema import DetectorMapSchema +from src.ogd.common.schemas.games.DetectorMapConfig import DetectorMapConfig from tests.config.t_config import settings @unittest.skip("Not implemented") -class t_DetectorMapSchema(TestCase): +class t_DetectorMapConfig(TestCase): """Testbed for the GameSourceSchema class. TODO : Implement and enable tests. @@ -24,7 +24,7 @@ def setUpClass(cls) -> None: Logger.std_logger.setLevel(_level) # 2. Set up local instance of testing class - cls.test_schema = DetectorMapSchema( + cls.test_schema = DetectorMapConfig( name="available_building Schema", perlevel_detectors={}, percount_detectors={}, @@ -93,7 +93,7 @@ def test_FromDict(self): }, "description" : "The buildings available for the player to construct" } - _schema = DetectorMapSchema.FromDict(name="available_buildings Schema", all_elements=_dict, logger=None) + _schema = DetectorMapConfig.FromDict(name="available_buildings Schema", all_elements=_dict, logger=None) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "available_buildings Schema") diff --git a/tests/cases/schemas/games/t_FeatureMapSchema.py b/tests/cases/schemas/games/t_FeatureMapSchema.py index 17fb436..3e87363 100644 --- a/tests/cases/schemas/games/t_FeatureMapSchema.py +++ b/tests/cases/schemas/games/t_FeatureMapSchema.py @@ -6,11 +6,11 @@ from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.FeatureMapSchema import FeatureMapSchema +from src.ogd.common.schemas.games.FeatureMapConfig import FeatureMapConfig from tests.config.t_config import settings @unittest.skip("Not implemented") -class t_FeatureMapSchema(TestCase): +class t_FeatureMapConfig(TestCase): """Testbed for the GameSourceSchema class. TODO : Implement and enable tests. @@ -24,7 +24,7 @@ def setUpClass(cls) -> None: Logger.std_logger.setLevel(_level) # 2. 
Set up local instance of testing class - cls.test_schema = FeatureMapSchema( + cls.test_schema = FeatureMapConfig( name="available_building Schema", legacy_mode=True, legacy_perlevel_feats={}, @@ -101,7 +101,7 @@ def test_FromDict(self): }, "description" : "The buildings available for the player to construct" } - _schema = FeatureMapSchema.FromDict(name="available_buildings Schema", all_elements=_dict, logger=None) + _schema = FeatureMapConfig.FromDict(name="available_buildings Schema", all_elements=_dict, logger=None) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "available_buildings Schema") From 4ff456053b4948d6c81cf14caaabdd0445dd5ad5 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 18:34:02 -0600 Subject: [PATCH 107/124] Update a bunch of import paths for games configs, and regular configs. --- src/ogd/common/connectors/StorageConnector.py | 2 +- src/ogd/common/connectors/interfaces/BQFirebaseInterface.py | 2 +- .../common/connectors/interfaces/BigQueryCodingInterface.py | 2 +- src/ogd/common/connectors/interfaces/BigQueryInterface.py | 2 +- src/ogd/common/connectors/interfaces/CSVInterface.py | 2 +- src/ogd/common/connectors/interfaces/Interface.py | 2 +- src/ogd/common/connectors/outerfaces/DataOuterface.py | 2 +- src/ogd/common/connectors/outerfaces/DebugOuterface.py | 2 +- src/ogd/common/connectors/outerfaces/DictionaryOuterface.py | 2 +- src/ogd/common/connectors/outerfaces/TSVOuterface.py | 4 ++-- tests/TestDriver.py | 2 +- tests/cases/interfaces/t_CSVInterface.py | 2 +- tests/cases/schemas/config/t_GameSourceSchema.py | 2 +- tests/cases/schemas/config/t_IndexingSchema.py | 2 +- tests/cases/schemas/config/t_TestConfigSchema.py | 2 +- tests/cases/schemas/games/t_AggregateSchema.py | 2 +- tests/cases/schemas/games/t_DataElementSchema.py | 2 +- tests/cases/schemas/games/t_DetectorMapSchema.py | 2 +- tests/cases/schemas/games/t_DetectorSchema.py | 2 +- tests/cases/schemas/games/t_EventSchema.py | 2 +- 
tests/cases/schemas/games/t_FeatureMapSchema.py | 2 +- tests/cases/schemas/games/t_FeatureSchema.py | 2 +- tests/cases/schemas/games/t_GameSchema.py | 2 +- tests/cases/schemas/games/t_GameStateSchema.py | 2 +- tests/cases/schemas/games/t_GeneratorSchema.py | 2 +- tests/cases/schemas/games/t_PerCountSchema.py | 2 +- tests/cases/schemas/t_Schema.py | 2 +- 27 files changed, 28 insertions(+), 28 deletions(-) diff --git a/src/ogd/common/connectors/StorageConnector.py b/src/ogd/common/connectors/StorageConnector.py index 4246031..9177eb4 100644 --- a/src/ogd/common/connectors/StorageConnector.py +++ b/src/ogd/common/connectors/StorageConnector.py @@ -6,7 +6,7 @@ import logging # import local files -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.utils.Logger import Logger class StorageConnector(abc.ABC): diff --git a/src/ogd/common/connectors/interfaces/BQFirebaseInterface.py b/src/ogd/common/connectors/interfaces/BQFirebaseInterface.py index d1a81ef..b2634fe 100644 --- a/src/ogd/common/connectors/interfaces/BQFirebaseInterface.py +++ b/src/ogd/common/connectors/interfaces/BQFirebaseInterface.py @@ -6,7 +6,7 @@ # import locals from ogd.common.connectors.interfaces.BigQueryInterface import BigQueryInterface from ogd.common.models.enums.IDMode import IDMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.utils.Logger import Logger AQUALAB_MIN_VERSION : Final[float] = 6.2 diff --git a/src/ogd/common/connectors/interfaces/BigQueryCodingInterface.py b/src/ogd/common/connectors/interfaces/BigQueryCodingInterface.py index 01dc4c3..c53be27 100644 --- a/src/ogd/common/connectors/interfaces/BigQueryCodingInterface.py +++ b/src/ogd/common/connectors/interfaces/BigQueryCodingInterface.py @@ -7,7 +7,7 @@ from ogd.common.models.coding.Coder import Coder from 
ogd.common.connectors.interfaces.CodingInterface import CodingInterface from ogd.common.models.enums.IDMode import IDMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.utils.Logger import Logger # TODO: see about merging this back into BigQueryInterface for a unified interface. diff --git a/src/ogd/common/connectors/interfaces/BigQueryInterface.py b/src/ogd/common/connectors/interfaces/BigQueryInterface.py index a258c35..9e1ee61 100644 --- a/src/ogd/common/connectors/interfaces/BigQueryInterface.py +++ b/src/ogd/common/connectors/interfaces/BigQueryInterface.py @@ -8,7 +8,7 @@ # import locals from ogd.common.connectors.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.configs.storage.BigQueryConfig import BigQueryConfig from ogd.common.utils.Logger import Logger diff --git a/src/ogd/common/connectors/interfaces/CSVInterface.py b/src/ogd/common/connectors/interfaces/CSVInterface.py index e59dd60..0e16e04 100644 --- a/src/ogd/common/connectors/interfaces/CSVInterface.py +++ b/src/ogd/common/connectors/interfaces/CSVInterface.py @@ -7,7 +7,7 @@ ## import local files from ogd.common.connectors.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.schemas.tables.TableSchema import TableSchema from ogd.common.utils.Logger import Logger diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py index f351310..1404c3c 100644 --- a/src/ogd/common/connectors/interfaces/Interface.py +++ 
b/src/ogd/common/connectors/interfaces/Interface.py @@ -16,7 +16,7 @@ from ogd.common.models.FeatureDataset import FeatureDataset from ogd.common.models.enums.IDMode import IDMode from ogd.common.models.enums.VersionType import VersionType -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.schemas.tables.EventTableSchema import EventTableSchema from ogd.common.schemas.tables.FeatureTableSchema import FeatureTableSchema from ogd.common.utils.SemanticVersion import SemanticVersion diff --git a/src/ogd/common/connectors/outerfaces/DataOuterface.py b/src/ogd/common/connectors/outerfaces/DataOuterface.py index 445ba4a..803f584 100644 --- a/src/ogd/common/connectors/outerfaces/DataOuterface.py +++ b/src/ogd/common/connectors/outerfaces/DataOuterface.py @@ -9,7 +9,7 @@ from ogd.common.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode from ogd.common.models.enums.ExportMode import ExportMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import ExportRow diff --git a/src/ogd/common/connectors/outerfaces/DebugOuterface.py b/src/ogd/common/connectors/outerfaces/DebugOuterface.py index e7d349a..0f2b361 100644 --- a/src/ogd/common/connectors/outerfaces/DebugOuterface.py +++ b/src/ogd/common/connectors/outerfaces/DebugOuterface.py @@ -7,7 +7,7 @@ # import OGD files from ogd.common.interfaces.outerfaces.DataOuterface import DataOuterface from ogd.common.models.enums.ExportMode import ExportMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import ExportRow diff --git 
a/src/ogd/common/connectors/outerfaces/DictionaryOuterface.py b/src/ogd/common/connectors/outerfaces/DictionaryOuterface.py index 9797e4a..358d93b 100644 --- a/src/ogd/common/connectors/outerfaces/DictionaryOuterface.py +++ b/src/ogd/common/connectors/outerfaces/DictionaryOuterface.py @@ -5,7 +5,7 @@ # import local files from ogd.common.interfaces.outerfaces.DataOuterface import DataOuterface from ogd.common.models.enums.ExportMode import ExportMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import ExportRow diff --git a/src/ogd/common/connectors/outerfaces/TSVOuterface.py b/src/ogd/common/connectors/outerfaces/TSVOuterface.py index 70e139b..ddd1d8f 100644 --- a/src/ogd/common/connectors/outerfaces/TSVOuterface.py +++ b/src/ogd/common/connectors/outerfaces/TSVOuterface.py @@ -18,10 +18,10 @@ from ogd.common.interfaces.outerfaces.DataOuterface import DataOuterface from ogd.common.models.enums.ExtractionMode import ExtractionMode from ogd.common.models.enums.ExportMode import ExportMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.schemas.games.GameSchema import GameSchema from ogd.common.schemas.tables.TableSchema import TableSchema -from ogd.common.schemas.configs.IndexingSchema import FileIndexingSchema +from ogd.common.configs.IndexingSchema import FileIndexingSchema from ogd.common.utils import fileio from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import ExportRow diff --git a/tests/TestDriver.py b/tests/TestDriver.py index 67e0ce2..aee4065 100644 --- a/tests/TestDriver.py +++ b/tests/TestDriver.py @@ -11,7 +11,7 @@ import logging from ogd.common.utils.Logger import Logger Logger.InitializeLogger(level=logging.INFO, use_logfile=False) -from 
ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from config.t_config import settings diff --git a/tests/cases/interfaces/t_CSVInterface.py b/tests/cases/interfaces/t_CSVInterface.py index d9905cb..95a1344 100644 --- a/tests/cases/interfaces/t_CSVInterface.py +++ b/tests/cases/interfaces/t_CSVInterface.py @@ -6,7 +6,7 @@ from zipfile import ZipFile # import locals from ogd.common.connectors.interfaces.CSVInterface import CSVInterface -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema class t_CSVInterface(TestCase): TEST_MIN_DATE : Final[datetime] = datetime(year=2021, month=2, day=1, hour= 0, minute=0, second=0) diff --git a/tests/cases/schemas/config/t_GameSourceSchema.py b/tests/cases/schemas/config/t_GameSourceSchema.py index 9fdde2b..0bfa1c7 100644 --- a/tests/cases/schemas/config/t_GameSourceSchema.py +++ b/tests/cases/schemas/config/t_GameSourceSchema.py @@ -7,7 +7,7 @@ # import ogd libraries. from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.configs.storage.BigQueryConfig import BigQueryConfig -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema diff --git a/tests/cases/schemas/config/t_IndexingSchema.py b/tests/cases/schemas/config/t_IndexingSchema.py index 308793a..39490a7 100644 --- a/tests/cases/schemas/config/t_IndexingSchema.py +++ b/tests/cases/schemas/config/t_IndexingSchema.py @@ -4,7 +4,7 @@ from pathlib import Path from unittest import TestCase # import ogd libraries. 
-from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.configs.IndexingSchema import FileIndexingSchema diff --git a/tests/cases/schemas/config/t_TestConfigSchema.py b/tests/cases/schemas/config/t_TestConfigSchema.py index bbd59f1..9f8c787 100644 --- a/tests/cases/schemas/config/t_TestConfigSchema.py +++ b/tests/cases/schemas/config/t_TestConfigSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema as TestConfigLocal diff --git a/tests/cases/schemas/games/t_AggregateSchema.py b/tests/cases/schemas/games/t_AggregateSchema.py index a4c667e..31a0e2d 100644 --- a/tests/cases/schemas/games/t_AggregateSchema.py +++ b/tests/cases/schemas/games/t_AggregateSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.models.enums.ExtractionMode import ExtractionMode from ogd.common.utils.Logger import Logger # import locals diff --git a/tests/cases/schemas/games/t_DataElementSchema.py b/tests/cases/schemas/games/t_DataElementSchema.py index 1457b97..79432d8 100644 --- a/tests/cases/schemas/games/t_DataElementSchema.py +++ b/tests/cases/schemas/games/t_DataElementSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. 
-from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.DataElementSchema import DataElementSchema diff --git a/tests/cases/schemas/games/t_DetectorMapSchema.py b/tests/cases/schemas/games/t_DetectorMapSchema.py index 5544f72..6c7192d 100644 --- a/tests/cases/schemas/games/t_DetectorMapSchema.py +++ b/tests/cases/schemas/games/t_DetectorMapSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.DetectorMapConfig import DetectorMapConfig diff --git a/tests/cases/schemas/games/t_DetectorSchema.py b/tests/cases/schemas/games/t_DetectorSchema.py index b4a8772..1e30b93 100644 --- a/tests/cases/schemas/games/t_DetectorSchema.py +++ b/tests/cases/schemas/games/t_DetectorSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.DetectorConfig import DetectorConfig diff --git a/tests/cases/schemas/games/t_EventSchema.py b/tests/cases/schemas/games/t_EventSchema.py index 0256c1b..6e50d7a 100644 --- a/tests/cases/schemas/games/t_EventSchema.py +++ b/tests/cases/schemas/games/t_EventSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. 
-from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.EventSchema import EventSchema diff --git a/tests/cases/schemas/games/t_FeatureMapSchema.py b/tests/cases/schemas/games/t_FeatureMapSchema.py index 3e87363..d827d53 100644 --- a/tests/cases/schemas/games/t_FeatureMapSchema.py +++ b/tests/cases/schemas/games/t_FeatureMapSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.FeatureMapConfig import FeatureMapConfig diff --git a/tests/cases/schemas/games/t_FeatureSchema.py b/tests/cases/schemas/games/t_FeatureSchema.py index 7ede35d..bb34450 100644 --- a/tests/cases/schemas/games/t_FeatureSchema.py +++ b/tests/cases/schemas/games/t_FeatureSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.FeatureConfig import FeatureConfig diff --git a/tests/cases/schemas/games/t_GameSchema.py b/tests/cases/schemas/games/t_GameSchema.py index f79a230..6814feb 100644 --- a/tests/cases/schemas/games/t_GameSchema.py +++ b/tests/cases/schemas/games/t_GameSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. 
-from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.GameSchema import GameSchema diff --git a/tests/cases/schemas/games/t_GameStateSchema.py b/tests/cases/schemas/games/t_GameStateSchema.py index 3e7f862..0b6834d 100644 --- a/tests/cases/schemas/games/t_GameStateSchema.py +++ b/tests/cases/schemas/games/t_GameStateSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.GameStateSchema import GameStateSchema diff --git a/tests/cases/schemas/games/t_GeneratorSchema.py b/tests/cases/schemas/games/t_GeneratorSchema.py index 19f00c0..ee08c55 100644 --- a/tests/cases/schemas/games/t_GeneratorSchema.py +++ b/tests/cases/schemas/games/t_GeneratorSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.GeneratorConfig import GeneratorConfig diff --git a/tests/cases/schemas/games/t_PerCountSchema.py b/tests/cases/schemas/games/t_PerCountSchema.py index 755576a..6093645 100644 --- a/tests/cases/schemas/games/t_PerCountSchema.py +++ b/tests/cases/schemas/games/t_PerCountSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. 
-from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.models.enums.ExtractionMode import ExtractionMode from ogd.common.utils.Logger import Logger # import locals diff --git a/tests/cases/schemas/t_Schema.py b/tests/cases/schemas/t_Schema.py index 3f6645c..9c545e1 100644 --- a/tests/cases/schemas/t_Schema.py +++ b/tests/cases/schemas/t_Schema.py @@ -5,7 +5,7 @@ from typing import Any, Dict, Optional from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfigSchema import TestConfigSchema from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.Schema import Schema From b1b72b7163611a0bd0c3e46fc22d62f376341169 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 18:35:53 -0600 Subject: [PATCH 108/124] Update names used for IndexingConfig and TestConfig. 
--- .github/workflows/TEST_ConfigSchemas.yml | 4 ++-- src/ogd/common/configs/IndexingConfig.py | 2 +- src/ogd/common/configs/TestConfig.py | 2 +- src/ogd/common/connectors/outerfaces/TSVOuterface.py | 6 +++--- tests/TestDriver.py | 4 ++-- tests/cases/schemas/config/t_GameSourceSchema.py | 4 ++-- tests/cases/schemas/config/t_IndexingSchema.py | 12 ++++++------ tests/cases/schemas/config/t_TestConfigSchema.py | 8 ++++---- tests/cases/schemas/games/t_AggregateSchema.py | 4 ++-- tests/cases/schemas/games/t_DataElementSchema.py | 4 ++-- tests/cases/schemas/games/t_DetectorMapSchema.py | 4 ++-- tests/cases/schemas/games/t_DetectorSchema.py | 4 ++-- tests/cases/schemas/games/t_EventSchema.py | 4 ++-- tests/cases/schemas/games/t_FeatureMapSchema.py | 4 ++-- tests/cases/schemas/games/t_FeatureSchema.py | 4 ++-- tests/cases/schemas/games/t_GameSchema.py | 4 ++-- tests/cases/schemas/games/t_GameStateSchema.py | 4 ++-- tests/cases/schemas/games/t_GeneratorSchema.py | 4 ++-- tests/cases/schemas/games/t_PerCountSchema.py | 4 ++-- tests/cases/schemas/t_Schema.py | 4 ++-- 20 files changed, 45 insertions(+), 45 deletions(-) diff --git a/.github/workflows/TEST_ConfigSchemas.yml b/.github/workflows/TEST_ConfigSchemas.yml index b25f2a8..90b68a2 100644 --- a/.github/workflows/TEST_ConfigSchemas.yml +++ b/.github/workflows/TEST_ConfigSchemas.yml @@ -25,8 +25,8 @@ jobs: matrix: testbed: [ t_GameSourceSchema, - t_IndexingSchema, - t_TestConfigSchema, + t_IndexingConfig, + t_TestConfig, ] fail-fast: false # we don't want to cancel just because one testbed fails. 
max-parallel: 20 diff --git a/src/ogd/common/configs/IndexingConfig.py b/src/ogd/common/configs/IndexingConfig.py index c6b1131..f7befd0 100644 --- a/src/ogd/common/configs/IndexingConfig.py +++ b/src/ogd/common/configs/IndexingConfig.py @@ -37,7 +37,7 @@ def TemplatesURL(self) -> str: @classmethod def Default(cls) -> "FileIndexingConfig": return FileIndexingConfig( - name = "DefaultFileIndexingSchema", + name = "DefaultFileIndexingConfig", local_dir = cls._DEFAULT_LOCAL_DIR, remote_url = cls._DEFAULT_REMOTE_URL, templates_url = cls._DEFAULT_TEMPLATE_URL, diff --git a/src/ogd/common/configs/TestConfig.py b/src/ogd/common/configs/TestConfig.py index 62f6a46..649eccf 100644 --- a/src/ogd/common/configs/TestConfig.py +++ b/src/ogd/common/configs/TestConfig.py @@ -1,5 +1,5 @@ """ -TestConfigSchema +TestConfig Contains a Schema class for managing config data for testing configurations. In particular, base testing config files always have a `"VERBOSE"` setting, diff --git a/src/ogd/common/connectors/outerfaces/TSVOuterface.py b/src/ogd/common/connectors/outerfaces/TSVOuterface.py index ddd1d8f..e9de96d 100644 --- a/src/ogd/common/connectors/outerfaces/TSVOuterface.py +++ b/src/ogd/common/connectors/outerfaces/TSVOuterface.py @@ -21,7 +21,7 @@ from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.schemas.games.GameSchema import GameSchema from ogd.common.schemas.tables.TableSchema import TableSchema -from ogd.common.configs.IndexingSchema import FileIndexingSchema +from ogd.common.configs.IndexingConfig import FileIndexingConfig from ogd.common.utils import fileio from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import ExportRow @@ -31,12 +31,12 @@ class TSVOuterface(DataOuterface): # *** BUILT-INS & PROPERTIES *** - def __init__(self, game_id:str, config:GameSourceSchema, export_modes:Set[ExportMode], date_range:Dict[str,Optional[datetime]], file_indexing:FileIndexingSchema, extension:str="tsv", 
dataset_id:Optional[str]=None): + def __init__(self, game_id:str, config:GameSourceSchema, export_modes:Set[ExportMode], date_range:Dict[str,Optional[datetime]], file_indexing:FileIndexingConfig, extension:str="tsv", dataset_id:Optional[str]=None): super().__init__(game_id=game_id, config=config, export_modes=export_modes) self._file_paths : Dict[str,Optional[Path]] = {"population":None, "players":None, "sessions":None, "processed_events":None, "raw_events":None} self._zip_paths : Dict[str,Optional[Path]] = {"population":None, "players":None, "sessions":None, "processed_events":None, "raw_events":None} self._files : Dict[str,Optional[IO]] = {"population":None, "players":None, "sessions":None, "processed_events":None, "raw_events":None} - self._file_indexing : FileIndexingSchema = file_indexing + self._file_indexing : FileIndexingConfig = file_indexing self._data_dir : Path = Path(f"./{self._file_indexing.LocalDirectory}") self._game_data_dir : Path = self._data_dir / self._game_id self._readme_path : Path = self._game_data_dir / "README.md" diff --git a/tests/TestDriver.py b/tests/TestDriver.py index aee4065..042ac46 100644 --- a/tests/TestDriver.py +++ b/tests/TestDriver.py @@ -11,11 +11,11 @@ import logging from ogd.common.utils.Logger import Logger Logger.InitializeLogger(level=logging.INFO, use_logfile=False) -from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from config.t_config import settings -_config = TestConfigSchema.FromDict(name="APIUtilsTestConfig", all_elements=settings, logger=None) +_config = TestConfig.FromDict(name="APIUtilsTestConfig", all_elements=settings, logger=None) # loader = TestLoader() # TODO : At the moment, this is just module-level, should eventually go to class-level selection. 
diff --git a/tests/cases/schemas/config/t_GameSourceSchema.py b/tests/cases/schemas/config/t_GameSourceSchema.py index 0bfa1c7..650ad18 100644 --- a/tests/cases/schemas/config/t_GameSourceSchema.py +++ b/tests/cases/schemas/config/t_GameSourceSchema.py @@ -7,7 +7,7 @@ # import ogd libraries. from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.configs.storage.BigQueryConfig import BigQueryConfig -from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema @@ -20,7 +20,7 @@ class t_GameSourceSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/config/t_IndexingSchema.py b/tests/cases/schemas/config/t_IndexingSchema.py index 39490a7..676c6a0 100644 --- a/tests/cases/schemas/config/t_IndexingSchema.py +++ b/tests/cases/schemas/config/t_IndexingSchema.py @@ -4,25 +4,25 @@ from pathlib import Path from unittest import TestCase # import ogd libraries. -from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.configs.IndexingSchema import FileIndexingSchema +from src.ogd.common.schemas.configs.IndexingConfig import FileIndexingConfig from tests.config.t_config import settings -class t_IndexingSchema(TestCase): +class t_IndexingConfig(TestCase): """Testbed for the GameSourceSchema class. """ @classmethod def setUpClass(cls) -> None: # 1. 
Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) # 2. Set up local instance of testing class - cls.test_schema = FileIndexingSchema( + cls.test_schema = FileIndexingConfig( name="Indexing Schema", local_dir=Path("./data/"), remote_url="https://fieldday-web.ad.education.wisc.edu/opengamedata/", @@ -74,7 +74,7 @@ def test_FromDict(self): "REMOTE_URL" : "https://fieldday-web.ad.education.wisc.edu/opengamedata/", "TEMPLATES_URL" : "https://github.com/opengamedata/opengamedata-samples" } - _schema = FileIndexingSchema.FromDict(name="FILE_INDEXING", all_elements=_dict, logger=None) + _schema = FileIndexingConfig.FromDict(name="FILE_INDEXING", all_elements=_dict, logger=None) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "FILE_INDEXING") self.assertIsInstance(_schema.LocalDirectory, Path) diff --git a/tests/cases/schemas/config/t_TestConfigSchema.py b/tests/cases/schemas/config/t_TestConfigSchema.py index 9f8c787..19b639b 100644 --- a/tests/cases/schemas/config/t_TestConfigSchema.py +++ b/tests/cases/schemas/config/t_TestConfigSchema.py @@ -3,20 +3,20 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema as TestConfigLocal +from src.ogd.common.schemas.configs.TestConfig import TestConfig as TestConfigLocal from tests.config.t_config import settings -class t_TestConfigSchema(TestCase): +class t_TestConfig(TestCase): """Testbed for the GameSourceSchema class. """ @classmethod def setUpClass(cls) -> None: # 1. 
Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/games/t_AggregateSchema.py b/tests/cases/schemas/games/t_AggregateSchema.py index 31a0e2d..3b330cf 100644 --- a/tests/cases/schemas/games/t_AggregateSchema.py +++ b/tests/cases/schemas/games/t_AggregateSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.models.enums.ExtractionMode import ExtractionMode from ogd.common.utils.Logger import Logger # import locals @@ -19,7 +19,7 @@ class t_AggregateConfig(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/games/t_DataElementSchema.py b/tests/cases/schemas/games/t_DataElementSchema.py index 79432d8..67fe511 100644 --- a/tests/cases/schemas/games/t_DataElementSchema.py +++ b/tests/cases/schemas/games/t_DataElementSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. 
-from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.DataElementSchema import DataElementSchema @@ -18,7 +18,7 @@ class t_DataElementSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/games/t_DetectorMapSchema.py b/tests/cases/schemas/games/t_DetectorMapSchema.py index 6c7192d..cb3d2f5 100644 --- a/tests/cases/schemas/games/t_DetectorMapSchema.py +++ b/tests/cases/schemas/games/t_DetectorMapSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.DetectorMapConfig import DetectorMapConfig @@ -19,7 +19,7 @@ class t_DetectorMapConfig(TestCase): @classmethod def setUpClass(cls) -> None: # 1. 
Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/games/t_DetectorSchema.py b/tests/cases/schemas/games/t_DetectorSchema.py index 1e30b93..9acb1d8 100644 --- a/tests/cases/schemas/games/t_DetectorSchema.py +++ b/tests/cases/schemas/games/t_DetectorSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.DetectorConfig import DetectorConfig @@ -19,7 +19,7 @@ class t_DetectorConfig(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/games/t_EventSchema.py b/tests/cases/schemas/games/t_EventSchema.py index 6e50d7a..d4e3339 100644 --- a/tests/cases/schemas/games/t_EventSchema.py +++ b/tests/cases/schemas/games/t_EventSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.EventSchema import EventSchema @@ -19,7 +19,7 @@ class t_EventSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. 
Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/games/t_FeatureMapSchema.py b/tests/cases/schemas/games/t_FeatureMapSchema.py index d827d53..6c72c48 100644 --- a/tests/cases/schemas/games/t_FeatureMapSchema.py +++ b/tests/cases/schemas/games/t_FeatureMapSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.FeatureMapConfig import FeatureMapConfig @@ -19,7 +19,7 @@ class t_FeatureMapConfig(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/games/t_FeatureSchema.py b/tests/cases/schemas/games/t_FeatureSchema.py index bb34450..fe39c1b 100644 --- a/tests/cases/schemas/games/t_FeatureSchema.py +++ b/tests/cases/schemas/games/t_FeatureSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. 
-from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.FeatureConfig import FeatureConfig @@ -19,7 +19,7 @@ class t_FeatureConfig(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/games/t_GameSchema.py b/tests/cases/schemas/games/t_GameSchema.py index 6814feb..c37314e 100644 --- a/tests/cases/schemas/games/t_GameSchema.py +++ b/tests/cases/schemas/games/t_GameSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.GameSchema import GameSchema @@ -19,7 +19,7 @@ class t_GameSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/games/t_GameStateSchema.py b/tests/cases/schemas/games/t_GameStateSchema.py index 0b6834d..3d518c6 100644 --- a/tests/cases/schemas/games/t_GameStateSchema.py +++ b/tests/cases/schemas/games/t_GameStateSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. 
-from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.GameStateSchema import GameStateSchema @@ -19,7 +19,7 @@ class t_GameStateSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/games/t_GeneratorSchema.py b/tests/cases/schemas/games/t_GeneratorSchema.py index ee08c55..fbe0511 100644 --- a/tests/cases/schemas/games/t_GeneratorSchema.py +++ b/tests/cases/schemas/games/t_GeneratorSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.GeneratorConfig import GeneratorConfig @@ -20,7 +20,7 @@ class t_GeneratorConfig(TestCase): @classmethod def setUpClass(cls) -> None: # 1. 
Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/games/t_PerCountSchema.py b/tests/cases/schemas/games/t_PerCountSchema.py index 6093645..27dd27e 100644 --- a/tests/cases/schemas/games/t_PerCountSchema.py +++ b/tests/cases/schemas/games/t_PerCountSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.models.enums.ExtractionMode import ExtractionMode from ogd.common.utils.Logger import Logger # import locals @@ -19,7 +19,7 @@ class t_PerCountConfig(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/t_Schema.py b/tests/cases/schemas/t_Schema.py index 9c545e1..927aad4 100644 --- a/tests/cases/schemas/t_Schema.py +++ b/tests/cases/schemas/t_Schema.py @@ -5,7 +5,7 @@ from typing import Any, Dict, Optional from unittest import TestCase # import ogd libraries. -from ogd.common.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.Schema import Schema @@ -36,7 +36,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging @classmethod def setUpClass(cls) -> None: # 1. 
Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO _str_level = "DEBUG" if _testing_cfg.Verbose else "INFO" Logger.std_logger.setLevel(_level) From 4976aa8480dce076e3181893e32d8bdde0e981d4 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 19:17:29 -0600 Subject: [PATCH 109/124] After we split out a bunch of classes to configs, do the same for the corresponding testbeds. --- tests/cases/{schemas/config => configs}/__init__.py | 0 tests/cases/{schemas => configs}/games/t_AggregateSchema.py | 0 tests/cases/{schemas => configs}/games/t_DetectorMapSchema.py | 0 tests/cases/{schemas => configs}/games/t_DetectorSchema.py | 0 tests/cases/{schemas => configs}/games/t_FeatureMapSchema.py | 2 +- tests/cases/{schemas => configs}/games/t_FeatureSchema.py | 2 +- tests/cases/{schemas => configs}/games/t_GeneratorSchema.py | 2 +- tests/cases/{schemas => configs}/games/t_PerCountSchema.py | 0 tests/cases/{schemas/config => configs}/t_GameSourceSchema.py | 0 tests/cases/{schemas/config => configs}/t_IndexingSchema.py | 0 tests/cases/{schemas/config => configs}/t_TestConfigSchema.py | 0 11 files changed, 3 insertions(+), 3 deletions(-) rename tests/cases/{schemas/config => configs}/__init__.py (100%) rename tests/cases/{schemas => configs}/games/t_AggregateSchema.py (100%) rename tests/cases/{schemas => configs}/games/t_DetectorMapSchema.py (100%) rename tests/cases/{schemas => configs}/games/t_DetectorSchema.py (100%) rename tests/cases/{schemas => configs}/games/t_FeatureMapSchema.py (98%) rename tests/cases/{schemas => configs}/games/t_FeatureSchema.py (98%) rename tests/cases/{schemas => configs}/games/t_GeneratorSchema.py (98%) rename tests/cases/{schemas => configs}/games/t_PerCountSchema.py (100%) rename 
tests/cases/{schemas/config => configs}/t_GameSourceSchema.py (100%) rename tests/cases/{schemas/config => configs}/t_IndexingSchema.py (100%) rename tests/cases/{schemas/config => configs}/t_TestConfigSchema.py (100%) diff --git a/tests/cases/schemas/config/__init__.py b/tests/cases/configs/__init__.py similarity index 100% rename from tests/cases/schemas/config/__init__.py rename to tests/cases/configs/__init__.py diff --git a/tests/cases/schemas/games/t_AggregateSchema.py b/tests/cases/configs/games/t_AggregateSchema.py similarity index 100% rename from tests/cases/schemas/games/t_AggregateSchema.py rename to tests/cases/configs/games/t_AggregateSchema.py diff --git a/tests/cases/schemas/games/t_DetectorMapSchema.py b/tests/cases/configs/games/t_DetectorMapSchema.py similarity index 100% rename from tests/cases/schemas/games/t_DetectorMapSchema.py rename to tests/cases/configs/games/t_DetectorMapSchema.py diff --git a/tests/cases/schemas/games/t_DetectorSchema.py b/tests/cases/configs/games/t_DetectorSchema.py similarity index 100% rename from tests/cases/schemas/games/t_DetectorSchema.py rename to tests/cases/configs/games/t_DetectorSchema.py diff --git a/tests/cases/schemas/games/t_FeatureMapSchema.py b/tests/cases/configs/games/t_FeatureMapSchema.py similarity index 98% rename from tests/cases/schemas/games/t_FeatureMapSchema.py rename to tests/cases/configs/games/t_FeatureMapSchema.py index 6c72c48..41f118b 100644 --- a/tests/cases/schemas/games/t_FeatureMapSchema.py +++ b/tests/cases/configs/games/t_FeatureMapSchema.py @@ -6,7 +6,7 @@ from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.FeatureMapConfig import FeatureMapConfig +from src.ogd.common.configs.games.FeatureMapConfig import FeatureMapConfig from tests.config.t_config import settings @unittest.skip("Not implemented") diff --git a/tests/cases/schemas/games/t_FeatureSchema.py 
b/tests/cases/configs/games/t_FeatureSchema.py similarity index 98% rename from tests/cases/schemas/games/t_FeatureSchema.py rename to tests/cases/configs/games/t_FeatureSchema.py index fe39c1b..32de688 100644 --- a/tests/cases/schemas/games/t_FeatureSchema.py +++ b/tests/cases/configs/games/t_FeatureSchema.py @@ -6,7 +6,7 @@ from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.FeatureConfig import FeatureConfig +from src.ogd.common.configs.games.FeatureConfig import FeatureConfig from tests.config.t_config import settings @unittest.skip("Not implemented") diff --git a/tests/cases/schemas/games/t_GeneratorSchema.py b/tests/cases/configs/games/t_GeneratorSchema.py similarity index 98% rename from tests/cases/schemas/games/t_GeneratorSchema.py rename to tests/cases/configs/games/t_GeneratorSchema.py index fbe0511..1dcdc6e 100644 --- a/tests/cases/schemas/games/t_GeneratorSchema.py +++ b/tests/cases/configs/games/t_GeneratorSchema.py @@ -6,7 +6,7 @@ from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.GeneratorConfig import GeneratorConfig +from src.ogd.common.configs.games.GeneratorConfig import GeneratorConfig from tests.config.t_config import settings @unittest.skip("Not implemented") diff --git a/tests/cases/schemas/games/t_PerCountSchema.py b/tests/cases/configs/games/t_PerCountSchema.py similarity index 100% rename from tests/cases/schemas/games/t_PerCountSchema.py rename to tests/cases/configs/games/t_PerCountSchema.py diff --git a/tests/cases/schemas/config/t_GameSourceSchema.py b/tests/cases/configs/t_GameSourceSchema.py similarity index 100% rename from tests/cases/schemas/config/t_GameSourceSchema.py rename to tests/cases/configs/t_GameSourceSchema.py diff --git a/tests/cases/schemas/config/t_IndexingSchema.py b/tests/cases/configs/t_IndexingSchema.py similarity index 100% 
rename from tests/cases/schemas/config/t_IndexingSchema.py rename to tests/cases/configs/t_IndexingSchema.py diff --git a/tests/cases/schemas/config/t_TestConfigSchema.py b/tests/cases/configs/t_TestConfigSchema.py similarity index 100% rename from tests/cases/schemas/config/t_TestConfigSchema.py rename to tests/cases/configs/t_TestConfigSchema.py From b2a4ef21045c31568ba4a5d3901899ba7b1fe2aa Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 19:17:56 -0600 Subject: [PATCH 110/124] Add an init for the interfaces test cases folder. --- tests/cases/interfaces/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/cases/interfaces/__init__.py diff --git a/tests/cases/interfaces/__init__.py b/tests/cases/interfaces/__init__.py new file mode 100644 index 0000000..e69de29 From 05261477120a900697cdd966c13b045c8ac031e8 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 19:22:49 -0600 Subject: [PATCH 111/124] Start work to update the GH workflows based on new naming. 
--- .../workflows/{TEST_ConfigSchemas.yml => TEST_Configs.yml} | 6 ++++-- tests/cases/configs/t_IndexingSchema.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) rename .github/workflows/{TEST_ConfigSchemas.yml => TEST_Configs.yml} (92%) diff --git a/.github/workflows/TEST_ConfigSchemas.yml b/.github/workflows/TEST_Configs.yml similarity index 92% rename from .github/workflows/TEST_ConfigSchemas.yml rename to .github/workflows/TEST_Configs.yml index 90b68a2..582c216 100644 --- a/.github/workflows/TEST_ConfigSchemas.yml +++ b/.github/workflows/TEST_Configs.yml @@ -6,11 +6,13 @@ on: workflow_call: push: paths: - - '.github/workflows/TEST_ConfigSchemas.yml' + # repo-wide dependencies - '.github/actions/test_config/**' - - 'tests/cases/schemas/config/*.py' - 'tests/config/**' - 'requirements.txt' + # specific dependencies + - '.github/workflows/TEST_Configs.yml' + - 'tests/cases/configs/*.py' concurrency: group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}-ConfigSchemas diff --git a/tests/cases/configs/t_IndexingSchema.py b/tests/cases/configs/t_IndexingSchema.py index 676c6a0..ef79c5b 100644 --- a/tests/cases/configs/t_IndexingSchema.py +++ b/tests/cases/configs/t_IndexingSchema.py @@ -7,7 +7,7 @@ from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.configs.IndexingConfig import FileIndexingConfig +from src.ogd.common.configs.IndexingConfig import FileIndexingConfig from tests.config.t_config import settings class t_IndexingConfig(TestCase): From c9dfc00b79095dabef8da07df615648bc5891872 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 19:23:34 -0600 Subject: [PATCH 112/124] Fix a couple bad imports I missed earlier. 
--- tests/cases/configs/t_GameSourceSchema.py | 2 +- tests/cases/configs/t_TestConfigSchema.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cases/configs/t_GameSourceSchema.py b/tests/cases/configs/t_GameSourceSchema.py index 650ad18..b0fcc7c 100644 --- a/tests/cases/configs/t_GameSourceSchema.py +++ b/tests/cases/configs/t_GameSourceSchema.py @@ -10,7 +10,7 @@ from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from src.ogd.common.configs.GameSourceSchema import GameSourceSchema from tests.config.t_config import settings class t_GameSourceSchema(TestCase): diff --git a/tests/cases/configs/t_TestConfigSchema.py b/tests/cases/configs/t_TestConfigSchema.py index 19b639b..9e8f65a 100644 --- a/tests/cases/configs/t_TestConfigSchema.py +++ b/tests/cases/configs/t_TestConfigSchema.py @@ -6,7 +6,7 @@ from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.configs.TestConfig import TestConfig as TestConfigLocal +from src.ogd.common.configs.TestConfig import TestConfig as TestConfigLocal from tests.config.t_config import settings class t_TestConfig(TestCase): From a90db45c3a245acfc6fd2449a941246351804d8f Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 19:25:50 -0600 Subject: [PATCH 113/124] Work on renaming the actual files for our testbeds. 
--- .../configs/games/{t_AggregateSchema.py => t_AggregateConfig.py} | 0 .../configs/games/{t_DetectorSchema.py => t_DetectorConfig.py} | 0 .../games/{t_DetectorMapSchema.py => t_DetectorMapConfig.py} | 0 .../configs/games/{t_FeatureSchema.py => t_FeatureConfig.py} | 0 .../games/{t_FeatureMapSchema.py => t_FeatureMapConfig.py} | 0 .../configs/games/{t_GeneratorSchema.py => t_GeneratorConfig.py} | 0 .../configs/games/{t_PerCountSchema.py => t_PerCountConfig.py} | 0 tests/cases/configs/{t_IndexingSchema.py => t_IndexingConfig.py} | 0 tests/cases/configs/{t_TestConfigSchema.py => t_TestConfig.py} | 0 9 files changed, 0 insertions(+), 0 deletions(-) rename tests/cases/configs/games/{t_AggregateSchema.py => t_AggregateConfig.py} (100%) rename tests/cases/configs/games/{t_DetectorSchema.py => t_DetectorConfig.py} (100%) rename tests/cases/configs/games/{t_DetectorMapSchema.py => t_DetectorMapConfig.py} (100%) rename tests/cases/configs/games/{t_FeatureSchema.py => t_FeatureConfig.py} (100%) rename tests/cases/configs/games/{t_FeatureMapSchema.py => t_FeatureMapConfig.py} (100%) rename tests/cases/configs/games/{t_GeneratorSchema.py => t_GeneratorConfig.py} (100%) rename tests/cases/configs/games/{t_PerCountSchema.py => t_PerCountConfig.py} (100%) rename tests/cases/configs/{t_IndexingSchema.py => t_IndexingConfig.py} (100%) rename tests/cases/configs/{t_TestConfigSchema.py => t_TestConfig.py} (100%) diff --git a/tests/cases/configs/games/t_AggregateSchema.py b/tests/cases/configs/games/t_AggregateConfig.py similarity index 100% rename from tests/cases/configs/games/t_AggregateSchema.py rename to tests/cases/configs/games/t_AggregateConfig.py diff --git a/tests/cases/configs/games/t_DetectorSchema.py b/tests/cases/configs/games/t_DetectorConfig.py similarity index 100% rename from tests/cases/configs/games/t_DetectorSchema.py rename to tests/cases/configs/games/t_DetectorConfig.py diff --git a/tests/cases/configs/games/t_DetectorMapSchema.py 
b/tests/cases/configs/games/t_DetectorMapConfig.py similarity index 100% rename from tests/cases/configs/games/t_DetectorMapSchema.py rename to tests/cases/configs/games/t_DetectorMapConfig.py diff --git a/tests/cases/configs/games/t_FeatureSchema.py b/tests/cases/configs/games/t_FeatureConfig.py similarity index 100% rename from tests/cases/configs/games/t_FeatureSchema.py rename to tests/cases/configs/games/t_FeatureConfig.py diff --git a/tests/cases/configs/games/t_FeatureMapSchema.py b/tests/cases/configs/games/t_FeatureMapConfig.py similarity index 100% rename from tests/cases/configs/games/t_FeatureMapSchema.py rename to tests/cases/configs/games/t_FeatureMapConfig.py diff --git a/tests/cases/configs/games/t_GeneratorSchema.py b/tests/cases/configs/games/t_GeneratorConfig.py similarity index 100% rename from tests/cases/configs/games/t_GeneratorSchema.py rename to tests/cases/configs/games/t_GeneratorConfig.py diff --git a/tests/cases/configs/games/t_PerCountSchema.py b/tests/cases/configs/games/t_PerCountConfig.py similarity index 100% rename from tests/cases/configs/games/t_PerCountSchema.py rename to tests/cases/configs/games/t_PerCountConfig.py diff --git a/tests/cases/configs/t_IndexingSchema.py b/tests/cases/configs/t_IndexingConfig.py similarity index 100% rename from tests/cases/configs/t_IndexingSchema.py rename to tests/cases/configs/t_IndexingConfig.py diff --git a/tests/cases/configs/t_TestConfigSchema.py b/tests/cases/configs/t_TestConfig.py similarity index 100% rename from tests/cases/configs/t_TestConfigSchema.py rename to tests/cases/configs/t_TestConfig.py From 640183865f942adabed6da156b68c7f5c14256e0 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 19:27:19 -0600 Subject: [PATCH 114/124] Fix a couple more imports. 
--- tests/cases/configs/games/t_DetectorConfig.py | 2 +- tests/cases/configs/games/t_DetectorMapConfig.py | 2 +- tests/cases/configs/games/t_PerCountConfig.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/cases/configs/games/t_DetectorConfig.py b/tests/cases/configs/games/t_DetectorConfig.py index 9acb1d8..37a0bb2 100644 --- a/tests/cases/configs/games/t_DetectorConfig.py +++ b/tests/cases/configs/games/t_DetectorConfig.py @@ -6,7 +6,7 @@ from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.DetectorConfig import DetectorConfig +from src.ogd.common.configs.games.DetectorConfig import DetectorConfig from tests.config.t_config import settings @unittest.skip("Not implemented") diff --git a/tests/cases/configs/games/t_DetectorMapConfig.py b/tests/cases/configs/games/t_DetectorMapConfig.py index cb3d2f5..12d031e 100644 --- a/tests/cases/configs/games/t_DetectorMapConfig.py +++ b/tests/cases/configs/games/t_DetectorMapConfig.py @@ -6,7 +6,7 @@ from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.DetectorMapConfig import DetectorMapConfig +from src.ogd.common.configs.games.DetectorMapConfig import DetectorMapConfig from tests.config.t_config import settings @unittest.skip("Not implemented") diff --git a/tests/cases/configs/games/t_PerCountConfig.py b/tests/cases/configs/games/t_PerCountConfig.py index 27dd27e..743a3a1 100644 --- a/tests/cases/configs/games/t_PerCountConfig.py +++ b/tests/cases/configs/games/t_PerCountConfig.py @@ -7,7 +7,7 @@ from ogd.common.models.enums.ExtractionMode import ExtractionMode from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.PerCountConfig import PerCountConfig +from src.ogd.common.configs.games.PerCountConfig import PerCountConfig from tests.config.t_config import settings class 
t_PerCountConfig(TestCase): From 571cce73f1925df0ea2a228adc811edf9a3d36d4 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 19:31:51 -0600 Subject: [PATCH 115/124] Update workflows with new filenames, and split out GameConfigs from GameSchemas. --- .github/workflows/TEST_Configs.yml | 2 +- .github/workflows/TEST_GameConfigs.yml | 66 ++++++++++++++++++++++++++ .github/workflows/TEST_GameSchemas.yml | 15 ++---- 3 files changed, 72 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/TEST_GameConfigs.yml diff --git a/.github/workflows/TEST_Configs.yml b/.github/workflows/TEST_Configs.yml index 582c216..4c45db3 100644 --- a/.github/workflows/TEST_Configs.yml +++ b/.github/workflows/TEST_Configs.yml @@ -55,7 +55,7 @@ jobs: - name: Execute ${{ matrix.testbed }} testbed uses: opengamedata/actions-execute-testbed@v1.0 with: - directory: "tests/cases/schemas/config" + directory: "tests/cases/configs" test_file: "${{ matrix.testbed }}.py" python_version: ${{ vars.OGD_PYTHON_VERSION }} diff --git a/.github/workflows/TEST_GameConfigs.yml b/.github/workflows/TEST_GameConfigs.yml new file mode 100644 index 0000000..25ccab2 --- /dev/null +++ b/.github/workflows/TEST_GameConfigs.yml @@ -0,0 +1,66 @@ +# Workflow to test the schemas from the `config` subfolder +name: Testbed - Game Configs +run-name: ${{ format('{0} - {1}', github.workflow, github.event_name == 'push' && github.event.head_commit.message || 'Manual Run') }} +on: + workflow_dispatch: + workflow_call: + push: + paths: + # repo-wide dependencies + - '.github/actions/test_config/**' + - 'tests/config/**' + - 'requirements.txt' + # specific dependencies + - '.github/workflows/TEST_GameConfigs.yml' + - 'tests/cases/configs/games/**' + +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}-GameSchemas + cancel-in-progress: true + +jobs: + + run_testbed_schema: + name: Run Game Schema Testbeds + runs-on: ubuntu-22.04 + strategy: + matrix: + testbed: [ + 
t_AggregateConfig, + t_DetectorMapConfig, + t_DetectorConfig, + t_FeatureMapConfig, + t_FeatureConfig, + t_GeneratorConfig, + t_PerCountConfig + ] + fail-fast: false # we don't want to cancel just because one testbed fails. + max-parallel: 20 + + steps: + # 1. Local checkout + - name: Checkout repository + uses: actions/checkout@v4 + - name: Get Dependencies + uses: opengamedata/setup-ogd-py-dependencies@v1.2 + with: + python_version: ${{ vars.OGD_PYTHON_VERSION }} + - name: Local self-install + run: python -m pip install -e . + - name: Set up Config File + uses: ./.github/actions/test_config + with: + verbose_output: "True" + with_schemas: "True" + + # 2. Build & configure remote environments + + # 3. Perform export + - name: Execute ${{ matrix.testbed }} testbed + uses: opengamedata/actions-execute-testbed@v1.0 + with: + directory: "tests/cases/schemas/games" + test_file: "${{ matrix.testbed }}.py" + python_version: ${{ vars.OGD_PYTHON_VERSION }} + + # 4. Cleanup & complete diff --git a/.github/workflows/TEST_GameSchemas.yml b/.github/workflows/TEST_GameSchemas.yml index 412e899..8a76ab7 100644 --- a/.github/workflows/TEST_GameSchemas.yml +++ b/.github/workflows/TEST_GameSchemas.yml @@ -6,11 +6,13 @@ on: workflow_call: push: paths: - - '.github/workflows/TEST_GameSchemas.yml' + # repo-wide dependencies - '.github/actions/test_config/**' - - 'tests/cases/schemas/games/**' - 'tests/config/**' - 'requirements.txt' + # specific dependencies + - '.github/workflows/TEST_GameSchemas.yml' + - 'tests/cases/schemas/games/**' concurrency: group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}-GameSchemas @@ -24,17 +26,10 @@ jobs: strategy: matrix: testbed: [ - t_AggregateConfig, t_DataElementSchema, - t_DetectorMapConfig, - t_DetectorConfig, t_EventSchema, - t_FeatureMapConfig, - t_FeatureConfig, t_GameSchema, - t_GameStateSchema, - t_GeneratorConfig, - t_PerCountConfig, + t_GameStateSchema ] fail-fast: false # we don't want to cancel just because one 
testbed fails. max-parallel: 20 From 00f2390984fbcbf37948b1e6dda5d84007c2be0b Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 19:33:45 -0600 Subject: [PATCH 116/124] Update config and TestDriver with new paths. --- tests/TestDriver.py | 10 +++++++++- tests/config/t_config.py.template | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/TestDriver.py b/tests/TestDriver.py index 042ac46..2bd28f7 100644 --- a/tests/TestDriver.py +++ b/tests/TestDriver.py @@ -20,12 +20,20 @@ # loader = TestLoader() # TODO : At the moment, this is just module-level, should eventually go to class-level selection. suite = TestSuite() +if _config.EnabledTests.get('CONFIGS'): + print("***\nAdding configs:") + suite.addTest(defaultTestLoader.discover('./tests/cases/configs/', pattern="t_*.py", top_level_dir="./")) + print("Done\n***") if _config.EnabledTests.get('INTERFACES'): print("***\nAdding interfaces:") suite.addTest(defaultTestLoader.discover('./tests/cases/interfaces/', pattern="t_*.py", top_level_dir="./")) print("Done\n***") +if _config.EnabledTests.get('SCHEMAS'): + print("***\nAdding schemas:") + suite.addTest(defaultTestLoader.discover('./tests/cases/schemas/', pattern="t_*.py", top_level_dir="./")) + print("Done\n***") if _config.EnabledTests.get('UTILS'): - print("***\nAdding APIUtils:") + print("***\nAdding Utils:") suite.addTest(defaultTestLoader.discover('./tests/cases/utils/', pattern="t_*.py", top_level_dir="./")) print("Done\n***") diff --git a/tests/config/t_config.py.template b/tests/config/t_config.py.template index 4b2af3f..6b94bbe 100644 --- a/tests/config/t_config.py.template +++ b/tests/config/t_config.py.template @@ -1,6 +1,7 @@ settings = { "VERBOSE" : False, "ENABLED" : { + "CONFIGS":True, "INTERFACES":True, "SCHEMAS":True, "UTILS":True From ece4b65e4098c7c82a251749cf8f1c2364fd82bc Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 19:35:29 -0600 Subject: [PATCH 117/124] Remove non-existent old classes
from schemas/games/__init__.py. --- src/ogd/common/schemas/games/__init__.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/ogd/common/schemas/games/__init__.py b/src/ogd/common/schemas/games/__init__.py index 83851af..d92c0ef 100644 --- a/src/ogd/common/schemas/games/__init__.py +++ b/src/ogd/common/schemas/games/__init__.py @@ -1,17 +1,7 @@ __all__ = [ "EventSchema", - "GeneratorConfig", - "DetectorConfig", - "FeatureConfig", - "AggregateConfig", - "PerCountConfig", "GameSchema" ] from . import EventSchema -from . import GeneratorConfig -from . import DetectorConfig -from . import FeatureConfig -from . import AggregateConfig -from . import PerCountConfig from . import GameSchema From 2aae1dee1a7bdd698ddc606054a2ee27b5271ad9 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 19:41:56 -0600 Subject: [PATCH 118/124] Add an init file for the game configs. --- src/ogd/common/configs/games/__init__.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 src/ogd/common/configs/games/__init__.py diff --git a/src/ogd/common/configs/games/__init__.py b/src/ogd/common/configs/games/__init__.py new file mode 100644 index 0000000..4cf766b --- /dev/null +++ b/src/ogd/common/configs/games/__init__.py @@ -0,0 +1,13 @@ +__all__ = [ + "GeneratorConfig", + "DetectorConfig", + "FeatureConfig", + "AggregateConfig", + "PerCountConfig", +] + +from . import GeneratorConfig +from . import DetectorConfig +from . import FeatureConfig +from . import AggregateConfig +from . import PerCountConfig From 8e60b9d5e4973b8249c85ed84113a1ed30acf4f2 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 19:44:10 -0600 Subject: [PATCH 119/124] Add a couple more init files, and fix naming of CredentialSchema -> CredentialConfig. 
--- src/ogd/common/configs/storage/DataStoreConfig.py | 4 ++-- src/ogd/common/configs/storage/__init__.py | 0 src/ogd/common/configs/storage/credentials/__init__.py | 0 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 src/ogd/common/configs/storage/__init__.py create mode 100644 src/ogd/common/configs/storage/credentials/__init__.py diff --git a/src/ogd/common/configs/storage/DataStoreConfig.py b/src/ogd/common/configs/storage/DataStoreConfig.py index cea4524..bd7ffd6 100644 --- a/src/ogd/common/configs/storage/DataStoreConfig.py +++ b/src/ogd/common/configs/storage/DataStoreConfig.py @@ -5,7 +5,7 @@ from typing import Any, Dict # , overload # import local files from ogd.common.configs.Config import Config -from ogd.common.configs.storage.CredentialSchema import CredentialSchema +from ogd.common.configs.storage.credentials.CredentialConfig import CredentialConfig from ogd.common.utils.Logger import Logger @@ -58,7 +58,7 @@ def Location(self) -> str | Path: @property @abc.abstractmethod - def Credential(self) -> CredentialSchema: + def Credential(self) -> CredentialConfig: pass @property diff --git a/src/ogd/common/configs/storage/__init__.py b/src/ogd/common/configs/storage/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/ogd/common/configs/storage/credentials/__init__.py b/src/ogd/common/configs/storage/credentials/__init__.py new file mode 100644 index 0000000..e69de29 From c8e51063d75870688b5d1ed54e7fad8cb280bfaa Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 19:56:58 -0600 Subject: [PATCH 120/124] Update test config action to include option for enabling configs tests. 
--- .github/actions/test_config/action.yml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/actions/test_config/action.yml b/.github/actions/test_config/action.yml index 7d02572..b3a86f1 100644 --- a/.github/actions/test_config/action.yml +++ b/.github/actions/test_config/action.yml @@ -9,14 +9,18 @@ inputs: description: "Whether to output extra debugging lines from tests." required: false default: "False" - with_schemas: - description: "Whether to run the Schema tests." + with_configs: + description: "Whether to run the Config tests." required: false default: "False" with_interfaces: description: "Whether to run the interface tests." required: false default: "False" + with_schemas: + description: "Whether to run the Schema tests." + required: false + default: "False" with_utils: description: "Whether to run the utils tests." required: false @@ -38,12 +42,15 @@ runs: - name: Set Output Verbosity run: sed -i 's@"VERBOSE"\s*:\s*False@"VERBOSE":${{ inputs.verbose_output }}@g' ${{ inputs.config_path }}/t_config.py shell: bash - - name: Set Schema test(s) to run or not - run: sed -i 's@"SCHEMAS"\s*:\s*True@"SCHEMAS":${{ inputs.with_schemas }}@g' ${{ inputs.config_path }}/t_config.py + - name: Set Config test(s) to run or not + run: sed -i 's@"CONFIGS"\s*:\s*True@"CONFIGS":${{ inputs.with_configs }}@g' ${{ inputs.config_path }}/t_config.py shell: bash - name: Set interface test(s) to run or not run: sed -i 's@"INTERFACES"\s*:\s*True@"INTERFACES":${{ inputs.with_interfaces }}@g' ${{ inputs.config_path }}/t_config.py shell: bash + - name: Set Schema test(s) to run or not + run: sed -i 's@"SCHEMAS"\s*:\s*True@"SCHEMAS":${{ inputs.with_schemas }}@g' ${{ inputs.config_path }}/t_config.py + shell: bash - name: Set utils test(s) to run or not run: sed -i 's@"UTILS"\s*:\s*True@"UTILS":${{ inputs.with_utils }}@g' ${{ inputs.config_path }}/t_config.py shell: bash From e164388e4868efb77025a16b4da4e6c7cd04f1cc Mon Sep 17 00:00:00 2001 From: 
Luke Swanson Date: Wed, 18 Dec 2024 19:59:47 -0600 Subject: [PATCH 121/124] Update a couple parts of GameConfigs workflow to use 'configs' instead of 'schemas'. --- .github/workflows/TEST_GameConfigs.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/TEST_GameConfigs.yml b/.github/workflows/TEST_GameConfigs.yml index 25ccab2..1246b26 100644 --- a/.github/workflows/TEST_GameConfigs.yml +++ b/.github/workflows/TEST_GameConfigs.yml @@ -15,13 +15,13 @@ on: - 'tests/cases/configs/games/**' concurrency: - group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}-GameSchemas + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}-GameConfigs cancel-in-progress: true jobs: run_testbed_schema: - name: Run Game Schema Testbeds + name: Run Game Config Testbeds runs-on: ubuntu-22.04 strategy: matrix: @@ -51,7 +51,7 @@ jobs: uses: ./.github/actions/test_config with: verbose_output: "True" - with_schemas: "True" + with_configs: "True" # 2. Build & configure remote environments @@ -59,7 +59,7 @@ jobs: - name: Execute ${{ matrix.testbed }} testbed uses: opengamedata/actions-execute-testbed@v1.0 with: - directory: "tests/cases/schemas/games" + directory: "tests/cases/configs/games" test_file: "${{ matrix.testbed }}.py" python_version: ${{ vars.OGD_PYTHON_VERSION }} From 30ddec975bc28eb0b4ea4b2e45141e7330233ef6 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 20:02:25 -0600 Subject: [PATCH 122/124] Update CI script to include all the individual test workflows. 
--- .github/workflows/CI_common.yml | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/.github/workflows/CI_common.yml b/.github/workflows/CI_common.yml index 56b0310..ceb85b8 100644 --- a/.github/workflows/CI_common.yml +++ b/.github/workflows/CI_common.yml @@ -20,24 +20,31 @@ jobs: with: with_caching: false - # Run testbeds in schema module + # Run testbeds in configs module + + testbed_configs: + name: Config Testbeds + needs: build + uses: ./.github/workflows/TEST_Configs.yml + + testbed_game_configs: + name: Game Config Testbeds + needs: build + uses: ./.github/workflows/TEST_GameConfigs.yml + + # Run testbeds in schemas module testbed_schema: name: Schema base class Testbed needs: build uses: ./.github/workflows/TEST_Schema.yml - testbed_config_schemas: - name: Config Schema Testbeds - needs: build - uses: ./.github/workflows/TEST_ConfigSchemas.yml - testbed_game_schemas: name: Game Schema Testbeds needs: build uses: ./.github/workflows/TEST_GameSchemas.yml - # Run testbeds in schema module + # Run testbeds in utils module testbed_fileio: name: FileIO Testbed From c9214251eaf7dbe6c111055fdbd02b6da2859219 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 20:06:20 -0600 Subject: [PATCH 123/124] Add init to game configs folder. --- tests/cases/configs/games/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/cases/configs/games/__init__.py diff --git a/tests/cases/configs/games/__init__.py b/tests/cases/configs/games/__init__.py new file mode 100644 index 0000000..e69de29 From 533e2967382fc0c8012fca0931d6e589263b9933 Mon Sep 17 00:00:00 2001 From: Luke Swanson Date: Wed, 18 Dec 2024 20:36:04 -0600 Subject: [PATCH 124/124] On second thought, don't even make Config a subclass of Schema, make it a straight-up type alias instead. 
--- src/ogd/common/configs/Config.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/ogd/common/configs/Config.py b/src/ogd/common/configs/Config.py index e274957..a900ce5 100644 --- a/src/ogd/common/configs/Config.py +++ b/src/ogd/common/configs/Config.py @@ -1,13 +1,8 @@ """Config Class Module """ ## import standard libraries -from typing import Any, Dict +from typing import TypeAlias # import local files from ogd.common.schemas.Schema import Schema -class Config(Schema): - """Thin layer over Schema base class to act as a base for all our Config-type classes. - """ - - def __init__(self, name: str, other_elements: Dict[str, Any] | None = None): - super().__init__(name, other_elements) \ No newline at end of file +Config : TypeAlias = Schema