diff --git a/.github/actions/test_config/action.yml b/.github/actions/test_config/action.yml index 7d02572..b3a86f1 100644 --- a/.github/actions/test_config/action.yml +++ b/.github/actions/test_config/action.yml @@ -9,14 +9,18 @@ inputs: description: "Whether to output extra debugging lines from tests." required: false default: "False" - with_schemas: - description: "Whether to run the Schema tests." + with_configs: + description: "Whether to run the Config tests." required: false default: "False" with_interfaces: description: "Whether to run the interface tests." required: false default: "False" + with_schemas: + description: "Whether to run the Schema tests." + required: false + default: "False" with_utils: description: "Whether to run the utils tests." required: false @@ -38,12 +42,15 @@ runs: - name: Set Output Verbosity run: sed -i 's@"VERBOSE"\s*:\s*False@"VERBOSE":${{ inputs.verbose_output }}@g' ${{ inputs.config_path }}/t_config.py shell: bash - - name: Set Schema test(s) to run or not - run: sed -i 's@"SCHEMAS"\s*:\s*True@"SCHEMAS":${{ inputs.with_schemas }}@g' ${{ inputs.config_path }}/t_config.py + - name: Set Config test(s) to run or not + run: sed -i 's@"CONFIGS"\s*:\s*True@"CONFIGS":${{ inputs.with_configs }}@g' ${{ inputs.config_path }}/t_config.py shell: bash - name: Set interface test(s) to run or not run: sed -i 's@"INTERFACES"\s*:\s*True@"INTERFACES":${{ inputs.with_interfaces }}@g' ${{ inputs.config_path }}/t_config.py shell: bash + - name: Set Schema test(s) to run or not + run: sed -i 's@"SCHEMAS"\s*:\s*True@"SCHEMAS":${{ inputs.with_schemas }}@g' ${{ inputs.config_path }}/t_config.py + shell: bash - name: Set utils test(s) to run or not run: sed -i 's@"UTILS"\s*:\s*True@"UTILS":${{ inputs.with_utils }}@g' ${{ inputs.config_path }}/t_config.py shell: bash diff --git a/.github/workflows/CI_common.yml b/.github/workflows/CI_common.yml index 56b0310..ceb85b8 100644 --- a/.github/workflows/CI_common.yml +++ b/.github/workflows/CI_common.yml @@ 
-20,24 +20,31 @@ jobs: with: with_caching: false - # Run testbeds in schema module + # Run testbeds in configs module + + testbed_configs: + name: Config Testbeds + needs: build + uses: ./.github/workflows/TEST_Configs.yml + + testbed_game_configs: + name: Game Config Testbeds + needs: build + uses: ./.github/workflows/TEST_GameConfigs.yml + + # Run testbeds in schemas module testbed_schema: name: Schema base class Testbed needs: build uses: ./.github/workflows/TEST_Schema.yml - testbed_config_schemas: - name: Config Schema Testbeds - needs: build - uses: ./.github/workflows/TEST_ConfigSchemas.yml - testbed_game_schemas: name: Game Schema Testbeds needs: build uses: ./.github/workflows/TEST_GameSchemas.yml - # Run testbeds in schema module + # Run testbeds in utils module testbed_fileio: name: FileIO Testbed diff --git a/.github/workflows/TEST_ConfigSchemas.yml b/.github/workflows/TEST_Configs.yml similarity index 87% rename from .github/workflows/TEST_ConfigSchemas.yml rename to .github/workflows/TEST_Configs.yml index b25f2a8..4c45db3 100644 --- a/.github/workflows/TEST_ConfigSchemas.yml +++ b/.github/workflows/TEST_Configs.yml @@ -6,11 +6,13 @@ on: workflow_call: push: paths: - - '.github/workflows/TEST_ConfigSchemas.yml' + # repo-wide dependencies - '.github/actions/test_config/**' - - 'tests/cases/schemas/config/*.py' - 'tests/config/**' - 'requirements.txt' + # specific dependencies + - '.github/workflows/TEST_Configs.yml' + - 'tests/cases/configs/*.py' concurrency: group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}-ConfigSchemas @@ -25,8 +27,8 @@ jobs: matrix: testbed: [ t_GameSourceSchema, - t_IndexingSchema, - t_TestConfigSchema, + t_IndexingConfig, + t_TestConfig, ] fail-fast: false # we don't want to cancel just because one testbed fails. 
max-parallel: 20 @@ -53,7 +55,7 @@ jobs: - name: Execute ${{ matrix.testbed }} testbed uses: opengamedata/actions-execute-testbed@v1.0 with: - directory: "tests/cases/schemas/config" + directory: "tests/cases/configs" test_file: "${{ matrix.testbed }}.py" python_version: ${{ vars.OGD_PYTHON_VERSION }} diff --git a/.github/workflows/TEST_GameConfigs.yml b/.github/workflows/TEST_GameConfigs.yml new file mode 100644 index 0000000..1246b26 --- /dev/null +++ b/.github/workflows/TEST_GameConfigs.yml @@ -0,0 +1,66 @@ +# Workflow to test the configs from the `configs` subfolder +name: Testbed - Game Configs +run-name: ${{ format('{0} - {1}', github.workflow, github.event_name == 'push' && github.event.head_commit.message || 'Manual Run') }} +on: + workflow_dispatch: + workflow_call: + push: + paths: + # repo-wide dependencies + - '.github/actions/test_config/**' + - 'tests/config/**' + - 'requirements.txt' + # specific dependencies + - '.github/workflows/TEST_GameConfigs.yml' + - 'tests/cases/configs/games/**' + +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}-GameConfigs + cancel-in-progress: true + +jobs: + + run_testbed_schema: + name: Run Game Config Testbeds + runs-on: ubuntu-22.04 + strategy: + matrix: + testbed: [ + t_AggregateConfig, + t_DetectorMapConfig, + t_DetectorConfig, + t_FeatureMapConfig, + t_FeatureConfig, + t_GeneratorConfig, + t_PerCountConfig + ] + fail-fast: false # we don't want to cancel just because one testbed fails. 
Perform export + - name: Execute ${{ matrix.testbed }} testbed + uses: opengamedata/actions-execute-testbed@v1.0 + with: + directory: "tests/cases/configs/games" + test_file: "${{ matrix.testbed }}.py" + python_version: ${{ vars.OGD_PYTHON_VERSION }} + + # 4. Cleanup & complete diff --git a/.github/workflows/TEST_GameSchemas.yml b/.github/workflows/TEST_GameSchemas.yml index 560006f..8a76ab7 100644 --- a/.github/workflows/TEST_GameSchemas.yml +++ b/.github/workflows/TEST_GameSchemas.yml @@ -6,11 +6,13 @@ on: workflow_call: push: paths: - - '.github/workflows/TEST_GameSchemas.yml' + # repo-wide dependencies - '.github/actions/test_config/**' - - 'tests/cases/schemas/games/**' - 'tests/config/**' - 'requirements.txt' + # specific dependencies + - '.github/workflows/TEST_GameSchemas.yml' + - 'tests/cases/schemas/games/**' concurrency: group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}-GameSchemas @@ -24,17 +26,10 @@ jobs: strategy: matrix: testbed: [ - t_AggregateSchema, t_DataElementSchema, - t_DetectorMapSchema, - t_DetectorSchema, t_EventSchema, - t_FeatureMapSchema, - t_FeatureSchema, t_GameSchema, - t_GameStateSchema, - t_GeneratorSchema, - t_PerCountSchema, + t_GameStateSchema ] fail-fast: false # we don't want to cancel just because one testbed fails. 
max-parallel: 20 diff --git a/src/ogd/common/configs/Config.py b/src/ogd/common/configs/Config.py new file mode 100644 index 0000000..a900ce5 --- /dev/null +++ b/src/ogd/common/configs/Config.py @@ -0,0 +1,8 @@ +"""Config Class Module +""" +## import standard libraries +from typing import TypeAlias +# import local files +from ogd.common.schemas.Schema import Schema + +Config : TypeAlias = Schema diff --git a/src/ogd/common/schemas/configs/GameSourceSchema.py b/src/ogd/common/configs/GameSourceSchema.py similarity index 70% rename from src/ogd/common/schemas/configs/GameSourceSchema.py rename to src/ogd/common/configs/GameSourceSchema.py index 8befe13..6f4886e 100644 --- a/src/ogd/common/schemas/configs/GameSourceSchema.py +++ b/src/ogd/common/configs/GameSourceSchema.py @@ -1,15 +1,17 @@ # import standard libraries import logging -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.configs.data_sources.DataSourceSchema import DataSourceSchema -from ogd.common.schemas.configs.data_sources.BigQuerySourceSchema import BigQuerySchema +from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig +from ogd.common.configs.storage.BigQueryConfig import BigQueryConfig from ogd.common.schemas.Schema import Schema +from ogd.common.schemas.tables.TableSchema import TableSchema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map class GameSourceSchema(Schema): + _DEFAULT_GAME_ID = "UNKNOWN GAME" _DEFAULT_SOURCE_NAME = "OPENGAMEDATA_BQ" _DEFAULT_DB_NAME = "UNKNOWN GAME" _DEFAULT_TABLE_NAME = "_daily" @@ -31,25 +33,40 @@ class GameSourceSchema(Schema): - `TableName` : The neame of the specific table within the database holding the given game's data - `TableSchema` : A schema indicating the structure of the table containing the given game's data. + TODO : Implement and use a smart Load(...) 
function of TableSchema to load schema from given name, rather than FromFile. + :param Schema: _description_ :type Schema: _type_ """ - def __init__(self, name:str, source_name:str, source_schema:Optional[DataSourceSchema], + def __init__(self, name:str, game_id:Optional[str], + source_name:str, source_schema:Optional[DataStoreConfig], db_name:str, table_name:str, table_schema:str, - other_elements:Optional[Map]=None): - self._source_name : str = source_name - self._source_schema : Optional[DataSourceSchema] = source_schema - self._db_name : str = db_name - self._table_name : str = table_name - self._table_schema : str = table_schema + other_elements:Dict[str, Any]): + self._game_id : str + self._source_name : str = source_name + self._source_schema : Optional[DataStoreConfig] = source_schema + self._db_name : str = db_name + self._table_name : str = table_name + self._table_schema_name : str = table_schema + self._table_schema : TableSchema = TableSchema.FromFile(schema_name=self._table_schema_name) + + if game_id is not None: + self._game_id = game_id + else: + Logger.Log(f"GameSourceSchema did not receive a game_id, defaulting to {name}") + self._game_id = name super().__init__(name=name, other_elements=other_elements) + @property + def GameID(self) -> str: + return self._game_id + @property def SourceName(self) -> str: return self._source_name @property - def Source(self) -> Optional[DataSourceSchema]: + def Source(self) -> Optional[DataStoreConfig]: return self._source_schema @property @@ -61,24 +78,29 @@ def TableName(self) -> str: return self._table_name @property - def TableSchema(self) -> str: + def TableSchema(self) -> TableSchema: return self._table_schema + @property + def TableSchemaName(self) -> str: + return self._table_schema_name + # *** IMPLEMENT ABSTRACT FUNCTIONS *** @property def AsMarkdown(self) -> str: ret_val : str - ret_val = f"{self.Name}: _{self.TableSchema}_ format, source {self.Source.Name if self.Source else 'None'} : 
{self.DatabaseName}.{self.TableName}" + ret_val = f"{self.Name}: _{self.TableSchemaName}_ format, source {self.Source.Name if self.Source else 'None'} : {self.DatabaseName}.{self.TableName}" return ret_val @classmethod def Default(cls) -> "GameSourceSchema": return GameSourceSchema( name="DefaultGameSourceSchema", + game_id=cls._DEFAULT_GAME_ID, source_name=cls._DEFAULT_SOURCE_NAME, - source_schema=BigQuerySchema.Default(), + source_schema=BigQueryConfig.Default(), db_name=cls._DEFAULT_DB_NAME, table_name=cls._DEFAULT_TABLE_NAME, table_schema=cls._DEFAULT_TABLE_SCHEMA, @@ -86,7 +108,7 @@ def Default(cls) -> "GameSourceSchema": ) @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger], data_sources:Dict[str, DataSourceSchema]) -> "GameSourceSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger], data_sources:Dict[str, DataStoreConfig]) -> "GameSourceSchema": """Create a GameSourceSchema from a given dictionary :param name: _description_ @@ -96,12 +118,12 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging :param logger: _description_ :type logger: Optional[logging.Logger] :param data_sources: _description_ - :type data_sources: Dict[str, DataSourceSchema] + :type data_sources: Dict[str, DataStoreConfig] :return: _description_ :rtype: GameSourceSchema """ _source_name : str - _source_schema : Optional[DataSourceSchema] + _source_schema : Optional[DataStoreConfig] _db_name : str _table_schema : str _table_name : str @@ -113,6 +135,11 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging logger.warning(_msg) else: Logger.Log(_msg, logging.WARN) + _game_id = cls.ElementFromDict(all_elements=all_elements, logger=logger, + element_names=["game", "game_id"], + parser_function=cls._parseGameID, + default_value=name + ) _source_name = cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["source"], 
parser_function=cls._parseSource, @@ -142,7 +169,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging - _used = {"source", "database", "table", "schema"} + _used = {"game", "game_id", "source", "database", "table", "schema"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return GameSourceSchema(name=name, source_name=_source_name, source_schema=_source_schema, + return GameSourceSchema(name=name, game_id=_game_id, source_name=_source_name, source_schema=_source_schema, db_name=_db_name, table_name=_table_name, table_schema=_table_schema, other_elements=_leftovers) @@ -150,7 +177,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging @staticmethod def EmptySchema() -> "GameSourceSchema": - return GameSourceSchema(name="NOT FOUND", source_name="NOT FOUND", source_schema=None, db_name="NOT FOUND", + return GameSourceSchema(name="NOT FOUND", game_id="NOT FOUND", source_name="NOT FOUND", source_schema=None, db_name="NOT FOUND", table_name="NOT FOUND", table_schema="NOT FOUND", other_elements={}) # *** PUBLIC METHODS *** @@ -167,6 +194,16 @@ def _parseSource(source) -> str: Logger.Log(f"Game Source source name was unexpected type {type(source)}, defaulting to str(source)={ret_val}.", logging.WARN) return ret_val + @staticmethod + def _parseGameID(game_id) -> str: + ret_val : str + if isinstance(game_id, str): + ret_val = game_id + else: + ret_val = str(game_id) + Logger.Log(f"Game Source app ID was unexpected type {type(game_id)}, defaulting to str(game_id)={ret_val}.", logging.WARN) + return ret_val + @staticmethod def _parseDBName(db_name) -> str: ret_val : str diff --git a/src/ogd/common/schemas/configs/IndexingSchema.py b/src/ogd/common/configs/IndexingConfig.py similarity index 88% rename from src/ogd/common/schemas/configs/IndexingSchema.py rename to src/ogd/common/configs/IndexingConfig.py index 3c50224..f7befd0 100644 --- a/src/ogd/common/schemas/configs/IndexingSchema.py +++ b/src/ogd/common/configs/IndexingConfig.py @@ -3,11 +3,11 
@@ from pathlib import Path from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.Schema import Schema +from ogd.common.configs.Config import Config from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class FileIndexingSchema(Schema): +class FileIndexingConfig(Config): _DEFAULT_LOCAL_DIR = Path("./data/") _DEFAULT_REMOTE_URL = "https://fieldday-web.ad.education.wisc.edu/opengamedata/" _DEFAULT_TEMPLATE_URL = "https://github.com/opengamedata/opengamedata-samples" @@ -35,9 +35,9 @@ def TemplatesURL(self) -> str: # *** IMPLEMENT ABSTRACT FUNCTIONS *** @classmethod - def Default(cls) -> "FileIndexingSchema": - return FileIndexingSchema( - name = "DefaultFileIndexingSchema", + def Default(cls) -> "FileIndexingConfig": + return FileIndexingConfig( + name = "DefaultFileIndexingConfig", local_dir = cls._DEFAULT_LOCAL_DIR, remote_url = cls._DEFAULT_REMOTE_URL, templates_url = cls._DEFAULT_TEMPLATE_URL, @@ -45,7 +45,7 @@ def Default(cls) -> "FileIndexingSchema": ) @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "FileIndexingSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "FileIndexingConfig": _local_dir : Path _remote_url : Optional[str] _templates_url : str @@ -60,22 +60,22 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _local_dir = cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["LOCAL_DIR"], parser_function=cls._parseLocalDir, - default_value=FileIndexingSchema._DEFAULT_LOCAL_DIR + default_value=FileIndexingConfig._DEFAULT_LOCAL_DIR ) _remote_url = cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["REMOTE_URL"], parser_function=cls._parseRemoteURL, - default_value=FileIndexingSchema._DEFAULT_REMOTE_URL + default_value=FileIndexingConfig._DEFAULT_REMOTE_URL ) _templates_url = 
cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["TEMPLATES_URL"], parser_function=cls._parseTemplatesURL, - default_value=FileIndexingSchema._DEFAULT_TEMPLATE_URL + default_value=FileIndexingConfig._DEFAULT_TEMPLATE_URL ) _used = {"LOCAL_DIR", "REMOTE_URL", "TEMPLATES_URL"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return FileIndexingSchema(name=name, local_dir=_local_dir, remote_url=_remote_url, templates_url=_templates_url, other_elements=_leftovers) + return FileIndexingConfig(name=name, local_dir=_local_dir, remote_url=_remote_url, templates_url=_templates_url, other_elements=_leftovers) @property diff --git a/src/ogd/common/schemas/configs/TestConfigSchema.py b/src/ogd/common/configs/TestConfig.py similarity index 90% rename from src/ogd/common/schemas/configs/TestConfigSchema.py rename to src/ogd/common/configs/TestConfig.py index 516bc27..649eccf 100644 --- a/src/ogd/common/schemas/configs/TestConfigSchema.py +++ b/src/ogd/common/configs/TestConfig.py @@ -1,5 +1,5 @@ """ -TestConfigSchema +TestConfig Contains a Schema class for managing config data for testing configurations. 
In particular, base testing config files always have a `"VERBOSE"` setting, @@ -13,13 +13,13 @@ # import 3rd-party libraries # import OGD libraries -from ogd.common.schemas.Schema import Schema +from ogd.common.configs.Config import Config from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map # import local files -class TestConfigSchema(Schema): +class TestConfig(Config): _DEFAULT_VERBOSE = False _DEFAULT_ENABLED_TESTS = {} @@ -48,8 +48,8 @@ def AsMarkdown(self) -> str: # *** IMPLEMENT ABSTRACT FUNCTIONS *** @classmethod - def Default(cls) -> "TestConfigSchema": - return TestConfigSchema( + def Default(cls) -> "TestConfig": + return TestConfig( name = "DefaultTestConfig", verbose = cls._DEFAULT_VERBOSE, enabled_tests = cls._DEFAULT_ENABLED_TESTS @@ -58,7 +58,7 @@ def Default(cls) -> "TestConfigSchema": # *** PUBLIC STATICS *** @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "TestConfigSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "TestConfig": _verbose : bool _enabled_tests : Dict[str, bool] @@ -82,7 +82,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _used = {"VERBOSE", "ENABLED"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return TestConfigSchema(name=name, verbose=_verbose, enabled_tests=_enabled_tests, other_elements=_leftovers) + return TestConfig(name=name, verbose=_verbose, enabled_tests=_enabled_tests, other_elements=_leftovers) # *** PUBLIC METHODS *** @@ -114,7 +114,7 @@ def _parseEnabledTests(enabled, logger:Optional[logging.Logger]=None) -> Dict[st if isinstance(enabled, dict): ret_val = { str(key) : bool(val) for key, val in enabled.items() } else: - ret_val = TestConfigSchema.Default().EnabledTests + ret_val = TestConfig.Default().EnabledTests _msg = f"Config 'enabled tests' setting was unexpected type {type(enabled)}, 
defaulting to class default = {ret_val}." if logger: logger.warn(_msg, logging.WARN) diff --git a/src/ogd/common/interfaces/__init__.py b/src/ogd/common/configs/__init__.py similarity index 100% rename from src/ogd/common/interfaces/__init__.py rename to src/ogd/common/configs/__init__.py diff --git a/src/ogd/common/schemas/games/AggregateSchema.py b/src/ogd/common/configs/games/AggregateConfig.py similarity index 75% rename from src/ogd/common/schemas/games/AggregateSchema.py rename to src/ogd/common/configs/games/AggregateConfig.py index a4e32e0..79a0ce7 100644 --- a/src/ogd/common/schemas/games/AggregateSchema.py +++ b/src/ogd/common/configs/games/AggregateConfig.py @@ -2,10 +2,10 @@ import logging from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.games.FeatureSchema import FeatureSchema +from ogd.common.configs.games.FeatureConfig import FeatureConfig from ogd.common.utils.typing import Map -class AggregateSchema(FeatureSchema): +class AggregateConfig(FeatureConfig): def __init__(self, name:str, other_elements:Optional[Map]=None): super().__init__(name=name, other_elements=other_elements) @@ -21,12 +21,12 @@ def AsMarkdown(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "AggregateSchema": - return AggregateSchema(name=name, other_elements=all_elements) + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "AggregateConfig": + return AggregateConfig(name=name, other_elements=all_elements) @classmethod - def Default(cls) -> "AggregateSchema": - return AggregateSchema( - name="DefaultAggregateSchema", + def Default(cls) -> "AggregateConfig": + return AggregateConfig( + name="DefaultAggregateConfig", other_elements={} ) diff --git a/src/ogd/common/schemas/games/DetectorSchema.py b/src/ogd/common/configs/games/DetectorConfig.py similarity index 72% rename from 
src/ogd/common/schemas/games/DetectorSchema.py rename to src/ogd/common/configs/games/DetectorConfig.py index 33adab5..2350f0a 100644 --- a/src/ogd/common/schemas/games/DetectorSchema.py +++ b/src/ogd/common/configs/games/DetectorConfig.py @@ -3,10 +3,10 @@ from typing import Any, Dict, Optional # import local files from ogd.common.models.enums.ExtractionMode import ExtractionMode -from ogd.common.schemas.games.GeneratorSchema import GeneratorSchema +from ogd.common.configs.games.GeneratorConfig import GeneratorConfig from ogd.common.utils.typing import Map -class DetectorSchema(GeneratorSchema): +class DetectorConfig(GeneratorConfig): def __init__(self, name:str, other_elements:Optional[Map]=None): super().__init__(name=name, other_elements=other_elements) @@ -20,9 +20,9 @@ def AsMarkdown(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "DetectorSchema": - return DetectorSchema(name=name, other_elements=all_elements) + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "DetectorConfig": + return DetectorConfig(name=name, other_elements=all_elements) @classmethod - def Default(cls) -> "DetectorSchema": - return DetectorSchema(name="DefaultDetectorSchema", other_elements={}) + def Default(cls) -> "DetectorConfig": + return DetectorConfig(name="DefaultDetectorConfig", other_elements={}) diff --git a/src/ogd/common/schemas/games/DetectorMapSchema.py b/src/ogd/common/configs/games/DetectorMapConfig.py similarity index 76% rename from src/ogd/common/schemas/games/DetectorMapSchema.py rename to src/ogd/common/configs/games/DetectorMapConfig.py index 440012e..f496fb3 100644 --- a/src/ogd/common/schemas/games/DetectorMapSchema.py +++ b/src/ogd/common/configs/games/DetectorMapConfig.py @@ -2,12 +2,12 @@ import logging from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.games.DetectorSchema import 
DetectorSchema +from ogd.common.configs.games.DetectorConfig import DetectorConfig from ogd.common.schemas.Schema import Schema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class DetectorMapSchema(Schema): +class DetectorMapConfig(Schema): _DEFAULT_PERLEVEL_DETECTORS = {} _DEFAULT_PERCOUNT_DETECTORS = {} _DEFAULT_AGGREGATE_DETECTORS = {} @@ -18,24 +18,24 @@ class DetectorMapSchema(Schema): Dumb struct to contain the specification and config of a set of features for a game. """ def __init__(self, name:str, - perlevel_detectors:Dict[str, DetectorSchema], percount_detectors:Dict[str, DetectorSchema], aggregate_detectors:Dict[str, DetectorSchema], + perlevel_detectors:Dict[str, DetectorConfig], percount_detectors:Dict[str, DetectorConfig], aggregate_detectors:Dict[str, DetectorConfig], other_elements:Optional[Map]=None): - self._perlevel_detectors : Dict[str, DetectorSchema] = perlevel_detectors - self._percount_detectors : Dict[str, DetectorSchema] = percount_detectors - self._aggregate_detectors : Dict[str, DetectorSchema] = aggregate_detectors + self._perlevel_detectors : Dict[str, DetectorConfig] = perlevel_detectors + self._percount_detectors : Dict[str, DetectorConfig] = percount_detectors + self._aggregate_detectors : Dict[str, DetectorConfig] = aggregate_detectors super().__init__(name=name, other_elements=other_elements) @property - def PerLevelDetectors(self) -> Dict[str, DetectorSchema]: + def PerLevelDetectors(self) -> Dict[str, DetectorConfig]: return self._perlevel_detectors @property - def PerCountDetectors(self) -> Dict[str, DetectorSchema]: + def PerCountDetectors(self) -> Dict[str, DetectorConfig]: return self._percount_detectors @property - def AggregateDetectors(self) -> Dict[str, DetectorSchema]: + def AggregateDetectors(self) -> Dict[str, DetectorConfig]: return self._aggregate_detectors # *** IMPLEMENT ABSTRACT FUNCTIONS *** @@ -50,7 +50,7 @@ def AsMarkdown(self) -> str: return " \n\n".join(feature_summary + 
feature_list) @property - def AsDict(self) -> Dict[str, Dict[str, DetectorSchema]]: + def AsDict(self) -> Dict[str, Dict[str, DetectorConfig]]: ret_val = { "perlevel" : self.PerLevelDetectors, "per_count" : self.PerCountDetectors, @@ -59,10 +59,10 @@ def AsDict(self) -> Dict[str, Dict[str, DetectorSchema]]: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "DetectorMapSchema": - _perlevel_detectors : Dict[str, DetectorSchema] - _percount_detectors : Dict[str, DetectorSchema] - _aggregate_detectors : Dict[str, DetectorSchema] + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "DetectorMapConfig": + _perlevel_detectors : Dict[str, DetectorConfig] + _percount_detectors : Dict[str, DetectorConfig] + _aggregate_detectors : Dict[str, DetectorConfig] if not isinstance(all_elements, dict): all_elements = {} @@ -85,14 +85,14 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _used = {"perlevel", "per_level", "per_count", "percount", "aggregate"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return DetectorMapSchema(name=name, perlevel_detectors=_perlevel_detectors, + return DetectorMapConfig(name=name, perlevel_detectors=_perlevel_detectors, percount_detectors=_percount_detectors, aggregate_detectors=_aggregate_detectors, other_elements=_leftovers) @classmethod - def Default(cls) -> "DetectorMapSchema": - return DetectorMapSchema( - name="DefaultDetectorMapSchema", + def Default(cls) -> "DetectorMapConfig": + return DetectorMapConfig( + name="DefaultDetectorMapConfig", perlevel_detectors=cls._DEFAULT_PERLEVEL_DETECTORS, percount_detectors=cls._DEFAULT_PERCOUNT_DETECTORS, aggregate_detectors=cls._DEFAULT_AGGREGATE_DETECTORS, @@ -115,30 +115,30 @@ def Default(cls) -> "DetectorMapSchema": # *** PRIVATE STATICS *** @staticmethod - def _parsePerLevelDetectors(perlevels) -> Dict[str, 
DetectorSchema]: - ret_val : Dict[str, DetectorSchema] + def _parsePerLevelDetectors(perlevels) -> Dict[str, DetectorConfig]: + ret_val : Dict[str, DetectorConfig] if isinstance(perlevels, dict): - ret_val = { key : DetectorSchema(name=key, all_elements=val) for key,val in perlevels.items() } + ret_val = { key : DetectorConfig(name=key, all_elements=val) for key,val in perlevels.items() } else: ret_val = {} Logger.Log("Per-level detectors map was not a dict, defaulting to empty dict", logging.WARN) return ret_val @staticmethod - def _parsePerCountDetectors(percounts) -> Dict[str, DetectorSchema]: - ret_val : Dict[str, DetectorSchema] + def _parsePerCountDetectors(percounts) -> Dict[str, DetectorConfig]: + ret_val : Dict[str, DetectorConfig] if isinstance(percounts, dict): - ret_val = { key : DetectorSchema(name=key, all_elements=val) for key,val in percounts.items() } + ret_val = { key : DetectorConfig(name=key, all_elements=val) for key,val in percounts.items() } else: ret_val = {} Logger.Log("Per-count detectors map was not a dict, defaulting to empty dict", logging.WARN) return ret_val @staticmethod - def _parseAggregateDetectors(aggregates) -> Dict[str, DetectorSchema]: - ret_val : Dict[str, DetectorSchema] + def _parseAggregateDetectors(aggregates) -> Dict[str, DetectorConfig]: + ret_val : Dict[str, DetectorConfig] if isinstance(aggregates, dict): - ret_val = {key : DetectorSchema(name=key, all_elements=val) for key,val in aggregates.items()} + ret_val = {key : DetectorConfig(name=key, all_elements=val) for key,val in aggregates.items()} else: ret_val = {} Logger.Log("Per-count detectors map was not a dict, defaulting to empty dict", logging.WARN) diff --git a/src/ogd/common/schemas/games/FeatureSchema.py b/src/ogd/common/configs/games/FeatureConfig.py similarity index 84% rename from src/ogd/common/schemas/games/FeatureSchema.py rename to src/ogd/common/configs/games/FeatureConfig.py index db25f73..abd63d8 100644 --- 
a/src/ogd/common/schemas/games/FeatureSchema.py +++ b/src/ogd/common/configs/games/FeatureConfig.py @@ -2,12 +2,12 @@ import logging from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.games.GeneratorSchema import GeneratorSchema +from ogd.common.configs.games.GeneratorConfig import GeneratorConfig from ogd.common.schemas.Schema import Schema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class SubfeatureSchema(Schema): +class SubfeatureConfig(Schema): _DEFAULT_RETURN_TYPE = "str" _DEFAULT_DESCRIPTION = "Default Subfeature schema object. Does not correspond to any actual data." @@ -37,7 +37,7 @@ def AsMarkdown(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "SubfeatureSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "SubfeatureConfig": _return_type : str _description : str @@ -58,12 +58,12 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _used = {"return_type", "description"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return SubfeatureSchema(name=name, return_type=_return_type, description=_description, other_elements=_leftovers) + return SubfeatureConfig(name=name, return_type=_return_type, description=_description, other_elements=_leftovers) @classmethod - def Default(cls) -> "SubfeatureSchema": - return SubfeatureSchema( - name="DefaultSubfeatureSchema", + def Default(cls) -> "SubfeatureConfig": + return SubfeatureConfig( + name="DefaultSubFeatureConfig", return_type=cls._DEFAULT_RETURN_TYPE, description=cls._DEFAULT_DESCRIPTION, other_elements={} @@ -97,28 +97,28 @@ def _parseDescription(description): # *** PRIVATE METHODS *** -class FeatureSchema(GeneratorSchema): +class FeatureConfig(GeneratorConfig): """Base class for all schemas related to defining feature 
Extractor configurations. """ # *** BUILT-INS & PROPERTIES *** def __init__(self, name:str, other_elements:Optional[Map]=None): - self._subfeatures : Dict[str, SubfeatureSchema] + self._subfeatures : Dict[str, SubfeatureConfig] self._return_type : str if not isinstance(other_elements, dict): other_elements = {} Logger.Log(f"For {name} Feature config, all_elements was not a dict, defaulting to empty dict", logging.WARN) - self._return_type = FeatureSchema.ElementFromDict(all_elements=other_elements, + self._return_type = FeatureConfig.ElementFromDict(all_elements=other_elements, element_names=["return_type"], - parser_function=FeatureSchema._parseReturnType, + parser_function=FeatureConfig._parseReturnType, default_value="UNKNOWN" ) - self._subfeatures = FeatureSchema.ElementFromDict(all_elements=other_elements, + self._subfeatures = FeatureConfig.ElementFromDict(all_elements=other_elements, element_names=["subfeatures"], - parser_function=FeatureSchema._parseSubfeatures, + parser_function=FeatureConfig._parseSubfeatures, default_value={} ) @@ -132,7 +132,7 @@ def ReturnType(self) -> str: return self._return_type @property - def Subfeatures(self) -> Dict[str, SubfeatureSchema]: + def Subfeatures(self) -> Dict[str, SubfeatureConfig]: return self._subfeatures # *** IMPLEMENT ABSTRACT FUNCTIONS *** @@ -154,10 +154,10 @@ def _parseReturnType(return_type, feature_name:str=""): return ret_val @staticmethod - def _parseSubfeatures(subfeatures) -> Dict[str, SubfeatureSchema]: - ret_val : Dict[str, SubfeatureSchema] + def _parseSubfeatures(subfeatures) -> Dict[str, SubfeatureConfig]: + ret_val : Dict[str, SubfeatureConfig] if isinstance(subfeatures, dict): - ret_val = {name:SubfeatureSchema.FromDict(name=name, all_elements=elems) for name,elems in subfeatures.items()} + ret_val = {name:SubfeatureConfig.FromDict(name=name, all_elements=elems) for name,elems in subfeatures.items()} else: ret_val = {} Logger.Log(f"Extractor subfeatures was unexpected type {type(subfeatures)}, 
defaulting to empty list.", logging.WARN) diff --git a/src/ogd/common/schemas/games/FeatureMapSchema.py b/src/ogd/common/configs/games/FeatureMapConfig.py similarity index 77% rename from src/ogd/common/schemas/games/FeatureMapSchema.py rename to src/ogd/common/configs/games/FeatureMapConfig.py index e0c56b2..c06c93d 100644 --- a/src/ogd/common/schemas/games/FeatureMapSchema.py +++ b/src/ogd/common/configs/games/FeatureMapConfig.py @@ -2,13 +2,13 @@ import logging from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.games.AggregateSchema import AggregateSchema -from ogd.common.schemas.games.PerCountSchema import PerCountSchema +from ogd.common.configs.games.AggregateConfig import AggregateConfig +from ogd.common.configs.games.PerCountConfig import PerCountConfig from ogd.common.schemas.Schema import Schema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class FeatureMapSchema(Schema): +class FeatureMapConfig(Schema): """ Dumb struct to contain the specification and config of a set of features for a game. 
""" @@ -20,13 +20,13 @@ class FeatureMapSchema(Schema): # *** BUILT-INS & PROPERTIES *** - def __init__(self, name:str, legacy_mode: bool, legacy_perlevel_feats:Dict[str, PerCountSchema], - percount_feats:Dict[str, PerCountSchema], aggregate_feats:Dict[str, AggregateSchema], + def __init__(self, name:str, legacy_mode: bool, legacy_perlevel_feats:Dict[str, PerCountConfig], + percount_feats:Dict[str, PerCountConfig], aggregate_feats:Dict[str, AggregateConfig], other_elements:Optional[Map]=None): self._legacy_mode : bool = legacy_mode - self._legacy_perlevel_feats : Dict[str, PerCountSchema] = legacy_perlevel_feats - self._percount_feats : Dict[str, PerCountSchema] = percount_feats - self._aggregate_feats : Dict[str, AggregateSchema] = aggregate_feats + self._legacy_perlevel_feats : Dict[str, PerCountConfig] = legacy_perlevel_feats + self._percount_feats : Dict[str, PerCountConfig] = percount_feats + self._aggregate_feats : Dict[str, AggregateConfig] = aggregate_feats super().__init__(name=name, other_elements=other_elements) @@ -35,15 +35,15 @@ def LegacyMode(self) -> bool: return self._legacy_mode @property - def LegacyPerLevelFeatures(self) -> Dict[str, PerCountSchema]: + def LegacyPerLevelFeatures(self) -> Dict[str, PerCountConfig]: return self._legacy_perlevel_feats @property - def PerCountFeatures(self) -> Dict[str, PerCountSchema]: + def PerCountFeatures(self) -> Dict[str, PerCountConfig]: return self._percount_feats @property - def AggregateFeatures(self) -> Dict[str, AggregateSchema]: + def AggregateFeatures(self) -> Dict[str, AggregateConfig]: return self._aggregate_feats # *** IMPLEMENT ABSTRACT FUNCTIONS *** @@ -58,11 +58,11 @@ def AsMarkdown(self) -> str: return " \n\n".join(feature_summary + feature_list) @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "FeatureMapSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "FeatureMapConfig": 
_legacy_mode : bool - _legacy_perlevel_feats : Dict[str, PerCountSchema] - _percount_feats : Dict[str, PerCountSchema] - _aggregate_feats : Dict[str, AggregateSchema] + _legacy_perlevel_feats : Dict[str, PerCountConfig] + _percount_feats : Dict[str, PerCountConfig] + _aggregate_feats : Dict[str, AggregateConfig] if not isinstance(all_elements, dict): all_elements = {} @@ -90,14 +90,14 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _used = {"legacy", "perlevel", "per_count", "aggregate"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return FeatureMapSchema(name=name, legacy_mode=_legacy_mode, legacy_perlevel_feats=_legacy_perlevel_feats, + return FeatureMapConfig(name=name, legacy_mode=_legacy_mode, legacy_perlevel_feats=_legacy_perlevel_feats, percount_feats=_percount_feats, aggregate_feats=_aggregate_feats, other_elements=_leftovers) @classmethod - def Default(cls) -> "FeatureMapSchema": - return FeatureMapSchema( - name="DefaultFeatureMapSchema", + def Default(cls) -> "FeatureMapConfig": + return FeatureMapConfig( + name="DefaultFeatureMapConfig", legacy_mode=cls._DEFAULT_LEGACY_MODE, legacy_perlevel_feats=cls._DEFAULT_LEGACY_FEATS, percount_feats=cls._DEFAULT_PERCOUNT_FEATS, @@ -131,30 +131,30 @@ def _parseLegacyMode(legacy_element) -> bool: return ret_val @staticmethod - def _parsePerLevelFeatures(perlevels) -> Dict[str, PerCountSchema]: - ret_val : Dict[str, PerCountSchema] + def _parsePerLevelFeatures(perlevels) -> Dict[str, PerCountConfig]: + ret_val : Dict[str, PerCountConfig] if isinstance(perlevels, dict): - ret_val = { key : PerCountSchema.FromDict(name=key, all_elements=val) for key,val in perlevels.items() } + ret_val = { key : PerCountConfig.FromDict(name=key, all_elements=val) for key,val in perlevels.items() } else: ret_val = {} Logger.Log("Per-level features map was not a dict, defaulting to empty dict", logging.WARN) return ret_val @staticmethod - def 
_parsePerCountFeatures(percounts) -> Dict[str, PerCountSchema]: - ret_val : Dict[str, PerCountSchema] + def _parsePerCountFeatures(percounts) -> Dict[str, PerCountConfig]: + ret_val : Dict[str, PerCountConfig] if isinstance(percounts, dict): - ret_val = { key : PerCountSchema.FromDict(name=key, all_elements=val) for key,val in percounts.items() } + ret_val = { key : PerCountConfig.FromDict(name=key, all_elements=val) for key,val in percounts.items() } else: ret_val = {} Logger.Log("Per-count features map was not a dict, defaulting to empty dict", logging.WARN) return ret_val @staticmethod - def _parseAggregateFeatures(aggregates) -> Dict[str, AggregateSchema]: - ret_val : Dict[str, AggregateSchema] + def _parseAggregateFeatures(aggregates) -> Dict[str, AggregateConfig]: + ret_val : Dict[str, AggregateConfig] if isinstance(aggregates, dict): - ret_val = {key : AggregateSchema(name=key, other_elements=val) for key,val in aggregates.items()} + ret_val = {key : AggregateConfig(name=key, other_elements=val) for key,val in aggregates.items()} else: ret_val = {} Logger.Log("Per-count features map was not a dict, defaulting to empty dict", logging.WARN) diff --git a/src/ogd/common/schemas/games/GeneratorSchema.py b/src/ogd/common/configs/games/GeneratorConfig.py similarity index 94% rename from src/ogd/common/schemas/games/GeneratorSchema.py rename to src/ogd/common/configs/games/GeneratorConfig.py index b9d2652..185f4c2 100644 --- a/src/ogd/common/schemas/games/GeneratorSchema.py +++ b/src/ogd/common/configs/games/GeneratorConfig.py @@ -7,7 +7,7 @@ from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class GeneratorSchema(Schema): +class GeneratorConfig(Schema): def __init__(self, name:str, other_elements:Optional[Map]=None): self._enabled : Set[ExtractionMode] self._type_name : str @@ -19,16 +19,16 @@ def __init__(self, name:str, other_elements:Optional[Map]=None): Logger.Log(f"For {name} Extractor config, all_elements was not a dict, 
defaulting to empty dict", logging.WARN) if "type" in _other_elements.keys(): - self._type_name = GeneratorSchema._parseType(_other_elements['type']) + self._type_name = GeneratorConfig._parseType(_other_elements['type']) else: self._type_name = name if "enabled" in _other_elements.keys(): - self._enabled = GeneratorSchema._parseEnabled(_other_elements['enabled']) + self._enabled = GeneratorConfig._parseEnabled(_other_elements['enabled']) else: self._enabled = {ExtractionMode.DETECTOR, ExtractionMode.SESSION, ExtractionMode.PLAYER, ExtractionMode.POPULATION} Logger.Log(f"{name} config does not have an 'enabled' element; defaulting to enabled=True", logging.WARN) if "description" in _other_elements.keys(): - self._description = GeneratorSchema._parseDescription(_other_elements['description']) + self._description = GeneratorConfig._parseDescription(_other_elements['description']) else: self._description = "No Description" Logger.Log(f"{name} config does not have an 'description' element; defaulting to description='{self._description}'", logging.WARN) diff --git a/src/ogd/common/schemas/games/PerCountSchema.py b/src/ogd/common/configs/games/PerCountConfig.py similarity index 90% rename from src/ogd/common/schemas/games/PerCountSchema.py rename to src/ogd/common/configs/games/PerCountConfig.py index 374e9f0..0e39108 100644 --- a/src/ogd/common/schemas/games/PerCountSchema.py +++ b/src/ogd/common/configs/games/PerCountConfig.py @@ -2,11 +2,11 @@ import logging from typing import Any, Dict, Optional # import local files -from ogd.common.schemas.games.FeatureSchema import FeatureSchema +from ogd.common.configs.games.FeatureConfig import FeatureConfig from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class PerCountSchema(FeatureSchema): +class PerCountConfig(FeatureConfig): _DEFAULT_COUNT = 1 _DEFAULT_PREFIX = "pre" @@ -41,7 +41,7 @@ def AsMarkdown(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, 
all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "PerCountSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "PerCountConfig": _count : int | str _prefix : str @@ -61,12 +61,12 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _used = {"count", "prefix"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return PerCountSchema(name=name, count=_count, prefix=_prefix, other_elements=_leftovers) + return PerCountConfig(name=name, count=_count, prefix=_prefix, other_elements=_leftovers) @classmethod - def Default(cls) -> "PerCountSchema": - return PerCountSchema( - name="DefaultPerCountSchema", + def Default(cls) -> "PerCountConfig": + return PerCountConfig( + name="DefaultPerCountConfig", count=cls._DEFAULT_COUNT, prefix=cls._DEFAULT_PREFIX, other_elements={} diff --git a/src/ogd/common/configs/games/__init__.py b/src/ogd/common/configs/games/__init__.py new file mode 100644 index 0000000..4cf766b --- /dev/null +++ b/src/ogd/common/configs/games/__init__.py @@ -0,0 +1,13 @@ +__all__ = [ + "GeneratorConfig", + "DetectorConfig", + "FeatureConfig", + "AggregateConfig", + "PerCountConfig", +] + +from . import GeneratorConfig +from . import DetectorConfig +from . import FeatureConfig +from . import AggregateConfig +from . 
import PerCountConfig diff --git a/src/ogd/common/schemas/configs/data_sources/BigQuerySourceSchema.py b/src/ogd/common/configs/storage/BigQueryConfig.py similarity index 86% rename from src/ogd/common/schemas/configs/data_sources/BigQuerySourceSchema.py rename to src/ogd/common/configs/storage/BigQueryConfig.py index 2a99a80..e963a74 100644 --- a/src/ogd/common/schemas/configs/data_sources/BigQuerySourceSchema.py +++ b/src/ogd/common/configs/storage/BigQueryConfig.py @@ -3,10 +3,10 @@ import logging from typing import Any, Dict, Optional, Type # import local files -from ogd.common.schemas.configs.data_sources.DataSourceSchema import DataSourceSchema +from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.utils.Logger import Logger -class BigQuerySchema(DataSourceSchema): +class BigQueryConfig(DataStoreConfig): _DEFAULT_PROJECT_ID = "wcer-field-day-ogd-1798" _DEFAULT_CREDENTIAL = "./config/ogd.json" @@ -41,16 +41,16 @@ def AsMarkdown(self) -> str: return ret_val @classmethod - def Default(cls) -> "BigQuerySchema": - return BigQuerySchema( - name="DefaultBigQuerySchema", + def Default(cls) -> "BigQueryConfig": + return BigQueryConfig( + name="DefaultBigQueryConfig", project_id=cls._DEFAULT_PROJECT_ID, credential=cls._DEFAULT_CREDENTIAL, other_elements={} ) @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]) -> "BigQuerySchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]) -> "BigQueryConfig": _project_id : str _credential : Optional[str] @@ -60,17 +60,17 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _project_id = cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["PROJECT_ID", "DATASET_ID"], parser_function=cls._parseProjectID, - default_value=BigQuerySchema._DEFAULT_PROJECT_ID + default_value=BigQueryConfig._DEFAULT_PROJECT_ID ) _credential = 
cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["PROJECT_KEY"], parser_function=cls._parseCredential, - default_value=BigQuerySchema._DEFAULT_CREDENTIAL + default_value=BigQueryConfig._DEFAULT_CREDENTIAL ) _used = {"PROJECT_ID", "DATASET_ID", "PROJECT_KEY"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return BigQuerySchema(name=name, project_id=_project_id, credential=_credential, other_elements=_leftovers) + return BigQueryConfig(name=name, project_id=_project_id, credential=_credential, other_elements=_leftovers) # *** PUBLIC STATICS *** diff --git a/src/ogd/common/configs/storage/DataStoreConfig.py b/src/ogd/common/configs/storage/DataStoreConfig.py new file mode 100644 index 0000000..bd7ffd6 --- /dev/null +++ b/src/ogd/common/configs/storage/DataStoreConfig.py @@ -0,0 +1,77 @@ +# import standard libraries +import abc +import logging +from pathlib import Path +from typing import Any, Dict # , overload +# import local files +from ogd.common.configs.Config import Config +from ogd.common.configs.storage.credentials.CredentialConfig import CredentialConfig +from ogd.common.utils.Logger import Logger + + +class DataStoreConfig(Config): + """Dumb struct to contain data pertaining to a data source, which a StorageConnector can connect to. + + Every source has: + - A named "type" to inform what StorageConnector should be instantiated + - A config "name" for use within ogd software for identifying a particular data source config + - A resource "location" for use by the StorageConnector (such as a filename, cloud project name, or database host) + """ + # @overload + # def __init__(self, name:str, other_elements:Dict[str, Any]): ... + + def __init__(self, name:str, other_elements:Dict[str, Any] | Any): + self._source_type : str + # 1. 
Ensure we've actually got a dict to parse from + if not isinstance(other_elements, dict): + other_elements = {} + Logger.Log(f"For {name} Data Source config, other_elements was not a dict, defaulting to empty dict", logging.WARN) + # 2. Parse standard elements, with legacy elements nested under "else" case. + if "SOURCE_TYPE" in other_elements.keys(): + self._source_type = DataStoreConfig._parseSourceType(other_elements["SOURCE_TYPE"]) + else: + if "DB_TYPE" in other_elements.keys(): + self._source_type = DataStoreConfig._parseSourceType(other_elements["DB_TYPE"]) + else: + self._source_type = "UNKNOWN" + Logger.Log(f"{name} config does not have a 'SOURCE_TYPE' element; defaulting to db_name={self._source_type}", logging.WARN) + + _used = {"SOURCE_TYPE", "DB_TYPE"} + _leftovers = { key : val for key,val in other_elements.items() if key not in _used } + super().__init__(name=name, other_elements=_leftovers) + + @property + def Type(self) -> str: + """The type of source indicated by the data source schema. 
+ + This includes but is not limited to "FIREBASE", "BIGQUERY", and "MySQL" + + :return: A string describing the type of the data source + :rtype: str + """ + return self._source_type + + @property + @abc.abstractmethod + def Location(self) -> str | Path: + pass + + @property + @abc.abstractmethod + def Credential(self) -> CredentialConfig: + pass + + @property + @abc.abstractmethod + def AsConnectionInfo(self) -> str: + pass + + @staticmethod + def _parseSourceType(source_type) -> str: + ret_val : str + if isinstance(source_type, str): + ret_val = source_type + else: + ret_val = str(source_type) + Logger.Log(f"Data Source typename was unexpected type {type(source_type)}, defaulting to str(source_type)={ret_val}.", logging.WARN) + return ret_val diff --git a/src/ogd/common/schemas/configs/data_sources/FileSourceSchema.py b/src/ogd/common/configs/storage/FileStoreConfig.py similarity index 85% rename from src/ogd/common/schemas/configs/data_sources/FileSourceSchema.py rename to src/ogd/common/configs/storage/FileStoreConfig.py index 620eaa9..35736d4 100644 --- a/src/ogd/common/schemas/configs/data_sources/FileSourceSchema.py +++ b/src/ogd/common/configs/storage/FileStoreConfig.py @@ -3,10 +3,10 @@ from typing import Any, Dict, Optional from pathlib import Path # import local files -from ogd.common.schemas.configs.data_sources.DataSourceSchema import DataSourceSchema +from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.utils.Logger import Logger -class FileSourceSchema(DataSourceSchema): +class FileStoreConfig(DataStoreConfig): _DEFAULT_FOLDER_PATH = Path('./data') _DEFAULT_FILE_NAME = "UNKNOWN.tsv" @@ -49,7 +49,7 @@ def AsConnectionInfo(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "FileSourceSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "FileStoreConfig": _folder_path : Path 
_file_name : str @@ -63,22 +63,22 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _folder_path = cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["PATH"], parser_function=cls._parseFolder, - default_value=FileSourceSchema._DEFAULT_FOLDER_PATH + default_value=FileStoreConfig._DEFAULT_FOLDER_PATH ) _file_name = cls.ElementFromDict(all_elements=all_elements, logger=logger, element_names=["FILENAME"], parser_function=cls._parseFilename, - default_value=FileSourceSchema._DEFAULT_FILE_NAME + default_value=FileStoreConfig._DEFAULT_FILE_NAME ) _used = {"PATH", "FILENAME"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return FileSourceSchema(name=name, folder_path=_folder_path, file_name=_file_name, other_elements=_leftovers) + return FileStoreConfig(name=name, folder_path=_folder_path, file_name=_file_name, other_elements=_leftovers) @classmethod - def Default(cls) -> "FileSourceSchema": - return FileSourceSchema( - name="DefaultFileSourceSchema", + def Default(cls) -> "FileStoreConfig": + return FileStoreConfig( + name="DefaultFileStoreConfig", folder_path=cls._DEFAULT_FOLDER_PATH, file_name=cls._DEFAULT_FILE_NAME, other_elements={} diff --git a/src/ogd/common/schemas/configs/data_sources/MySQLSourceSchema.py b/src/ogd/common/configs/storage/MySQLConfig.py similarity index 85% rename from src/ogd/common/schemas/configs/data_sources/MySQLSourceSchema.py rename to src/ogd/common/configs/storage/MySQLConfig.py index eee00b1..9e07b60 100644 --- a/src/ogd/common/schemas/configs/data_sources/MySQLSourceSchema.py +++ b/src/ogd/common/configs/storage/MySQLConfig.py @@ -3,13 +3,11 @@ from typing import Any, Dict, Optional, Type # import local files from ogd.common.schemas.Schema import Schema -from ogd.common.schemas.configs.data_sources.DataSourceSchema import DataSourceSchema +from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig from ogd.common.utils.Logger 
import Logger -class SSHSchema(Schema): +class SSHConfig(Schema): _DEFAULT_HOST = "127.0.0.1" - _DEFAULT_USER = "DEFAULT USER" - _DEFAULT_PASS = None _DEFAULT_PORT = 22 # *** BUILT-INS & PROPERTIES *** @@ -54,7 +52,7 @@ def AsConnectionInfo(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "SSHSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "SSHConfig": _host : Optional[str] _user : Optional[str] _pass : Optional[str] @@ -90,12 +88,12 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _used = {"SSH_HOST", "SSH_USER", "SSH_PW", "SSH_PASS", "SSH_PORT"} _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return SSHSchema(name=name, ssh_host=_host, ssh_user=_user, ssh_pass=_pass, ssh_port=_port, other_elements=_leftovers) + return SSHConfig(name=name, ssh_host=_host, ssh_user=_user, ssh_pass=_pass, ssh_port=_port, other_elements=_leftovers) @classmethod - def Default(cls) -> "SSHSchema": - return SSHSchema( - name="DefaultMySQLSchema", + def Default(cls) -> "SSHConfig": + return SSHConfig( + name="DefaultSSHConfig", ssh_host=cls._DEFAULT_HOST, ssh_user=cls._DEFAULT_USER, ssh_pass=cls._DEFAULT_PASS, @@ -119,26 +117,6 @@ def _parseHost(host) -> Optional[str]: Logger.Log(f"SSH config for host was unexpected type {type(host)}, defaulting to str(host)={ret_val}.", logging.WARN) return ret_val - @staticmethod - def _parseUser(user) -> Optional[str]: - ret_val : Optional[str] - if isinstance(user, str): - ret_val = user - else: - ret_val = str(user) - Logger.Log(f"SSH config for user was unexpected type {type(user)}, defaulting to str(user)={ret_val}.", logging.WARN) - return ret_val - - @staticmethod - def _parsePass(pw) -> Optional[str]: - ret_val : Optional[str] - if isinstance(pw, str): - ret_val = pw - else: - ret_val = str(pw) - Logger.Log(f"SSH config for 
password was unexpected type {type(pw)}, defaulting to str(pw)=***.", logging.WARN) - return ret_val - @staticmethod def _parsePort(port) -> int: ret_val : int @@ -153,7 +131,7 @@ def _parsePort(port) -> int: # *** PRIVATE METHODS *** -class MySQLSchema(DataSourceSchema): +class MySQLConfig(DataStoreConfig): _DEFAULT_HOST = "127.0.0.1" _DEFAULT_PORT = 22 _DEFAULT_USER = "DEFAULT USER" @@ -161,12 +139,12 @@ class MySQLSchema(DataSourceSchema): # *** BUILT-INS & PROPERTIES *** - def __init__(self, name:str, db_host:str, db_port:int, db_user:str, db_pass:Optional[str], ssh_cfg:SSHSchema, other_elements:Dict[str, Any]): + def __init__(self, name:str, db_host:str, db_port:int, db_user:str, db_pass:Optional[str], ssh_cfg:SSHConfig, other_elements:Dict[str, Any]): self._db_host : str = db_host self._db_port : int = db_port self._db_user : str = db_user self._db_pass : Optional[str] = db_pass - self._ssh_cfg : SSHSchema = ssh_cfg + self._ssh_cfg : SSHConfig = ssh_cfg super().__init__(name=name, other_elements=other_elements) @property @@ -186,11 +164,11 @@ def DBPass(self) -> Optional[str]: return self._db_pass @property - def SSHConfig(self) -> SSHSchema: + def SSHConfig(self) -> SSHConfig: return self._ssh_cfg @property - def SSH(self) -> SSHSchema: + def SSH(self) -> SSHConfig: """Shortened alias for SSHConfig, convenient when using sub-elements of the SSHConfig. :return: The schema describing the configuration for an SSH connection to a data source. 
@@ -223,12 +201,12 @@ def AsConnectionInfo(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "MySQLSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "MySQLConfig": _db_host : str _db_port : int _db_user : str _db_pass : Optional[str] - _ssh_cfg : SSHSchema + _ssh_cfg : SSHConfig if not isinstance(all_elements, dict): all_elements = {} @@ -262,21 +240,21 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging # TODO : probably shouldn't have keys expected for SSH be hardcoded here, maybe need a way to get back what stuff it didn't use? _ssh_keys = {"SSH_HOST", "SSH_PORT", "SSH_USER", "SSH_PW", "SSH_PASS"} _ssh_elems = { key : all_elements.get(key) for key in _ssh_keys.intersection(all_elements.keys()) } - _ssh_cfg = SSHSchema.FromDict(name=f"{name}-SSH", all_elements=_ssh_elems, logger=logger) + _ssh_cfg = SSHConfig.FromDict(name=f"{name}-SSH", all_elements=_ssh_elems, logger=logger) _used = {"DB_HOST", "DB_PORT", "DB_USER", "DB_PW", "DB_PASS"}.union(_ssh_keys) _leftovers = { key : val for key,val in all_elements.items() if key not in _used } - return MySQLSchema(name=name, db_host=_db_host, db_port=_db_port, db_user=_db_user, db_pass=_db_pass, ssh_cfg=_ssh_cfg, other_elements=_leftovers) + return MySQLConfig(name=name, db_host=_db_host, db_port=_db_port, db_user=_db_user, db_pass=_db_pass, ssh_cfg=_ssh_cfg, other_elements=_leftovers) @classmethod - def Default(cls) -> "MySQLSchema": - return MySQLSchema( - name="DefaultMySQLSchema", + def Default(cls) -> "MySQLConfig": + return MySQLConfig( + name="DefaultMySQLConfig", db_host=cls._DEFAULT_HOST, db_port=cls._DEFAULT_PORT, db_user=cls._DEFAULT_USER, db_pass=cls._DEFAULT_PASS, - ssh_cfg=SSHSchema.Default(), + ssh_cfg=SSHConfig.Default(), other_elements={} ) diff --git a/src/ogd/common/interfaces/outerfaces/__init__.py 
b/src/ogd/common/configs/storage/__init__.py similarity index 100% rename from src/ogd/common/interfaces/outerfaces/__init__.py rename to src/ogd/common/configs/storage/__init__.py diff --git a/src/ogd/common/configs/storage/credentials/CredentialConfig.py b/src/ogd/common/configs/storage/credentials/CredentialConfig.py new file mode 100644 index 0000000..487aafa --- /dev/null +++ b/src/ogd/common/configs/storage/credentials/CredentialConfig.py @@ -0,0 +1,16 @@ +# import standard libraries +from typing import Any, Dict # , overload +# import local files +from ogd.common.configs.Config import Config + + +class CredentialConfig(Config): + """Dumb struct to contain data pertaining to credentials for accessing a data source. + + In general, a credential can have a key, or a user-password combination. + """ + # @overload + # def __init__(self, name:str, other_elements:Dict[str, Any]): ... + + def __init__(self, name:str, other_elements:Dict[str, Any] | Any): + super().__init__(name=name, other_elements=other_elements) diff --git a/src/ogd/common/configs/storage/credentials/KeyCredentialConfig.py b/src/ogd/common/configs/storage/credentials/KeyCredentialConfig.py new file mode 100644 index 0000000..663dcb4 --- /dev/null +++ b/src/ogd/common/configs/storage/credentials/KeyCredentialConfig.py @@ -0,0 +1,125 @@ +# import standard libraries +import logging +import os +from pathlib import Path +from typing import Any, Dict, Optional +# import local files +from ogd.common.configs.storage.credentials.CredentialConfig import CredentialConfig +from ogd.common.utils.Logger import Logger + + +class KeyCredential(CredentialConfig): + """Dumb struct to contain data pertaining to loading a key credential + """ + _DEFAULT_PATH = "./" + _DEFAULT_FILE = "key.txt" + + def __init__(self, name:str, filename:str, path:Path | str, other_elements:Dict[str, Any] | Any): + super().__init__(name=name, other_elements=other_elements) + if isinstance(path, str): + path = Path(path) + self._path : 
Path = path + self._file : str = filename + + @property + def File(self) -> str: + return self._file + + @property + def Folder(self) -> Path: + """The path to the folder containing the key credential file. + + :return: The path to the folder containing the key credential file. + :rtype: Path + """ + return self._path + + @property + def Filepath(self) -> Path: + """The full path to the key credential file. + + :return: The full path to the key credential file. + :rtype: Path + """ + return self.Folder / self.File + + # *** IMPLEMENT ABSTRACT FUNCTIONS *** + + @property + def AsMarkdown(self) -> str: + ret_val : str + + ret_val = f"Key: {self.Filepath}" + return ret_val + + @classmethod + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "KeyCredential": + _file : Optional[str] + _path : Optional[Path] + + if not isinstance(all_elements, dict): + all_elements = {} + _msg = f"For {name} key credential config, all_elements was not a dict, defaulting to empty dict" + if logger: + logger.warning(_msg) + else: + Logger.Log(_msg, logging.WARN) + _file = cls.ElementFromDict(all_elements=all_elements, logger=logger, + element_names=["FILE", "KEY"], + parser_function=cls._parseFile, + default_value=cls._DEFAULT_FILE + ) + _path = cls.ElementFromDict(all_elements=all_elements, logger=logger, + element_names=["PATH"], + parser_function=cls._parsePath, + default_value=cls._DEFAULT_PATH + ) + # if we didn't find a PATH, but the FILE has a '/' in it, + # we should be able to get file separate from path. 
+ if _path is None and _file is not None and "/" in _file: + _full_path = Path(_file) + _path = _full_path.parent + _file = _full_path.name + + _used = {"FILE", "KEY", "PATH"} + _leftovers = { key : val for key,val in all_elements.items() if key not in _used } + return KeyCredential(name=name, filename=_file, path=_path, other_elements=_leftovers) + + @classmethod + def Default(cls) -> "KeyCredential": + return KeyCredential( + name="DefaultKeyCredential", + filename=cls._DEFAULT_FILE, + path=cls._DEFAULT_PATH, + other_elements={} + ) + + # *** PUBLIC STATICS *** + + # *** PUBLIC METHODS *** + + # *** PRIVATE STATICS *** + + @staticmethod + def _parseFile(file) -> str: + ret_val : Optional[str] + if isinstance(file, str): + ret_val = file + else: + ret_val = str(file) + Logger.Log(f"Filename for key credential was unexpected type {type(file)}, defaulting to str(file)={ret_val}.", logging.WARN) + return ret_val + + @staticmethod + def _parsePath(folder) -> Path: + ret_val : Path + if isinstance(folder, Path): + ret_val = folder + if isinstance(folder, str): + ret_val = Path(folder) + else: + ret_val = Path(str(folder)) + Logger.Log(f"Folder for key credential was unexpected type {type(folder)}, defaulting to Path(str(folder))={ret_val}.", logging.WARN) + return ret_val + + # *** PRIVATE METHODS *** diff --git a/src/ogd/common/configs/storage/credentials/PasswordCredentialConfig.py b/src/ogd/common/configs/storage/credentials/PasswordCredentialConfig.py new file mode 100644 index 0000000..33f7f0a --- /dev/null +++ b/src/ogd/common/configs/storage/credentials/PasswordCredentialConfig.py @@ -0,0 +1,101 @@ +# import standard libraries +import logging +from typing import Any, Dict, Optional +# import local files +from ogd.common.configs.storage.credentials.CredentialConfig import CredentialConfig +from ogd.common.utils.Logger import Logger + + +class PasswordCredential(CredentialConfig): + """Dumb struct to contain data pertaining to credentials for accessing a data 
source. + + In general, a credential can have a key, or a user-password combination. + """ + _DEFAULT_USER = "DEFAULT USER" + _DEFAULT_PASS = None + + def __init__(self, name:str, username:str, password:Optional[str], other_elements:Dict[str, Any] | Any): + super().__init__(name=name, other_elements=other_elements) + self._user = username + self._pass = password + + @property + def User(self) -> Optional[str]: + return self._user + + @property + def Pass(self) -> Optional[str]: + return self._pass + + # *** IMPLEMENT ABSTRACT FUNCTIONS *** + + @property + def AsMarkdown(self) -> str: + ret_val : str + + ret_val = f"User : `{self.User}`\nPass: `****`" + return ret_val + + @classmethod + def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "PasswordCredential": + _user : Optional[str] + _pass : Optional[str] + + if not isinstance(all_elements, dict): + all_elements = {} + _msg = f"For {name} password credential config, all_elements was not a dict, defaulting to empty dict" + if logger: + logger.warning(_msg) + else: + Logger.Log(_msg, logging.WARN) + _user = cls.ElementFromDict(all_elements=all_elements, logger=logger, + element_names=["USER"], + parser_function=cls._parseUser, + default_value=cls._DEFAULT_USER + ) + _pass = cls.ElementFromDict(all_elements=all_elements, logger=logger, + element_names=["PASS"], + parser_function=cls._parsePass, + default_value=cls._DEFAULT_PASS + ) + + _used = {"USER", "PASS"} + _leftovers = { key : val for key,val in all_elements.items() if key not in _used } + return PasswordCredential(name=name, username=_user, password=_pass, other_elements=_leftovers) + + @classmethod + def Default(cls) -> "PasswordCredential": + return PasswordCredential( + name="DefaultPasswordCredential", + username=cls._DEFAULT_USER, + password=cls._DEFAULT_PASS, + other_elements={} + ) + + # *** PUBLIC STATICS *** + + # *** PUBLIC METHODS *** + + # *** PRIVATE STATICS *** + + @staticmethod + def _parseUser(user) 
-> Optional[str]: + ret_val : Optional[str] + if isinstance(user, str): + ret_val = user + else: + ret_val = str(user) + Logger.Log(f"User for password credential was unexpected type {type(user)}, defaulting to str(user)={ret_val}.", logging.WARN) + return ret_val + + @staticmethod + def _parsePass(pw) -> Optional[str]: + ret_val : Optional[str] + if isinstance(pw, str): + ret_val = pw + else: + ret_val = str(pw) + Logger.Log(f"Password for password credential was unexpected type {type(pw)}, defaulting to str(pw)=***.", logging.WARN) + return ret_val + + # *** PRIVATE METHODS *** diff --git a/src/ogd/common/schemas/configs/__init__.py b/src/ogd/common/configs/storage/credentials/__init__.py similarity index 100% rename from src/ogd/common/schemas/configs/__init__.py rename to src/ogd/common/configs/storage/credentials/__init__.py diff --git a/src/ogd/common/connectors/StorageConnector.py b/src/ogd/common/connectors/StorageConnector.py new file mode 100644 index 0000000..9177eb4 --- /dev/null +++ b/src/ogd/common/connectors/StorageConnector.py @@ -0,0 +1,113 @@ +"""StorageConnector Module +""" + +# import standard libraries +import abc +import logging + +# import local files +from ogd.common.configs.GameSourceSchema import GameSourceSchema +from ogd.common.utils.Logger import Logger + +class StorageConnector(abc.ABC): + """Base class for all interfaces and outerfaces. + Ensures each inter/outerface can be opened and closed, like most system resources. + + All subclasses must implement the `_open` and `_close` functions. + """ + + # *** ABSTRACTS *** + + @abc.abstractmethod + def _open(self) -> bool: + """Private implementation of the logic for opening a connection to a storage resource + + :return: True if the connection was successful, otherwise False. 
+ :rtype: bool + """ + pass + + @abc.abstractmethod + def _close(self) -> bool: + """Private implementation of the logic for closing a connection to a storage resource + + :return: True if the connection was closed successfully, otherwise False. + :rtype: bool + """ + pass + + # *** BUILT-INS & PROPERTIES *** + + def __init__(self, schema:GameSourceSchema): + self._source_schema : GameSourceSchema = schema + self._is_open : bool = False + + def __del__(self): + self.Close() + + @property + def IsOpen(self) -> bool: + """Property to indicate whether a connection with the storage resource is open or not. + + :return: True if there is an open connection to the storage resource, otherwise false. + :rtype: bool + """ + return True if self._is_open else False + + @property + def ResourceName(self) -> str: + return self._source_schema.Name + + @property + def GameSourceSchema(self) -> GameSourceSchema: + return self._source_schema + + # *** PUBLIC STATICS *** + + # *** PUBLIC METHODS *** + + def Open(self, force_reopen:bool = False) -> bool: + """Function to open the connection to a storage resource. + + If the resource was already open, this function (by default) does nothing. + The type of resource is determined by the implementation of a given interface/outerface class. + + :param force_reopen: Force a re-open of the storage resource, if it was already open. Defaults to False + :type force_reopen: bool, optional + :return: True if the resource was successfully opened (or was already open), otherwise False. + :rtype: bool + """ + if not self.IsOpen: + self._is_open = self._open() + elif force_reopen: + self.Close() + self._is_open = self._open() + Logger.Log(f"Successfully force-reopened {self.__class__}", logging.INFO) + return self.IsOpen + + def Close(self, force_close:bool = False) -> bool: + """Function to close the connection to a storage resource. + + If there was no open connection, this function (by default) does nothing. 
+ + :param force_close: Force an attempt to close the resource, even if there is not a known open connection. Defaults to False + :type force_close: bool, optional + :return: True if the resource was successfully closed (or was not open to begin with), otherwise False. + :rtype: bool + """ + ret_val = True + if self.IsOpen: + ret_val = self._close() + elif force_close: + try: + self._close() + except Exception as err: + Logger.Log(f"Encountered an error while force-closing {self.__class__}:\n{err}", logging.WARNING) + else: + Logger.Log(f"Successfully force-closed {self.__class__}", logging.INFO) + + return ret_val + + # *** PRIVATE STATICS *** + + # *** PRIVATE METHODS *** diff --git a/tests/cases/schemas/config/__init__.py b/src/ogd/common/connectors/__init__.py similarity index 100% rename from tests/cases/schemas/config/__init__.py rename to src/ogd/common/connectors/__init__.py diff --git a/src/ogd/common/connectors/filters/collections/FilterCollection.py b/src/ogd/common/connectors/filters/collections/FilterCollection.py index 0d0ad39..d962dc3 100644 --- a/src/ogd/common/connectors/filters/collections/FilterCollection.py +++ b/src/ogd/common/connectors/filters/collections/FilterCollection.py @@ -6,11 +6,12 @@ class FilterCollection: @abc.abstractmethod - def _asDict(self): + def _asDict(self) -> Dict[str, Filter]: pass def __init__(self): pass + @property def AsDict(self) -> Dict[str, Filter]: return self._asDict() \ No newline at end of file diff --git a/src/ogd/common/interfaces/BQFirebaseInterface.py b/src/ogd/common/connectors/interfaces/BQFirebaseInterface.py similarity index 98% rename from src/ogd/common/interfaces/BQFirebaseInterface.py rename to src/ogd/common/connectors/interfaces/BQFirebaseInterface.py index 4baa7ce..b2634fe 100644 --- a/src/ogd/common/interfaces/BQFirebaseInterface.py +++ b/src/ogd/common/connectors/interfaces/BQFirebaseInterface.py @@ -4,9 +4,9 @@ from datetime import datetime from typing import Dict, Final, List, Tuple, Optional 
# import locals -from ogd.common.interfaces.BigQueryInterface import BigQueryInterface +from ogd.common.connectors.interfaces.BigQueryInterface import BigQueryInterface from ogd.common.models.enums.IDMode import IDMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.utils.Logger import Logger AQUALAB_MIN_VERSION : Final[float] = 6.2 diff --git a/src/ogd/common/interfaces/BigQueryCodingInterface.py b/src/ogd/common/connectors/interfaces/BigQueryCodingInterface.py similarity index 97% rename from src/ogd/common/interfaces/BigQueryCodingInterface.py rename to src/ogd/common/connectors/interfaces/BigQueryCodingInterface.py index 746e70b..c53be27 100644 --- a/src/ogd/common/interfaces/BigQueryCodingInterface.py +++ b/src/ogd/common/connectors/interfaces/BigQueryCodingInterface.py @@ -1,15 +1,13 @@ -import json import logging import os -from datetime import datetime from google.cloud import bigquery from typing import Dict, List, Tuple, Optional # import locals -from coding.Code import Code -from coding.Coder import Coder -from ogd.common.interfaces.CodingInterface import CodingInterface +from ogd.common.models.coding.Code import Code +from ogd.common.models.coding.Coder import Coder +from ogd.common.connectors.interfaces.CodingInterface import CodingInterface from ogd.common.models.enums.IDMode import IDMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.utils.Logger import Logger # TODO: see about merging this back into BigQueryInterface for a unified interface. 
diff --git a/src/ogd/common/interfaces/BigQueryInterface.py b/src/ogd/common/connectors/interfaces/BigQueryInterface.py similarity index 97% rename from src/ogd/common/interfaces/BigQueryInterface.py rename to src/ogd/common/connectors/interfaces/BigQueryInterface.py index 0558e61..9e1ee61 100644 --- a/src/ogd/common/interfaces/BigQueryInterface.py +++ b/src/ogd/common/connectors/interfaces/BigQueryInterface.py @@ -6,10 +6,10 @@ from google.api_core.exceptions import BadRequest from typing import Dict, Final, List, Tuple, Optional # import locals -from ogd.common.interfaces.EventInterface import EventInterface +from ogd.common.connectors.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema -from ogd.common.schemas.configs.data_sources.BigQuerySourceSchema import BigQuerySchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.storage.BigQueryConfig import BigQueryConfig from ogd.common.utils.Logger import Logger AQUALAB_MIN_VERSION : Final[float] = 6.2 @@ -31,7 +31,7 @@ def _open(self, force_reopen: bool = False) -> bool: if not self._is_open: if "GITHUB_ACTIONS" in os.environ: self._client = bigquery.Client() - elif isinstance(self._config.Source, BigQuerySchema): + elif isinstance(self._config.Source, BigQueryConfig): os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self._config.Source.Credential or "NO CREDENTIAL CONFIGURED!" or f"./{self._game_id}.json" self._client = bigquery.Client() else: @@ -190,7 +190,7 @@ def DBPath(self, min_date:Optional[date]=None, max_date:Optional[date]=None) -> :return: The full path from project ID to table name, if properly set in configuration, else the literal string "INVALID SOURCE SCHEMA". 
:rtype: str """ - if isinstance(self._config.Source, BigQuerySchema): + if isinstance(self._config.Source, BigQueryConfig): # _current_date = datetime.now().date() date_wildcard = "*" # if min_date is not None and max_date is not None: diff --git a/src/ogd/common/interfaces/CSVInterface.py b/src/ogd/common/connectors/interfaces/CSVInterface.py similarity index 97% rename from src/ogd/common/interfaces/CSVInterface.py rename to src/ogd/common/connectors/interfaces/CSVInterface.py index 61a1bcc..0e16e04 100644 --- a/src/ogd/common/interfaces/CSVInterface.py +++ b/src/ogd/common/connectors/interfaces/CSVInterface.py @@ -5,9 +5,9 @@ from pathlib import Path from typing import Any, Dict, IO, List, Tuple, Optional ## import local files -from ogd.common.interfaces.EventInterface import EventInterface +from ogd.common.connectors.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.schemas.tables.TableSchema import TableSchema from ogd.common.utils.Logger import Logger diff --git a/src/ogd/common/interfaces/CodingInterface.py b/src/ogd/common/connectors/interfaces/CodingInterface.py similarity index 95% rename from src/ogd/common/interfaces/CodingInterface.py rename to src/ogd/common/connectors/interfaces/CodingInterface.py index 91edea2..698d09c 100644 --- a/src/ogd/common/interfaces/CodingInterface.py +++ b/src/ogd/common/connectors/interfaces/CodingInterface.py @@ -4,9 +4,9 @@ from typing import Dict, List, Tuple, Optional # import local files -from coding.Code import Code -from coding.Coder import Coder -from ogd.common.interfaces.Interface import Interface +from ogd.common.models.coding.Code import Code +from ogd.common.models.coding.Coder import Coder +from ogd.common.connectors.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode from 
ogd.common.utils.Logger import Logger diff --git a/src/ogd/common/connectors/interfaces/Interface.py b/src/ogd/common/connectors/interfaces/Interface.py new file mode 100644 index 0000000..1404c3c --- /dev/null +++ b/src/ogd/common/connectors/interfaces/Interface.py @@ -0,0 +1,218 @@ +"""DataInterface Module +""" +## import standard libraries +import abc +import logging +from datetime import datetime +from pprint import pformat +from typing import Dict, List, Optional, Tuple, Union + +# import local files +from ogd.common.connectors.filters.collections import * +from ogd.common.connectors.StorageConnector import StorageConnector +from ogd.common.models.Event import Event +from ogd.common.models.EventDataset import EventDataset +from ogd.common.models.FeatureData import FeatureData +from ogd.common.models.FeatureDataset import FeatureDataset +from ogd.common.models.enums.IDMode import IDMode +from ogd.common.models.enums.VersionType import VersionType +from ogd.common.configs.GameSourceSchema import GameSourceSchema +from ogd.common.schemas.tables.EventTableSchema import EventTableSchema +from ogd.common.schemas.tables.FeatureTableSchema import FeatureTableSchema +from ogd.common.utils.SemanticVersion import SemanticVersion +from ogd.common.utils.Logger import Logger + +class Interface(StorageConnector): + """Base class for all connectors that serve as an interface to some IO resource. + + All subclasses must implement the `_availableIDs`, `_availableDates`, `_IDsFromDates`, and `_datesFromIDs` functions. + """ + + # *** ABSTRACTS *** + + @abc.abstractmethod + def _availableIDs(self, mode:IDMode, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[str]: + """Private implementation of the logic to retrieve all IDs of given mode from the connected storage. + + :param mode: The type of ID to be listed. + :type mode: IDMode + :return: A list of IDs with given mode available through the connected storage. 
+ :rtype: List[str] + """ + pass + + @abc.abstractmethod + def _availableDates(self, id_filter:IDFilterCollection, version_filter:VersioningFilterCollection) -> Dict[str,datetime]: + """Private implementation of the logic to retrieve the full range of dates/times from the connected storage. + + :return: A dict mapping `min` and `max` to the minimum and maximum datetimes + :rtype: Dict[str,datetime] + """ + pass + + @abc.abstractmethod + def _availableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date_filter:TimingFilterCollection) -> List[SemanticVersion | str]: + pass + + @abc.abstractmethod + def _getEventRows(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Tuple]: + pass + + @abc.abstractmethod + def _getFeatureRows(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[Tuple]: + pass + + # *** BUILT-INS & PROPERTIES *** + + def __init__(self, schema:GameSourceSchema, fail_fast:bool): + self._fail_fast = fail_fast + super().__init__(schema=schema) + + def __del__(self): + self.Close() + + # *** PUBLIC STATICS *** + + # *** PUBLIC METHODS *** + + def AvailableIDs(self, mode:IDMode, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> Optional[List[str]]: + """Retrieve all IDs of given mode from the connected storage. + + :param mode: The type of ID to be listed. + :type mode: IDMode + :return: A list of IDs with given mode available through the connected storage. + :rtype: List[str] + """ + ret_val = None + if self.IsOpen: + _date_clause = f" on date(s) {date_filter}" + _version_clause = f" with version(s) {version_filter}" + _msg = f"Retrieving IDs with {mode} ID mode{_date_clause}{_version_clause} from {self.ResourceName}." 
+ Logger.Log(_msg, logging.INFO, depth=3) + ret_val = self._availableIDs(mode=mode, date_filter=date_filter, version_filter=version_filter) + else: + Logger.Log(f"Can't retrieve list of {mode} IDs from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) + return ret_val + + def AvailableDates(self, id_filter:IDFilterCollection, version_filter:VersioningFilterCollection) -> Union[Dict[str,datetime], Dict[str,None]]: + """Retrieve the full range of dates/times covered by data in the connected storage, subject to given filters. + + Note, this is different from listing the exact dates in which the data exists. + This function gets the range from the earliest instance of an event matching the filters, to the last such instance. + + TODO: Create separate functions for exact dates and date range. + + :return: A dictionary mapping `min` and `max` to the range of dates covering all data for the given IDs/versions + :rtype: Union[Dict[str,datetime], Dict[str,None]] + """ + ret_val = {'min':None, 'max':None} + if self.IsOpen: + _version_clause = f" with version(s) {version_filter}" + _msg = f"Retrieving range of event/feature dates{_version_clause} from {self.ResourceName}." + Logger.Log(_msg, logging.INFO, depth=3) + ret_val = self._availableDates(id_filter=id_filter, version_filter=version_filter) + else: + Logger.Log(f"Could not get full date range from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) + return ret_val + + + def AvailableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date_filter:TimingFilterCollection) -> List[SemanticVersion | str]: + """Get a list of all versions of given type in the connected storage, subject to ID and date filters. 
+ + :param mode: _description_ + :type mode: VersionType + :param id_filter: _description_ + :type id_filter: IDFilterCollection + :param date_filter: _description_ + :type date_filter: TimingFilterCollection + :return: _description_ + :rtype: List[SemanticVersion | str] + """ + ret_val = [] + if self.IsOpen: + _date_clause = f" on date(s) {date_filter}" + _msg = f"Retrieving data versions{_date_clause} from {self.ResourceName}." + Logger.Log(_msg, logging.INFO, depth=3) + ret_val = self._availableVersions(mode=mode, id_filter=id_filter, date_filter=date_filter) + else: + Logger.Log(f"Could not retrieve data versions from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) + return ret_val + + def GetEventCollection(self, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection(), event_filter:EventFilterCollection=EventFilterCollection()) -> EventDataset: + _filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict | event_filter.AsDict + _events = [] + if self.IsOpen: + if isinstance(self.GameSourceSchema.TableSchemaName, EventTableSchema): + # _date_clause = f" on date(s) {date_filter}" + _msg = f"Retrieving event data from {self.ResourceName}." 
+ Logger.Log(_msg, logging.INFO, depth=3) + _rows = self._getEventRows(id_filter=id_filter, date_filter=date_filter, version_filter=version_filter, event_filter=event_filter) + _events = self._eventsFromRows(rows=_rows) + else: + Logger.Log(f"Could not retrieve event data from {self.ResourceName}, the given table schema was not for event data!", logging.WARNING, depth=3) + else: + Logger.Log(f"Could not retrieve event data from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) + return EventDataset(events=_events, filters=_filters) + + def GetFeatureCollection(self, id_filter:IDFilterCollection=IDFilterCollection(), date_filter:TimingFilterCollection=TimingFilterCollection(), version_filter:VersioningFilterCollection=VersioningFilterCollection()) -> FeatureDataset: + _filters = id_filter.AsDict | date_filter.AsDict | version_filter.AsDict + _features = [] + if self.IsOpen: + if isinstance(self.GameSourceSchema.TableSchemaName, EventTableSchema): + # _date_clause = f" on date(s) {date_filter}" + _msg = f"Retrieving event data from {self.ResourceName}." 
+ Logger.Log(_msg, logging.INFO, depth=3) + _rows = self._getFeatureRows(id_filter=id_filter, date_filter=date_filter, version_filter=version_filter) + _features = self._featuresFromRows(rows=_rows) + else: + Logger.Log(f"Could not retrieve event data from {self.ResourceName}, the given table schema was not for event data!", logging.WARNING, depth=3) + else: + Logger.Log(f"Could not retrieve feature data from {self.ResourceName}, the storage connection is not open!", logging.WARNING, depth=3) + return FeatureDataset(features=_features, filters=_filters) + + # *** PRIVATE STATICS *** + + # *** PRIVATE METHODS *** + + def _eventsFromRows(self, rows:List[Tuple]) -> List[Event]: + ret_val = [] + + _curr_sess : str = "" + _evt_sess_index : int = 1 + _fallbacks = {"app_id":self._source_schema.GameID} + _table_schema = self.GameSourceSchema.TableSchemaName + if isinstance(_table_schema, EventTableSchema): + for row in rows: + try: + event = _table_schema.RowToEvent(row) + # in case event index was not given, we should fall back on using the order it came to us. + if event.SessionID != _curr_sess: + _curr_sess = event.SessionID + _evt_sess_index = 1 + event.FallbackDefaults(index=_evt_sess_index) + _evt_sess_index += 1 + except Exception as err: + if self._fail_fast: + Logger.Log(f"Error while converting row to Event\nFull error: {err}\nRow data: {pformat(row)}", logging.ERROR, depth=2) + raise err + else: + Logger.Log(f"Error while converting row ({row}) to Event. 
This row will be skipped.\nFull error: {err}", logging.WARNING, depth=2) + else: + ret_val.append(event) + else: + Logger.Log(f"Could not convert row data to Events, the given table schema was not for event data!", logging.WARNING, depth=3) + return ret_val + + def _featuresFromRows(self, rows:List[Tuple]) -> List[FeatureData]: + """_summary_ + + TODO :implement + + :param rows: _description_ + :type rows: List[Tuple] + :param schema: _description_ + :type schema: FeatureTableSchema + :return: _description_ + :rtype: List[FeatureData] + """ + return [] diff --git a/src/ogd/common/interfaces/MySQLInterface.py b/src/ogd/common/connectors/interfaces/MySQLInterface.py similarity index 68% rename from src/ogd/common/interfaces/MySQLInterface.py rename to src/ogd/common/connectors/interfaces/MySQLInterface.py index 61826da..c086b8f 100644 --- a/src/ogd/common/interfaces/MySQLInterface.py +++ b/src/ogd/common/connectors/interfaces/MySQLInterface.py @@ -6,10 +6,14 @@ from datetime import datetime from typing import Dict, Final, List, Tuple, Optional # import locals -from ogd.common.interfaces.EventInterface import EventInterface +from ogd.common.connectors.filters import * +from ogd.common.connectors.filters.collections import * +from ogd.common.connectors.interfaces.Interface import Interface +from ogd.common.models.enums.FilterMode import FilterMode from ogd.common.models.enums.IDMode import IDMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema -from ogd.common.schemas.configs.data_sources.MySQLSourceSchema import MySQLSchema +from ogd.common.models.enums.VersionType import VersionType +from ogd.common.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.storage.MySQLConfig import MySQLConfig from ogd.common.utils.Logger import Logger @@ -37,7 +41,7 @@ def ConnectDB(schema:GameSourceSchema) -> Tuple[Optional[sshtunnel.SSHTunnelForw tunnel : Optional[sshtunnel.SSHTunnelForwarder] = None db_conn : 
Optional[connection.MySQLConnection] = None # Logger.Log("Preparing database connection...", logging.INFO) - if schema.Source is not None and isinstance(schema.Source, MySQLSchema): + if schema.Source is not None and isinstance(schema.Source, MySQLConfig): if schema.Source.HasSSH: Logger.Log(f"Preparing to connect to MySQL via SSH, on host {schema.Source.SSH.Host}", level=logging.DEBUG) if (schema.Source.SSH.Host != "" and schema.Source.SSH.User != "" and schema.Source.SSH.Pass != ""): @@ -59,7 +63,7 @@ def ConnectDB(schema:GameSourceSchema) -> Tuple[Optional[sshtunnel.SSHTunnelForw # Function to help connect to a mySQL server. @staticmethod - def _connectToMySQL(login:MySQLSchema, db:str) -> Optional[connection.MySQLConnection]: + def _connectToMySQL(login:MySQLConfig, db:str) -> Optional[connection.MySQLConnection]: """Function to help connect to a mySQL server. Simply tries to make a connection, and prints an error in case of failure. @@ -86,7 +90,7 @@ def _connectToMySQL(login:MySQLSchema, db:str) -> Optional[connection.MySQLConne ## Function to help connect to a mySQL server over SSH. @staticmethod - def _connectToMySQLviaSSH(sql:MySQLSchema, db:str) -> Tuple[Optional[sshtunnel.SSHTunnelForwarder], Optional[connection.MySQLConnection]]: + def _connectToMySQLviaSSH(sql:MySQLConfig, db:str) -> Tuple[Optional[sshtunnel.SSHTunnelForwarder], Optional[connection.MySQLConnection]]: """Function to help connect to a mySQL server over SSH. Simply tries to make a connection, and prints an error in case of failure. 
@@ -161,7 +165,7 @@ def SELECT(cursor :cursor.MySQLCursor, db_name : str, columns :List[str] = [], filter : Optional[str] = None, sort_columns :Optional[List[str]] = None, sort_direction : str = "ASC", grouping : Optional[str] = None, distinct :bool = False, offset : int = 0, limit : int = -1, - fetch_results :bool = True, params : Tuple[str] = tuple()) -> Optional[List[Tuple]]: + fetch_results :bool = True, params : Tuple = tuple()) -> Optional[List[Tuple]]: """Function to build and execute SELECT statements on a database connection. :param cursor: A database cursor, retrieved from the active connection. @@ -218,15 +222,30 @@ def Query(cursor:cursor.MySQLCursor, query:str, params:Optional[Tuple], fetch_re Logger.Log(f"Query fetch completed, total query time: {time_delta} to get {len(result) if result is not None else 0:d} rows", logging.DEBUG) return result -class MySQLInterface(EventInterface): +class MySQLFilters: + @staticmethod + def FilterToMySQL(filter:Filter, column_name:str): + if isinstance(filter, NoFilter): + return "" + elif isinstance(filter, MinFilter): + return f"{column_name} > {filter.Min}" if filter.FilterMode == FilterMode.INCLUDE else f"{column_name} < {filter.Min}" + elif isinstance(filter, MaxFilter): + return f"{column_name} < {filter.Max}" if filter.FilterMode == FilterMode.INCLUDE else f"{column_name} > {filter.Max}" + elif isinstance(filter, MinMaxFilter): + return f"{filter.Min} < {column_name} AND {column_name} < {filter.Max}" if filter.FilterMode == FilterMode.INCLUDE else f"{filter.Min} > {column_name} AND {column_name} > {filter.Max}" + elif isinstance(filter, SetFilter): + set_str = ','.join(filter.Set) + return f"{column_name} IN ({set_str})" if filter.FilterMode == FilterMode.INCLUDE else f"{column_name} NOT IN ({set_str})" + +class MySQLInterface(Interface): # *** BUILT-INS & PROPERTIES *** - def __init__(self, game_id:str, config:GameSourceSchema, fail_fast:bool): + def __init__(self, schema:GameSourceSchema, fail_fast:bool): 
self._tunnel : Optional[sshtunnel.SSHTunnelForwarder] = None self._db : Optional[connection.MySQLConnection] = None self._db_cursor : Optional[cursor.MySQLCursor] = None - super().__init__(game_id=game_id, config=config, fail_fast=fail_fast) + super().__init__(schema=schema, fail_fast=fail_fast) self.Open() # *** IMPLEMENT ABSTRACT FUNCTIONS *** @@ -237,8 +256,8 @@ def _open(self, force_reopen:bool = False) -> bool: self.Open(force_reopen=False) if not self._is_open: start = datetime.now() - if isinstance(self._config.Source, MySQLSchema): - self._tunnel, self._db = SQL.ConnectDB(schema=self._config) + if isinstance(self.GameSourceSchema.Source, MySQLConfig): + self._tunnel, self._db = SQL.ConnectDB(schema=self.GameSourceSchema) if self._db is not None: self._db_cursor = self._getCursor() self._is_open = True @@ -250,7 +269,7 @@ def _open(self, force_reopen:bool = False) -> bool: SQL.disconnectMySQL(tunnel=self._tunnel, db=self._db) return False else: - Logger.Log(f"Unable to open MySQL interface, the schema has invalid type {type(self._config)}", logging.ERROR) + Logger.Log(f"Unable to open MySQL interface, the game source schema has invalid type {type(self.GameSourceSchema)}", logging.ERROR) SQL.disconnectMySQL(tunnel=self._tunnel, db=self._db) return False else: @@ -262,18 +281,24 @@ def _close(self) -> bool: self._is_open = False return True - def _allIDs(self) -> List[str]: - if self._db_cursor is not None and isinstance(self._config.Source, MySQLSchema): - _db_name : str = self._config.DatabaseName - _table_name : str = self._config.TableName + def _availableIDs(self, mode:IDMode, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection) -> List[str]: + if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLConfig): + _db_name : str = self.GameSourceSchema.DatabaseName + _table_name : str = self.GameSourceSchema.TableName - sess_id_col : str = self._TableSchema.SessionIDColumn or "session_id" + sess_id_col : str 
= self.GameSourceSchema.TableSchema.SessionIDColumn or "session_id" filters : List[str] = [] params : List[str] = [] - if _table_name != self._game_id: + # 1. If we're in shared table, then need to filter on game ID + if _table_name != self.GameSourceSchema.GameID: filters.append(f"`app_id`=%s") - params.append(self._game_id) + params.append(self.GameSourceSchema.GameID) + # 2. Sort out filters from date_filter + + # 3. Sort out filters from version_filter + + # 4. Combine filters & execute filter_clause = " AND ".join(filters) data = SQL.SELECT(cursor =self._db_cursor, db_name=_db_name, table =_table_name, @@ -284,18 +309,50 @@ def _allIDs(self) -> List[str]: Logger.Log(f"Could not get list of all session ids, MySQL connection is not open.", logging.WARN) return [] - def _fullDateRange(self) -> Dict[str,datetime]: + # def _IDsFromDates(self, min:datetime, max:datetime, versions:Optional[List[int]]=None) -> List[str]: + # ret_val = [] + # if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLConfig): + # # alias long setting names. + # _db_name : str = self.GameSourceSchema.DatabaseName + # _table_name : str = self.GameSourceSchema.TableName + + # # prep filter strings + # filters = [] + # params = [] + # if _table_name != self._game_id: + # filters.append(f"`app_id`=%s") + # params.append(self._game_id) + # # if versions is not None and versions is not []: + # # filters.append(f"app_version in ({','.join([str(version) for version in versions])})") + # filters.append(f"`{self._TableSchema.EventSequenceIndexColumn}`='0'") + # filters.append(f"(`server_time` BETWEEN '{min.isoformat()}' AND '{max.isoformat()}')") + # filter_clause = " AND ".join(filters) + + # # run query + # # We grab the ids for all sessions that have 0th move in the proper date range. 
+ # sess_id_col = self._TableSchema.SessionIDColumn or "`session_id`" + # sess_ids_raw = SQL.SELECT(cursor=self._db_cursor, db_name=_db_name, table=_table_name, + # columns=[sess_id_col], filter=filter_clause, + # sort_columns=[sess_id_col], sort_direction="ASC", distinct=True, + # params=tuple(params)) + # if sess_ids_raw is not None: + # ret_val = [str(sess[0]) for sess in sess_ids_raw] + # else: + # Logger.Log(f"Could not get session list for {min.isoformat()}-{max.isoformat()} range, MySQL connection is not open or config was not for MySQL.", logging.WARN) + # return ret_val + + def _availableDates(self, id_filter:IDFilterCollection, version_filter:VersioningFilterCollection) -> Dict[str,datetime]: ret_val = {'min':datetime.now(), 'max':datetime.now()} - if self._db_cursor is not None and isinstance(self._config.Source, MySQLSchema): - _db_name : str = self._config.DatabaseName - _table_name : str = self._config.TableName + if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLConfig): + _db_name : str = self.GameSourceSchema.DatabaseName + _table_name : str = self.GameSourceSchema.TableName # prep filter strings filters = [] params = [] - if _table_name != self._game_id: + if _table_name != self.GameSourceSchema.GameID: filters.append(f"`app_id`=%s") - params.append(self._game_id) + params.append(self.GameSourceSchema.GameID) filter_clause = " AND ".join(filters) # run query @@ -308,24 +365,62 @@ def _fullDateRange(self) -> Dict[str,datetime]: Logger.Log(f"Could not get full date range, MySQL connection is not open or config was not for MySQL.", logging.WARN) return ret_val - def _rowsFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]]=None, exclude_rows:Optional[List[str]]=None) -> List[Tuple]: + # def _datesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]]=None) -> Dict[str, datetime]: + # ret_val = {'min':datetime.now(), 'max':datetime.now()} + # 
if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLConfig): + # # alias long setting names. + # _db_name : str = self.GameSourceSchema.DatabaseName + # _table_name : str = self.GameSourceSchema.TableName + + # # prep filter strings + # filters = [] + # params = tuple() + # if _table_name != self._game_id: + # filters.append(f"`app_id`=%s") + # params = tuple(self._game_id) + # # if versions is not None and versions is not []: + # # filters.append(f"app_version in ({','.join([str(version) for version in versions])})") + # ids_string = ','.join([f"'{x}'" for x in id_list]) + # if id_mode == IDMode.SESSION: + # sess_id_col = self._TableSchema.SessionIDColumn or "session_id" + # filters.append(f"{sess_id_col} IN ({ids_string})") + # elif id_mode == IDMode.USER: + # play_id_col = self._TableSchema.UserIDColumn or "player_id" + # filters.append(f"`{play_id_col}` IN ({ids_string})") + # else: + # raise ValueError("Invalid IDMode in MySQLInterface!") + # filter_clause = " AND ".join(filters) + # # run query + # result = SQL.SELECT(cursor=self._db_cursor, db_name=_db_name, table=_table_name, + # columns=['MIN(server_time)', 'MAX(server_time)'], filter=filter_clause, + # params=params) + # if result is not None: + # ret_val = {'min':result[0][0], 'max':result[0][1]} + # else: + # Logger.Log(f"Could not get date range for {len(id_list)} sessions, MySQL connection is not open.", logging.WARN) + # return ret_val + + def _availableVersions(self, mode:VersionType, id_filter:IDFilterCollection, date_filter:TimingFilterCollection) -> List[SemanticVersion | str]: + return [] + + def _getEventRows(self, id_filter:IDFilterCollection, date_filter:TimingFilterCollection, version_filter:VersioningFilterCollection, event_filter:EventFilterCollection) -> List[Tuple]: ret_val = [] # grab data for the given session range. 
Sort by event time, so - if self._db_cursor is not None and isinstance(self._config.Source, MySQLSchema): + if self._db_cursor is not None and isinstance(self.GameSourceSchema.Source, MySQLConfig): # filt = f"app_id='{self._game_id}' AND (session_id BETWEEN '{next_slice[0]}' AND '{next_slice[-1]}'){ver_filter}" - _db_name : str = self._config.DatabaseName - _table_name : str = self._config.TableName + _db_name : str = self.GameSourceSchema.DatabaseName + _table_name : str = self.GameSourceSchema.TableName - sess_id_col = self._TableSchema.SessionIDColumn or 'session_id' - play_id_col = self._TableSchema.UserIDColumn or 'player_id' - seq_idx_col = self._TableSchema.EventSequenceIndexColumn or 'session_n' - evt_nam_col = self._TableSchema.EventNameColumn or "event_name" + sess_id_col = self.GameSourceSchema.TableSchema.SessionIDColumn or 'session_id' + play_id_col = self.GameSourceSchema.TableSchema.UserIDColumn or 'player_id' + seq_idx_col = self.GameSourceSchema.TableSchema.EventSequenceIndexColumn or 'session_n' + evt_nam_col = self.GameSourceSchema.TableSchema.EventNameColumn or "event_name" filters = [] params = [] - if _table_name != self._game_id: + if _table_name != self.GameSourceSchema.GameID: filters.append(f"`app_id`=%s") - params.append(self._game_id) + params.append(self.GameSourceSchema.GameID) # if versions is not None and versions is not []: # filters.append(f"app_version in ({','.join([str(version) for version in versions])})") id_param_string = ",".join( [f"%s"]*len(id_list) ) @@ -353,73 +448,6 @@ def _rowsFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, version Logger.Log(f"Could not get data for {len(id_list)} sessions, MySQL connection is not open or config was not for MySQL.", logging.WARN) return ret_val - def _IDsFromDates(self, min:datetime, max:datetime, versions:Optional[List[int]]=None) -> List[str]: - ret_val = [] - if self._db_cursor is not None and isinstance(self._config.Source, MySQLSchema): - # alias long setting 
names. - _db_name : str = self._config.DatabaseName - _table_name : str = self._config.TableName - - # prep filter strings - filters = [] - params = [] - if _table_name != self._game_id: - filters.append(f"`app_id`=%s") - params.append(self._game_id) - # if versions is not None and versions is not []: - # filters.append(f"app_version in ({','.join([str(version) for version in versions])})") - filters.append(f"`{self._TableSchema.EventSequenceIndexColumn}`='0'") - filters.append(f"(`server_time` BETWEEN '{min.isoformat()}' AND '{max.isoformat()}')") - filter_clause = " AND ".join(filters) - - # run query - # We grab the ids for all sessions that have 0th move in the proper date range. - sess_id_col = self._TableSchema.SessionIDColumn or "`session_id`" - sess_ids_raw = SQL.SELECT(cursor=self._db_cursor, db_name=_db_name, table=_table_name, - columns=[sess_id_col], filter=filter_clause, - sort_columns=[sess_id_col], sort_direction="ASC", distinct=True, - params=tuple(params)) - if sess_ids_raw is not None: - ret_val = [str(sess[0]) for sess in sess_ids_raw] - else: - Logger.Log(f"Could not get session list for {min.isoformat()}-{max.isoformat()} range, MySQL connection is not open or config was not for MySQL.", logging.WARN) - return ret_val - - def _datesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]]=None) -> Dict[str, datetime]: - ret_val = {'min':datetime.now(), 'max':datetime.now()} - if self._db_cursor is not None and isinstance(self._config.Source, MySQLSchema): - # alias long setting names. 
- _db_name : str = self._config.DatabaseName - _table_name : str = self._config.TableName - - # prep filter strings - filters = [] - params = tuple() - if _table_name != self._game_id: - filters.append(f"`app_id`=%s") - params = tuple(self._game_id) - # if versions is not None and versions is not []: - # filters.append(f"app_version in ({','.join([str(version) for version in versions])})") - ids_string = ','.join([f"'{x}'" for x in id_list]) - if id_mode == IDMode.SESSION: - sess_id_col = self._TableSchema.SessionIDColumn or "session_id" - filters.append(f"{sess_id_col} IN ({ids_string})") - elif id_mode == IDMode.USER: - play_id_col = self._TableSchema.UserIDColumn or "player_id" - filters.append(f"`{play_id_col}` IN ({ids_string})") - else: - raise ValueError("Invalid IDMode in MySQLInterface!") - filter_clause = " AND ".join(filters) - # run query - result = SQL.SELECT(cursor=self._db_cursor, db_name=_db_name, table=_table_name, - columns=['MIN(server_time)', 'MAX(server_time)'], filter=filter_clause, - params=params) - if result is not None: - ret_val = {'min':result[0][0], 'max':result[0][1]} - else: - Logger.Log(f"Could not get date range for {len(id_list)} sessions, MySQL connection is not open.", logging.WARN) - return ret_val - # *** PUBLIC STATICS *** # *** PUBLIC METHODS *** diff --git a/src/ogd/common/interfaces/outerfaces/DataOuterface.py b/src/ogd/common/connectors/outerfaces/DataOuterface.py similarity index 98% rename from src/ogd/common/interfaces/outerfaces/DataOuterface.py rename to src/ogd/common/connectors/outerfaces/DataOuterface.py index 445ba4a..803f584 100644 --- a/src/ogd/common/interfaces/outerfaces/DataOuterface.py +++ b/src/ogd/common/connectors/outerfaces/DataOuterface.py @@ -9,7 +9,7 @@ from ogd.common.interfaces.Interface import Interface from ogd.common.models.enums.IDMode import IDMode from ogd.common.models.enums.ExportMode import ExportMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from 
ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import ExportRow diff --git a/src/ogd/common/interfaces/outerfaces/DebugOuterface.py b/src/ogd/common/connectors/outerfaces/DebugOuterface.py similarity index 98% rename from src/ogd/common/interfaces/outerfaces/DebugOuterface.py rename to src/ogd/common/connectors/outerfaces/DebugOuterface.py index e7d349a..0f2b361 100644 --- a/src/ogd/common/interfaces/outerfaces/DebugOuterface.py +++ b/src/ogd/common/connectors/outerfaces/DebugOuterface.py @@ -7,7 +7,7 @@ # import OGD files from ogd.common.interfaces.outerfaces.DataOuterface import DataOuterface from ogd.common.models.enums.ExportMode import ExportMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import ExportRow diff --git a/src/ogd/common/interfaces/outerfaces/DictionaryOuterface.py b/src/ogd/common/connectors/outerfaces/DictionaryOuterface.py similarity index 98% rename from src/ogd/common/interfaces/outerfaces/DictionaryOuterface.py rename to src/ogd/common/connectors/outerfaces/DictionaryOuterface.py index 9797e4a..358d93b 100644 --- a/src/ogd/common/interfaces/outerfaces/DictionaryOuterface.py +++ b/src/ogd/common/connectors/outerfaces/DictionaryOuterface.py @@ -5,7 +5,7 @@ # import local files from ogd.common.interfaces.outerfaces.DataOuterface import DataOuterface from ogd.common.models.enums.ExportMode import ExportMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import ExportRow diff --git a/src/ogd/common/interfaces/outerfaces/TSVOuterface.py b/src/ogd/common/connectors/outerfaces/TSVOuterface.py similarity index 99% rename from 
src/ogd/common/interfaces/outerfaces/TSVOuterface.py rename to src/ogd/common/connectors/outerfaces/TSVOuterface.py index 70e139b..e9de96d 100644 --- a/src/ogd/common/interfaces/outerfaces/TSVOuterface.py +++ b/src/ogd/common/connectors/outerfaces/TSVOuterface.py @@ -18,10 +18,10 @@ from ogd.common.interfaces.outerfaces.DataOuterface import DataOuterface from ogd.common.models.enums.ExtractionMode import ExtractionMode from ogd.common.models.enums.ExportMode import ExportMode -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.configs.GameSourceSchema import GameSourceSchema from ogd.common.schemas.games.GameSchema import GameSchema from ogd.common.schemas.tables.TableSchema import TableSchema -from ogd.common.schemas.configs.IndexingSchema import FileIndexingSchema +from ogd.common.configs.IndexingConfig import FileIndexingConfig from ogd.common.utils import fileio from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import ExportRow @@ -31,12 +31,12 @@ class TSVOuterface(DataOuterface): # *** BUILT-INS & PROPERTIES *** - def __init__(self, game_id:str, config:GameSourceSchema, export_modes:Set[ExportMode], date_range:Dict[str,Optional[datetime]], file_indexing:FileIndexingSchema, extension:str="tsv", dataset_id:Optional[str]=None): + def __init__(self, game_id:str, config:GameSourceSchema, export_modes:Set[ExportMode], date_range:Dict[str,Optional[datetime]], file_indexing:FileIndexingConfig, extension:str="tsv", dataset_id:Optional[str]=None): super().__init__(game_id=game_id, config=config, export_modes=export_modes) self._file_paths : Dict[str,Optional[Path]] = {"population":None, "players":None, "sessions":None, "processed_events":None, "raw_events":None} self._zip_paths : Dict[str,Optional[Path]] = {"population":None, "players":None, "sessions":None, "processed_events":None, "raw_events":None} self._files : Dict[str,Optional[IO]] = {"population":None, "players":None, "sessions":None, 
"processed_events":None, "raw_events":None} - self._file_indexing : FileIndexingSchema = file_indexing + self._file_indexing : FileIndexingConfig = file_indexing self._data_dir : Path = Path(f"./{self._file_indexing.LocalDirectory}") self._game_data_dir : Path = self._data_dir / self._game_id self._readme_path : Path = self._game_data_dir / "README.md" diff --git a/src/ogd/common/connectors/outerfaces/__init__.py b/src/ogd/common/connectors/outerfaces/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/ogd/common/interfaces/EventInterface.py b/src/ogd/common/interfaces/EventInterface.py deleted file mode 100644 index ef270b5..0000000 --- a/src/ogd/common/interfaces/EventInterface.py +++ /dev/null @@ -1,141 +0,0 @@ -## import standard libraries -import abc -import logging -from datetime import datetime -from pprint import pformat -from typing import Any, Dict, List, Tuple, Optional, Union - -# import local files -from ogd.common.interfaces.Interface import Interface -from ogd.common.models.Event import Event -from ogd.common.models.enums.IDMode import IDMode -from ogd.common.schemas.tables.TableSchema import TableSchema -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema -from ogd.common.utils.Logger import Logger - -class EventInterface(Interface): - - # *** ABSTRACTS *** - - @abc.abstractmethod - def _allIDs(self) -> List[str]: - pass - - @abc.abstractmethod - def _fullDateRange(self) -> Dict[str,datetime]: - pass - - @abc.abstractmethod - def _rowsFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]] = None, exclude_rows:Optional[List[str]] = None) -> List[Tuple]: - """Function to retrieve all rows for a given set of Session or Player IDs, which can be converted to Event objects by a TableSchema - - :param id_list: List of IDs whose events should be retrieved from the database. These are session IDs if id_mode is SESSION, or user IDs if id_mode is USER. 
- :type id_list: List[str] - :param id_mode: The mode of ID to use for interpreting the id_list, defaults to IDMode.SESSION - :type id_mode: IDMode, optional - :param versions: List of log_versions to include in the query, any versions not in the list will be ignored. Defaults to None - :type versions: Optional[List[int]], optional - :param exclude_rows: List of event names to be excluded from the query, defaults to None - :type exclude_rows: Optional[List[str]], optional - :return: A list of raw results from the query. - :rtype: List[Tuple] - """ - pass - - @abc.abstractmethod - def _IDsFromDates(self, min:datetime, max:datetime, versions:Optional[List[int]] = None) -> List[str]: - pass - - @abc.abstractmethod - def _datesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]] = None) -> Dict[str,datetime]: - pass - - # *** BUILT-INS & PROPERTIES *** - - def __init__(self, game_id:str, config:GameSourceSchema, fail_fast:bool): - super().__init__(config=config) - self._fail_fast = fail_fast - self._game_id : str = game_id - self._table_schema : TableSchema = TableSchema(schema_name=self._config.TableSchema) - - def __del__(self): - self.Close() - - # *** PUBLIC STATICS *** - - # *** PUBLIC METHODS *** - - def AllIDs(self) -> Optional[List[str]]: - ret_val = None - if self.IsOpen(): - ret_val = self._allIDs() - else: - Logger.Log("Can't retrieve list of all session IDs, the source interface is not open!", logging.WARNING, depth=3) - return ret_val - - def FullDateRange(self) -> Union[Dict[str,datetime], Dict[str,None]]: - ret_val = {'min':None, 'max':None} - if self.IsOpen(): - ret_val = self._fullDateRange() - else: - Logger.Log(f"Could not get full date range, the source interface is not open!", logging.WARNING, depth=3) - return ret_val - - def EventsFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]]=None, exclude_rows:Optional[List[str]]=None) -> Optional[List[Event]]: - ret_val = 
None - - _curr_sess : str = "" - _evt_sess_index : int = 1 - if self.IsOpen(): - Logger.Log(f"Retrieving rows from IDs with {id_mode.name} ID mode.", logging.DEBUG, depth=3) - _rows = self._rowsFromIDs(id_list=id_list, id_mode=id_mode, versions=versions, exclude_rows=exclude_rows) - _fallbacks = {"app_id":self._game_id} - ret_val = [] - for row in _rows: - try: - next_event = self._table_schema.RowToEvent(row=row, fallbacks=_fallbacks) - # in case event index was not given, we should fall back on using the order it came to us. - if next_event.SessionID != _curr_sess: - _curr_sess = next_event.SessionID - _evt_sess_index = 1 - next_event.FallbackDefaults(index=_evt_sess_index) - _evt_sess_index += 1 - except Exception as err: - if self._fail_fast: - Logger.Log(f"Error while converting row to Event\nFull error: {err}\nRow data: {pformat(row)}", logging.ERROR, depth=2) - raise err - else: - Logger.Log(f"Error while converting row ({row}) to Event. This row will be skipped.\nFull error: {err}", logging.WARNING, depth=2) - else: - ret_val.append(next_event) - else: - Logger.Log(f"Could not retrieve rows for {len(id_list)} session IDs, the source interface is not open!", logging.WARNING, depth=3) - return ret_val - - def IDsFromDates(self, min:datetime, max:datetime, versions:Optional[List[int]]=None) -> Optional[List[str]]: - ret_val = None - if not self.IsOpen(): - str_min, str_max = min.strftime("%Y%m%d"), max.strftime("%Y%m%d") - Logger.Log(f"Could not retrieve IDs for {str_min}-{str_max}, the source interface is not open!", logging.WARNING, depth=3) - else: - ret_val = self._IDsFromDates(min=min, max=max, versions=versions) - return ret_val - - def DatesFromIDs(self, id_list:List[str], id_mode:IDMode=IDMode.SESSION, versions:Optional[List[int]]=None) -> Union[Dict[str,datetime], Dict[str,None]]: - ret_val = {'min':None, 'max':None} - if not self.IsOpen(): - Logger.Log(f"Could not retrieve date range {len(id_list)} session IDs, the source interface is not open!", 
logging.WARNING, depth=3) - else: - Logger.Log(f"Retrieving date range from IDs with {id_mode.name} ID mode.", logging.DEBUG, depth=3) - ret_val = self._datesFromIDs(id_list=id_list, id_mode=id_mode, versions=versions) - return ret_val - - # *** PROPERTIES *** - - @property - def _TableSchema(self) -> TableSchema: - return self._table_schema - - # *** PRIVATE STATICS *** - - # *** PRIVATE METHODS *** diff --git a/src/ogd/common/interfaces/Interface.py b/src/ogd/common/interfaces/Interface.py deleted file mode 100644 index 0a2683f..0000000 --- a/src/ogd/common/interfaces/Interface.py +++ /dev/null @@ -1,49 +0,0 @@ -## import standard libraries -import abc -from typing import Any, Dict - -# import local files -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema - -class Interface(abc.ABC): - - # *** ABSTRACTS *** - - @abc.abstractmethod - def _open(self) -> bool: - pass - - @abc.abstractmethod - def _close(self) -> bool: - pass - - # *** BUILT-INS & PROPERTIES *** - - def __init__(self, config:GameSourceSchema): - self._config : GameSourceSchema = config - self._is_open : bool = False - - def __del__(self): - self.Close() - - # *** PUBLIC STATICS *** - - # *** PUBLIC METHODS *** - - def Open(self, force_reopen:bool = False) -> bool: - if (not self._is_open) or force_reopen: - self._is_open = self._open() - return self._is_open - - def IsOpen(self) -> bool: - return True if self._is_open else False - - def Close(self) -> bool: - if self.IsOpen(): - return self._close() - else: - return True - - # *** PRIVATE STATICS *** - - # *** PRIVATE METHODS *** diff --git a/src/ogd/common/models/EventDataset.py b/src/ogd/common/models/EventDataset.py new file mode 100644 index 0000000..d4a7359 --- /dev/null +++ b/src/ogd/common/models/EventDataset.py @@ -0,0 +1,28 @@ +## import standard libraries +from typing import Dict, List +# import local files +from ogd.common.connectors.filters.collections import * +from ogd.common.connectors.filters.Filter import 
Filter +from ogd.common.models.Event import Event + +class EventDataset: + """Dumb struct that primarily just contains an ordered list of events. + It also contains information on any filters used to define the dataset, such as a date range or set of versions. + """ + + def __init__(self, events:List[Event], filters:Dict[str, Filter]) -> None: + self._events = events + self._filters = filters + + @property + def Events(self) -> List[Event]: + return self._events + + @property + def Filters(self) -> Dict[str, Filter]: + return self._filters + + @property + def AsMarkdown(self): + _filters_clause = "* ".join([f"{key} : {val}" for key,val in self._filters.items()]) + return f"## Event Dataset\n\n{_filters_clause}" \ No newline at end of file diff --git a/src/ogd/common/models/FeatureDataset.py b/src/ogd/common/models/FeatureDataset.py new file mode 100644 index 0000000..fe273f6 --- /dev/null +++ b/src/ogd/common/models/FeatureDataset.py @@ -0,0 +1,28 @@ +## import standard libraries +from typing import Dict, List +# import local files +from ogd.common.connectors.filters.collections import * +from ogd.common.connectors.filters.Filter import Filter +from ogd.common.models.FeatureData import FeatureData + +class FeatureDataset: + """Dumb struct that primarily just contains an ordered list of events. + It also contains information on any filters used to define the dataset, such as a date range or set of versions. 
+ """ + + def __init__(self, features:List[FeatureData], filters:Dict[str, Filter]) -> None: + self._features = features + self._filters = filters + + @property + def Features(self) -> List[FeatureData]: + return self._features + + @property + def Filters(self) -> Dict[str, Filter]: + return self._filters + + @property + def AsMarkdown(self): + _filters_clause = "* ".join([f"{key} : {val}" for key,val in self._filters.items()]) + return f"## Feature Dataset\n\n{_filters_clause}" \ No newline at end of file diff --git a/src/ogd/common/models/enums/ElementMappingType.py b/src/ogd/common/models/enums/ElementMappingType.py new file mode 100644 index 0000000..e548280 --- /dev/null +++ b/src/ogd/common/models/enums/ElementMappingType.py @@ -0,0 +1,33 @@ +"""ElementMappingType Module +""" + +# import standard libraries +from enum import IntEnum + +class ElementMappingType(IntEnum): + """Enum representing the different kinds of column-element mappings in TableSchemas. + + Namely: + + * Dictionary + * List + * Single + """ + SINGLE = 1 + LIST = 2 + DICT = 3 + + def __str__(self): + return self.name + + @classmethod + def FromString(cls, string:str) -> "ElementMappingType": + match string.upper(): + case "SINGLE": + return cls.SINGLE + case "LIST": + return cls.LIST + case "DICT" | "DICTIONARY": + return cls.DICT + case _: + raise ValueError(f"Unrecognized element mapping type {string}!") diff --git a/src/ogd/common/models/enums/IDMode.py b/src/ogd/common/models/enums/IDMode.py index 92a5fa5..5b0b724 100644 --- a/src/ogd/common/models/enums/IDMode.py +++ b/src/ogd/common/models/enums/IDMode.py @@ -1,7 +1,23 @@ +"""IDMode Module +""" + # import standard libraries from enum import IntEnum class IDMode(IntEnum): + """Enum representing the different kinds of IDs in OpenGameData. 
+ + Namely: + + * Session IDs + * User IDs (or Player IDs) + * App IDs (or Game IDs) + + :param IntEnum: _description_ + :type IntEnum: _type_ + :return: _description_ + :rtype: _type_ + """ SESSION = 1 USER = 2 GAME = 3 diff --git a/src/ogd/common/models/enums/TableType.py b/src/ogd/common/models/enums/TableType.py new file mode 100644 index 0000000..b0a9d6a --- /dev/null +++ b/src/ogd/common/models/enums/TableType.py @@ -0,0 +1,30 @@ +"""TableType Module +""" + +# import standard libraries +from enum import IntEnum +from typing import Self + +class TableType(IntEnum): + """Enum representing the different kinds of data table from which data can be retrieved + + Namely: + + * Events + * Features + """ + EVENT = 1 + FEATURE = 2 + + def __str__(self): + return self.name + + @classmethod + def FromString(cls, string:str) -> "TableType": + match string.upper(): + case "EVENT": + return cls.EVENT + case "FEATURE": + return cls.FEATURE + case _: + raise ValueError(f"Unrecognized table type {string}!") diff --git a/src/ogd/common/models/enums/VersionType.py b/src/ogd/common/models/enums/VersionType.py new file mode 100644 index 0000000..4002050 --- /dev/null +++ b/src/ogd/common/models/enums/VersionType.py @@ -0,0 +1,26 @@ +"""VersionType Module +""" + +# import standard libraries +from enum import IntEnum + +class VersionType(IntEnum): + """Enum representing the different kinds of versioning in OpenGameData. 
+ + Namely: + + * Log Version + * App Version + * App Branch + + :param IntEnum: _description_ + :type IntEnum: _type_ + :return: _description_ + :rtype: _type_ + """ + LOG = 1 + APP = 2 + BRANCH = 3 + + def __str__(self): + return self.name diff --git a/src/ogd/common/schemas/configs/data_sources/DataSourceSchema.py b/src/ogd/common/schemas/configs/data_sources/DataSourceSchema.py deleted file mode 100644 index 8a09ca1..0000000 --- a/src/ogd/common/schemas/configs/data_sources/DataSourceSchema.py +++ /dev/null @@ -1,65 +0,0 @@ -# import standard libraries -import abc -import logging -from typing import Any, Dict -# import local files -from ogd.common.schemas.Schema import Schema -from ogd.common.utils.Logger import Logger - -class DataSourceSchema(Schema): - - # *** ABSTRACTS *** - - @property - @abc.abstractmethod - def AsConnectionInfo(self) -> str: - pass - - # *** BUILT-INS & PROPERTIES *** - - def __init__(self, name:str, other_elements:Dict[str, Any]): - self._db_type : str - if not isinstance(other_elements, dict): - other_elements = {} - Logger.Log(f"For {name} Data Source config, other_elements was not a dict, defaulting to empty dict", logging.WARN) - # Parse DB info - self._db_type = DataSourceSchema.ElementFromDict(all_elements=other_elements, logger=None, - element_names=["DB_TYPE"], - parser_function=DataSourceSchema._parseDBType, - default_value="UNKNOWN" - ) - - _used = {"DB_TYPE"} - _leftovers = { key : val for key,val in other_elements.items() if key not in _used } - super().__init__(name=name, other_elements=_leftovers) - - @property - def Type(self) -> str: - """The type of source indicated by the data source schema. 
- - This includes but is not limited to "FIREBASE", "BIGQUERY", and "MySQL" - - :return: A string describing the type of the data source - :rtype: str - """ - return self._db_type - - # *** IMPLEMENT ABSTRACT FUNCTIONS *** - - # *** PUBLIC STATICS *** - - # *** PUBLIC METHODS *** - - # *** PRIVATE STATICS *** - - @staticmethod - def _parseDBType(db_type) -> str: - ret_val : str - if isinstance(db_type, str): - ret_val = db_type - else: - ret_val = str(db_type) - Logger.Log(f"Data Source DB type was unexpected type {type(db_type)}, defaulting to str(db_type)={ret_val}.", logging.WARN) - return ret_val - - # *** PRIVATE METHODS *** diff --git a/src/ogd/common/schemas/games/GameSchema.py b/src/ogd/common/schemas/games/GameSchema.py index 8361a6e..ed86123 100644 --- a/src/ogd/common/schemas/games/GameSchema.py +++ b/src/ogd/common/schemas/games/GameSchema.py @@ -6,14 +6,14 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union # import local files from ogd.common.schemas.Schema import Schema -from ogd.common.schemas.games.AggregateSchema import AggregateSchema -from ogd.common.schemas.games.DetectorSchema import DetectorSchema -from ogd.common.schemas.games.DetectorMapSchema import DetectorMapSchema +from ogd.common.configs.games.AggregateConfig import AggregateConfig +from ogd.common.configs.games.DetectorConfig import DetectorConfig +from ogd.common.configs.games.DetectorMapConfig import DetectorMapConfig from ogd.common.schemas.games.DataElementSchema import DataElementSchema from ogd.common.schemas.games.EventSchema import EventSchema -from ogd.common.schemas.games.PerCountSchema import PerCountSchema -from ogd.common.schemas.games.FeatureSchema import FeatureSchema -from ogd.common.schemas.games.FeatureMapSchema import FeatureMapSchema +from ogd.common.configs.games.PerCountConfig import PerCountConfig +from ogd.common.configs.games.FeatureConfig import FeatureConfig +from ogd.common.configs.games.FeatureMapConfig import FeatureMapConfig from 
ogd.common.models.enums.IterationMode import IterationMode from ogd.common.models.enums.ExtractionMode import ExtractionMode from ogd.common.utils import fileio @@ -47,9 +47,9 @@ class GameSchema(Schema): def __init__(self, name:str, game_id:str, enum_defs:Dict[str, List[str]], game_state:Map, user_data:Map, event_list:List[EventSchema], - detector_map:Dict[str, Dict[str, DetectorSchema]], - aggregate_feats: Dict[str, AggregateSchema], percount_feats:Dict[str, PerCountSchema], - legacy_perlevel_feats: Dict[str, PerCountSchema], use_legacy_mode:bool, + detector_map:Dict[str, Dict[str, DetectorConfig]], + aggregate_feats: Dict[str, AggregateConfig], percount_feats:Dict[str, PerCountConfig], + legacy_perlevel_feats: Dict[str, PerCountConfig], use_legacy_mode:bool, config:Map, min_level:Optional[int], max_level:Optional[int], other_ranges:Dict[str, range], supported_vers:Optional[List[int]], other_elements:Optional[Map]=None): """Constructor for the GameSchema class. @@ -58,7 +58,7 @@ def __init__(self, name:str, game_id:str, enum_defs:Dict[str, List[str]], all features to be extracted. TODO: need to get game_state from schema file, and use a GameStateSchema instead of general Map. - TODO: Use DetectorMapSchema and FeatureMapSchema instead of just dicts... I think. Depending how these all work together. + TODO: Use DetectorMapConfig and FeatureMapConfig instead of just dicts... I think. Depending how these all work together. TODO : make parser functions for config and versions, so we can do ElementFromDict for them as well. 
:param name: _description_ @@ -74,13 +74,13 @@ def __init__(self, name:str, game_id:str, enum_defs:Dict[str, List[str]], :param event_list: _description_ :type event_list: List[EventSchema] :param detector_map: _description_ - :type detector_map: Dict[str, Dict[str, DetectorSchema]] + :type detector_map: Dict[str, Dict[str, DetectorConfig]] :param aggregate_feats: _description_ - :type aggregate_feats: Dict[str, AggregateSchema] + :type aggregate_feats: Dict[str, AggregateConfig] :param percount_feats: _description_ :type percount_feats: Dict[str, PerCountFeatures] :param legacy_perlevel_feats: _description_ - :type legacy_perlevel_feats: Dict[str, PerCountSchema] + :type legacy_perlevel_feats: Dict[str, PerCountConfig] :param use_legacy_mode: _description_ :type use_legacy_mode: bool :param config: _description_ @@ -104,10 +104,10 @@ def __init__(self, name:str, game_id:str, enum_defs:Dict[str, List[str]], self._game_state : Map = game_state self._user_data : Map = user_data self._event_list : List[EventSchema] = event_list - self._detector_map : Dict[str, Dict[str, DetectorSchema]] = detector_map - self._aggregate_feats : Dict[str, AggregateSchema] = aggregate_feats - self._percount_feats : Dict[str, PerCountSchema] = percount_feats - self._legacy_perlevel_feats : Dict[str, PerCountSchema] = legacy_perlevel_feats + self._detector_map : Dict[str, Dict[str, DetectorConfig]] = detector_map + self._aggregate_feats : Dict[str, AggregateConfig] = aggregate_feats + self._percount_feats : Dict[str, PerCountConfig] = percount_feats + self._legacy_perlevel_feats : Dict[str, PerCountConfig] = legacy_perlevel_feats self._legacy_mode : bool = use_legacy_mode self._config : Map = config self._min_level : Optional[int] = min_level @@ -157,7 +157,7 @@ def EventTypes(self) -> List[str]: return [event.Name for event in self.Events] @property - def Detectors(self) -> Dict[str, Dict[str, DetectorSchema]]: + def Detectors(self) -> Dict[str, Dict[str, DetectorConfig]]: """Property for 
the dictionary of categorized detectors to extract. """ return self._detector_map @@ -172,19 +172,19 @@ def DetectorNames(self) -> List[str]: return ret_val @property - def PerCountDetectors(self) -> Dict[str, DetectorSchema]: + def PerCountDetectors(self) -> Dict[str, DetectorConfig]: """Property for the dictionary of per-custom-count detectors. """ return self.Detectors.get("per_count", {}) @property - def AggregateDetectors(self) -> Dict[str, DetectorSchema]: + def AggregateDetectors(self) -> Dict[str, DetectorConfig]: """Property for the dictionary of aggregate detectors. """ return self.Detectors.get("aggregate", {}) @property - def Features(self) -> Dict[str, Union[Dict[str, AggregateSchema], Dict[str, PerCountSchema]]]: + def Features(self) -> Dict[str, Union[Dict[str, AggregateConfig], Dict[str, PerCountConfig]]]: """Property for the dictionary of categorized features to extract. """ return { 'aggregate' : self._aggregate_feats, 'per_count' : self._percount_feats, 'perlevel' : self._legacy_perlevel_feats } @@ -199,19 +199,19 @@ def FeatureNames(self) -> List[str]: return ret_val @property - def LegacyPerLevelFeatures(self) -> Dict[str,PerCountSchema]: + def LegacyPerLevelFeatures(self) -> Dict[str,PerCountConfig]: """Property for the dictionary of legacy per-level features """ return self._legacy_perlevel_feats @property - def PerCountFeatures(self) -> Dict[str,PerCountSchema]: + def PerCountFeatures(self) -> Dict[str,PerCountConfig]: """Property for the dictionary of per-custom-count features. """ return self._percount_feats @property - def AggregateFeatures(self) -> Dict[str,AggregateSchema]: + def AggregateFeatures(self) -> Dict[str,AggregateConfig]: """Property for the dictionary of aggregate features. 
""" return self._aggregate_feats @@ -305,10 +305,10 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging _game_state : Dict[str, Any] _user_data : Dict[str, Any] _event_list : List[EventSchema] - _detector_map : Dict[str, Dict[str, DetectorSchema]] - _aggregate_feats : Dict[str, AggregateSchema] = {} - _percount_feats : Dict[str, PerCountSchema] = {} - _legacy_perlevel_feats : Dict[str, PerCountSchema] = {} + _detector_map : Dict[str, Dict[str, DetectorConfig]] + _aggregate_feats : Dict[str, AggregateConfig] = {} + _percount_feats : Dict[str, PerCountConfig] = {} + _legacy_perlevel_feats : Dict[str, PerCountConfig] = {} _legacy_mode : bool _config : Dict[str, Any] _min_level : Optional[int] @@ -349,7 +349,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging parser_function=cls._parseDetectorMap, default_value=cls._DEFAULT_DETECTOR_MAP ) - _detector_map = _detector_map.AsDict # TODO : investigate weird Dict[str, Dict[str, DetectorSchema]] type inference + _detector_map = _detector_map.AsDict # TODO : investigate weird Dict[str, Dict[str, DetectorConfig]] type inference # 4. 
Get feature information _feat_map = cls.ElementFromDict(all_elements=all_elements, logger=logger, @@ -465,7 +465,7 @@ def DetectorEnabled(self, detector_name:str, iter_mode:IterationMode, extract_mo return False ret_val : bool - _detector_schema : Optional[DetectorSchema] + _detector_schema : Optional[DetectorConfig] match iter_mode: case IterationMode.AGGREGATE: _detector_schema = self.Detectors['aggregate'].get(detector_name) @@ -485,7 +485,7 @@ def FeatureEnabled(self, feature_name:str, iter_mode:IterationMode, extract_mode return feature_name == "legacy" ret_val : bool - _feature_schema : Optional[FeatureSchema] + _feature_schema : Optional[FeatureConfig] match iter_mode: case IterationMode.AGGREGATE: _feature_schema = self.AggregateFeatures.get(feature_name) @@ -500,10 +500,10 @@ def FeatureEnabled(self, feature_name:str, iter_mode:IterationMode, extract_mode ret_val = False return ret_val - def EnabledDetectors(self, iter_modes:Set[IterationMode], extract_modes:Set[ExtractionMode]=set()) -> Dict[str, DetectorSchema]: + def EnabledDetectors(self, iter_modes:Set[IterationMode], extract_modes:Set[ExtractionMode]=set()) -> Dict[str, DetectorConfig]: if self._legacy_mode: return {} - ret_val : Dict[str, DetectorSchema] = {} + ret_val : Dict[str, DetectorConfig] = {} if IterationMode.AGGREGATE in iter_modes: ret_val.update({key:val for key,val in self.AggregateDetectors.items() if val.Enabled.issuperset(extract_modes)}) @@ -511,10 +511,10 @@ def EnabledDetectors(self, iter_modes:Set[IterationMode], extract_modes:Set[Extr ret_val.update({key:val for key,val in self.PerCountDetectors.items() if val.Enabled.issuperset(extract_modes)}) return ret_val - def EnabledFeatures(self, iter_modes:Set[IterationMode]={IterationMode.AGGREGATE, IterationMode.PERCOUNT}, extract_modes:Set[ExtractionMode]=set()) -> Dict[str, FeatureSchema]: + def EnabledFeatures(self, iter_modes:Set[IterationMode]={IterationMode.AGGREGATE, IterationMode.PERCOUNT}, 
extract_modes:Set[ExtractionMode]=set()) -> Dict[str, FeatureConfig]: if self._legacy_mode: - return {"legacy" : AggregateSchema("legacy", {"type":"legacy", "return_type":None, "description":"", "enabled":True})} if IterationMode.AGGREGATE in iter_modes else {} - ret_val : Dict[str, FeatureSchema] = {} + return {"legacy" : AggregateConfig("legacy", {"type":"legacy", "return_type":None, "description":"", "enabled":True})} if IterationMode.AGGREGATE in iter_modes else {} + ret_val : Dict[str, FeatureConfig] = {} if IterationMode.AGGREGATE in iter_modes: ret_val.update({key:val for key,val in self.AggregateFeatures.items() if val.Enabled.issuperset(extract_modes)}) @@ -609,22 +609,22 @@ def _parseEventList(events_list:Dict[str, Any]) -> List[EventSchema]: return ret_val @staticmethod - def _parseDetectorMap(detector_map:Dict[str, Any]) -> DetectorMapSchema: - ret_val : DetectorMapSchema + def _parseDetectorMap(detector_map:Dict[str, Any]) -> DetectorMapConfig: + ret_val : DetectorMapConfig if isinstance(detector_map, dict): - ret_val = DetectorMapSchema.FromDict(name=f"Detectors", all_elements=detector_map) + ret_val = DetectorMapConfig.FromDict(name=f"Detectors", all_elements=detector_map) else: - ret_val = DetectorMapSchema.FromDict(name="Empty Features", all_elements={}) + ret_val = DetectorMapConfig.FromDict(name="Empty Features", all_elements={}) Logger.Log(f"detector_map was unexpected type {type(detector_map)}, defaulting to empty map.", logging.WARN) return ret_val @staticmethod - def _parseFeatureMap(feature_map:Dict[str, Any]) -> FeatureMapSchema: - ret_val : FeatureMapSchema + def _parseFeatureMap(feature_map:Dict[str, Any]) -> FeatureMapConfig: + ret_val : FeatureMapConfig if isinstance(feature_map, dict): - ret_val = FeatureMapSchema.FromDict(name=f"Features", all_elements=feature_map) + ret_val = FeatureMapConfig.FromDict(name=f"Features", all_elements=feature_map) else: - ret_val = FeatureMapSchema.FromDict(name="Empty Features", all_elements={}) + 
ret_val = FeatureMapConfig.FromDict(name="Empty Features", all_elements={}) Logger.Log(f"feature_map was unexpected type {type(feature_map)}, defaulting to empty map.", logging.WARN) return ret_val diff --git a/src/ogd/common/schemas/games/__init__.py b/src/ogd/common/schemas/games/__init__.py index c4f4cdb..d92c0ef 100644 --- a/src/ogd/common/schemas/games/__init__.py +++ b/src/ogd/common/schemas/games/__init__.py @@ -1,17 +1,7 @@ __all__ = [ "EventSchema", - "GeneratorSchema", - "DetectorSchema", - "FeatureSchema", - "AggregateSchema", - "PerCountSchema", "GameSchema" ] from . import EventSchema -from . import GeneratorSchema -from . import DetectorSchema -from . import FeatureSchema -from . import AggregateSchema -from . import PerCountSchema from . import GameSchema diff --git a/src/ogd/common/schemas/tables/ColumnMapSchema.py b/src/ogd/common/schemas/tables/ElementMappingSchema.py similarity index 65% rename from src/ogd/common/schemas/tables/ColumnMapSchema.py rename to src/ogd/common/schemas/tables/ElementMappingSchema.py index 2ae1fb3..ce6b2b1 100644 --- a/src/ogd/common/schemas/tables/ColumnMapSchema.py +++ b/src/ogd/common/schemas/tables/ElementMappingSchema.py @@ -2,99 +2,57 @@ import logging from typing import Any, Dict, List, Optional, TypeAlias # import local files +from ogd.common.models.enums.ElementMappingType import ElementMappingType +from ogd.common.schemas.tables.ColumnSchema import ColumnSchema from ogd.common.schemas.Schema import Schema from ogd.common.utils.Logger import Logger from ogd.common.utils.typing import Map -class ColumnMapSchema(Schema): - ColumnMapIndex : TypeAlias = Optional[int | List[int] | Dict[str,int]] +ElementMap: TypeAlias = ColumnSchema | List[ColumnSchema] | Dict[str,ColumnSchema] + +class ElementMappingSchema(Schema): + """Simple struct-like class to define a mapping of one or more data table columns to a single GameData element. 
+ + For example, the following JSON-style mapping definition for the EventData element of an Event: + ```json + "event_data" : { "item1":"someColumn", "item2":"someOtherColumn" } + ``` + would result in an ElementMappingSchema with name "EventData", mapping type "DICT" and mapping definition like: + ```python + { + "item1" : , + "item2" : + } + ``` + """ _DEFAULT_MAP = {} _DEFAULT_COLUMN_NAMES = [] # *** BUILT-INS & PROPERTIES *** - def __init__(self, name:str, map:Dict[str, ColumnMapIndex], column_names:List[str], other_elements:Optional[Map]=None): - self._map : Dict[str, ColumnMapSchema.ColumnMapIndex] = map - self._column_names : List[str] = column_names - + def __init__(self, name:str, map:ElementMap, other_elements:Optional[Map]=None): + self._map : ElementMap = map + self._map_type : ElementMappingType + if isinstance(map, ColumnSchema): + self._map_type = ElementMappingType.SINGLE + elif isinstance(map, list): + self._map_type = ElementMappingType.LIST + elif isinstance(map, dict): + self._map_type = ElementMappingType.DICT + else: + raise TypeError(f"The map passed to ElementMappingSchema had invalide type {type(map)}") super().__init__(name=name, other_elements=other_elements) @property - def Map(self) -> Dict[str, ColumnMapIndex]: - """Mapping from Event element names to the indices of the database columns mapped to them. - There may be a single index, indicating a 1-to-1 mapping of a database column to the element; - There may be a list of indices, indicating multiple columns will be concatenated to form the element value; - There may be a further mapping of keys to indicies, indicating multiple columns will be joined into a JSON object, with keys mapped to values found at the columns with given indices. - - :return: The dictionary mapping of element names to indices. 
- :rtype: Dict[str, Union[int, List[int], Dict[str, int], None]] - """ + def Map(self) -> ElementMap: return self._map @property - def SessionID(self) -> ColumnMapIndex: - return self._map['session_id'] - - @property - def AppID(self) -> ColumnMapIndex: - return self._map['app_id'] - - @property - def Timestamp(self) -> ColumnMapIndex: - return self._map['timestamp'] - - @property - def EventName(self) -> ColumnMapIndex: - return self._map['event_name'] - - @property - def EventData(self) -> ColumnMapIndex: - return self._map['event_data'] - - @property - def EventSource(self) -> ColumnMapIndex: - return self._map['event_source'] - - @property - def AppVersion(self) -> ColumnMapIndex: - return self._map['app_version'] - - @property - def AppBranch(self) -> ColumnMapIndex: - return self._map['app_branch'] - - @property - def LogVersion(self) -> ColumnMapIndex: - return self._map['log_version'] - - @property - def TimeOffset(self) -> ColumnMapIndex: - return self._map['time_offset'] - - @property - def UserID(self) -> ColumnMapIndex: - return self._map['user_id'] - - @property - def UserData(self) -> ColumnMapIndex: - return self._map['user_data'] - - @property - def GameState(self) -> ColumnMapIndex: - return self._map['game_state'] - - @property - def EventSequenceIndex(self) -> ColumnMapIndex: - return self._map['event_sequence_index'] - - @property - def Elements(self) -> Dict[str, str]: - return self._other_elements - - @property - def ElementNames(self) -> List[str]: - return list(self._other_elements.keys()) + def ColumnNames(self) -> List[str]: + match self._map_type: + case ElementMappingType.SINGLE: + # *** IMPLEMENT ABSTRACT FUNCTIONS *** @@ -120,7 +78,7 @@ def AsMarkdown(self) -> str: return ret_val @classmethod - def FromDict(cls, name:str, all_elements:Dict[str, Any], column_names:List[str], logger:Optional[logging.Logger]=None)-> "ColumnMapSchema": + def FromDict(cls, name:str, all_elements:Dict[str, Any], column_names:List[str], 
logger:Optional[logging.Logger]=None)-> "ElementMappingSchema": """Function to generate a ColumnMapSchema from a JSON object TODO : find a way around using column_names as a direct parameter. @@ -136,7 +94,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], column_names:List[str], :return: _description_ :rtype: ColumnMapSchema """ - _map : Dict[str, ColumnMapSchema.ColumnMapIndex] = { + _map : Dict[str, ElementMappingSchema.ElementMapIndex] = { "session_id" : None, "app_id" : None, "timestamp" : None, @@ -179,11 +137,11 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], column_names:List[str], Logger.Log(f"Column config does not have a '{key}' element, defaulting to {key} : None", logging.WARN) _leftovers = { key : val for key,val in all_elements.items() if key not in _map.keys() } - return ColumnMapSchema(name=name, map=_map, column_names=column_names, other_elements=_leftovers) + return ElementMappingSchema(name=name, map=_map, column_names=column_names, other_elements=_leftovers) @classmethod - def Default(cls) -> "ColumnMapSchema": - return ColumnMapSchema( + def Default(cls) -> "ElementMappingSchema": + return ElementMappingSchema( name="DefaultColumnMapSchema", map=cls._DEFAULT_MAP, column_names=cls._DEFAULT_COLUMN_NAMES, diff --git a/src/ogd/common/schemas/tables/EventTableSchema.py b/src/ogd/common/schemas/tables/EventTableSchema.py new file mode 100644 index 0000000..326084e --- /dev/null +++ b/src/ogd/common/schemas/tables/EventTableSchema.py @@ -0,0 +1,335 @@ +"""EventTableSchema Module""" +# import standard libraries +import logging +from collections import Counter +from datetime import datetime, timedelta, timezone +from typing import Any, Dict, List, Tuple, Optional + +# import local files +from ogd.common.models.enums.TableType import TableType +from ogd.common.models.Event import Event, EventSource +from ogd.common.schemas.tables.TableSchema import TableSchema, ColumnMapIndex, ColumnMapElement +from 
ogd.common.schemas.tables.ColumnSchema import ColumnSchema +from ogd.common.utils.Logger import Logger +from ogd.common.utils.typing import Map, conversions + +## @class TableSchema +# Dumb struct to hold useful info about the structure of database data +# for a particular game. +# This includes the indices of several important database columns, the names +# of the database columns, the max and min levels in the game, and a list of +# IDs for the game sessions in the given requested date range. +class EventTableSchema(TableSchema): + + # *** BUILT-INS & PROPERTIES *** + + def __init__(self, name, table_type:TableType, column_map:Dict[str, ColumnMapIndex], columns:List[ColumnSchema], other_elements:Optional[Map]): + """Constructor for the TableSchema class. + Given a database connection and a game data request, + this retrieves a bit of information from the database to fill in the + class variables. + + :param schema_name: The filename for the table schema JSON. + :type schema_name: str + :param schema_path: Path to find the given table schema file, defaults to "./schemas/table_schemas/" + :type schema_path: str, optional + :param is_legacy: [description], defaults to False + :type is_legacy: bool, optional + """ + super().__init__(name=name, table_type=table_type, column_map=column_map, columns=columns, other_elements=other_elements) + + @property + def TimestampIndex(self) -> ColumnMapIndex: + return self._column_map['timestamp'] + + @property + def EventNameIndex(self) -> ColumnMapIndex: + return self._column_map['event_name'] + + @property + def EventDataIndex(self) -> ColumnMapIndex: + return self._column_map['event_data'] + + @property + def EventSourceIndex(self) -> ColumnMapIndex: + return self._column_map['event_source'] + + @property + def TimeOffsetIndex(self) -> ColumnMapIndex: + return self._column_map['time_offset'] + + @property + def UserDataIndex(self) -> ColumnMapIndex: + return self._column_map['user_data'] + + @property + def GameStateIndex(self) 
-> ColumnMapIndex: + return self._column_map['game_state'] + + @property + def EventSequenceIndexIndex(self) -> ColumnMapIndex: + return self._column_map['event_sequence_index'] + + @property + def TimestampColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.TimestampIndex, int): + ret_val = self.ColumnNames[self.TimestampIndex] + elif isinstance(self.TimestampIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.TimestampIndex]) + return ret_val + + @property + def EventNameColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.EventNameIndex, int): + ret_val = self.ColumnNames[self.EventNameIndex] + elif isinstance(self.EventNameIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventNameIndex]) + return ret_val + + @property + def EventDataColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.EventDataIndex, int): + ret_val = self.ColumnNames[self.EventDataIndex] + elif isinstance(self.EventDataIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventDataIndex]) + return ret_val + + @property + def EventSourceColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.EventSourceIndex, int): + ret_val = self.ColumnNames[self.EventSourceIndex] + elif isinstance(self.EventSourceIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventSourceIndex]) + return ret_val + + @property + def TimeOffsetColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.TimeOffsetIndex, int): + ret_val = self.ColumnNames[self.TimeOffsetIndex] + elif isinstance(self.TimeOffsetIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.TimeOffsetIndex]) + return ret_val + + @property + def UserDataColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.UserDataIndex, int): + ret_val = self.ColumnNames[self.UserDataIndex] + elif isinstance(self.UserDataIndex, list): + ret_val = ", 
".join([self.ColumnNames[idx] for idx in self.UserDataIndex]) + return ret_val + + @property + def GameStateColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.GameStateIndex, int): + ret_val = self.ColumnNames[self.GameStateIndex] + elif isinstance(self.GameStateIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.GameStateIndex]) + return ret_val + + @property + def EventSequenceIndexColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.EventSequenceIndexIndex, int): + ret_val = self.ColumnNames[self.EventSequenceIndexIndex] + elif isinstance(self.EventSequenceIndexIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventSequenceIndexIndex]) + return ret_val + + # *** IMPLEMENT ABSTRACT FUNCTIONS *** + + @classmethod + def Default(cls) -> "EventTableSchema": + return EventTableSchema( + name="DefaultEventTableSchema", + table_type=TableType.EVENT, + column_map={}, + columns=cls._DEFAULT_COLUMNS, + other_elements={} + ) + + @classmethod + def _fromDict(cls, name:str, table_type:TableType, raw_map:Dict[str, ColumnMapElement], column_schemas:List[ColumnSchema], logger:Optional[logging.Logger]=None) -> "TableSchema": + _column_map : Dict[str, ColumnMapIndex] = { + "session_id" : None, + "app_id" : None, + "timestamp" : None, + "event_name" : None, + "event_data" : None, + "event_source" : None, + "app_version" : None, + "app_branch" : None, + "log_version" : None, + "time_offset" : None, + "user_id" : None, + "user_data" : None, + "game_state" : None, + "event_sequence_index" : None + } + + column_names = [elem.Name for elem in column_schemas] + if not isinstance(raw_map, dict): + raw_map = {} + _msg = f"For {name} column map schema, raw_map was not a dict, defaulting to empty dict" + if logger: + logger.warning(_msg) + else: + Logger.Log(_msg, logging.WARN) + # for each item in the map above that we expect... 
+ for key in _column_map.keys(): + # if the item was found in the given "column_map" dictionary... + if key in raw_map: + # parse what was mapped to the item. Could get back a string, or a list, or a dict... + element = cls._parseElement(elem=map[key], name=key) + # then if we got a string, we just find it in list of column names + if isinstance(element, str): + _column_map[key] = column_names.index(element) + # but if it's a list, we need to get index of each item in list of column names + elif isinstance(element, list): + _column_map[key] = [column_names.index(listelem) for listelem in element] + # but if it's a dict, we need to make equivalent dict mapping the key (new name) to the index (in list of column names) + elif isinstance(element, dict): + _column_map[key] = {key : column_names.index(listelem) for key,listelem in element.items()} + else: + Logger.Log(f"Column config does not have a '{key}' element, defaulting to {key} : None", logging.WARN) + _leftovers = { key : val for key,val in raw_map.items() if key not in _column_map.keys() } + + return EventTableSchema(name=name, table_type=table_type, column_map=_column_map, columns=column_schemas, other_elements=_leftovers) + + # *** PUBLIC STATICS *** + + # *** PUBLIC METHODS *** + + _conversion_warnings = Counter() + def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}): + """Function to convert a row to an Event, based on the loaded schema. + In general, columns specified in the schema's column_map are mapped to corresponding elements of the Event. + If the column_map gave a list, rather than a single column name, the values from each column are concatenated in order with '.' character separators. + Finally, the concatenated values (or single value) are parsed according to the type required by Event. 
+ One exception: For event_data, we expect to create a Dict object, so each column in the list will have its value parsed according to the type in 'columns', + and placed into a dict mapping the original column name to the parsed value (unless the parsed value is a dict, then it is merged into the top-level dict). + + :param row: The raw row data for an event. Generally assumed to be a tuple, though in principle a list would be fine too. + :type row: Tuple[str] + :param concatenator: A string to use as a separator when concatenating multiple columns into a single Event element. + :type concatenator: str + :return: [description] + :rtype: [type] + """ + # define vars to be passed as params + sess_id : str + app_id : str + tstamp : datetime + ename : str + edata : Map + app_ver : str + app_br : str + log_ver : str + offset : Optional[timezone] + uid : Optional[str] + udata : Optional[Map] + state : Optional[Map] + index : Optional[int] + + # 2) Handle event_data parameter, a special case. + # For this case we've got to parse the json, and then fold in whatever other columns were desired. + # 3) Assign vals to our arg vars and pass to Event ctor. 
+ sess_id = self._getValueFromRow(row=row, indices=self.SessionIDIndex, concatenator=concatenator, fallback=fallbacks.get('session_id')) + if not isinstance(sess_id, str): + if "sess_id" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set session_id as {type(sess_id)}, but session_id should be a string", logging.WARN) + EventTableSchema._conversion_warnings["sess_id"] += 1 + sess_id = str(sess_id) + + app_id = self._getValueFromRow(row=row, indices=self.AppIDIndex, concatenator=concatenator, fallback=fallbacks.get('app_id')) + if not isinstance(app_id, str): + if "app_id" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set app_id as {type(app_id)}, but app_id should be a string", logging.WARN) + EventTableSchema._conversion_warnings["app_id"] += 1 + app_id = str(app_id) + + tstamp = self._getValueFromRow(row=row, indices=self.TimestampIndex, concatenator=concatenator, fallback=fallbacks.get('timestamp')) + if not isinstance(tstamp, datetime): + if "timestamp" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema parsed timestamp as {type(tstamp)}, but timestamp should be a datetime", logging.WARN) + EventTableSchema._conversion_warnings["timestamp"] += 1 + tstamp = conversions.DatetimeFromString(tstamp) + + ename = self._getValueFromRow(row=row, indices=self.EventNameIndex, concatenator=concatenator, fallback=fallbacks.get('event_name')) + if not isinstance(ename, str): + if "ename" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set event_name as {type(ename)}, but event_name should be a string", logging.WARN) + EventTableSchema._conversion_warnings["ename"] += 1 + ename = str(ename) + + datas : Dict[str, Any] = self._getValueFromRow(row=row, indices=self.EventDataIndex, concatenator=concatenator, fallback=fallbacks.get('event_data')) + + # TODO: go bac 
to isostring function; need 0-padding on ms first, though + edata = dict(sorted(datas.items())) # Sort keys alphabetically + + esrc = self._getValueFromRow(row=row, indices=self.EventSourceIndex, concatenator=concatenator, fallback=fallbacks.get('event_source', EventSource.GAME)) + if not isinstance(esrc, EventSource): + if "esrc" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set event_source as {type(esrc)}, but event_source should be an EventSource", logging.WARN) + EventTableSchema._conversion_warnings["esrc"] += 1 + esrc = EventSource.GENERATED if esrc == "GENERATED" else EventSource.GAME + + app_ver = self._getValueFromRow(row=row, indices=self.AppVersionIndex, concatenator=concatenator, fallback=fallbacks.get('app_version', "0")) + if not isinstance(app_ver, str): + if "app_ver" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set app_version as {type(app_ver)}, but app_version should be a string", logging.WARN) + EventTableSchema._conversion_warnings["app_ver"] += 1 + app_ver = str(app_ver) + + app_br = self._getValueFromRow(row=row, indices=self.AppBranchIndex, concatenator=concatenator, fallback=fallbacks.get('app_branch')) + if not isinstance(app_br, str): + if "app_br" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set app_branch as {type(app_br)}, but app_branch should be a string", logging.WARN) + EventTableSchema._conversion_warnings["app_br"] += 1 + app_br = str(app_br) + + log_ver = self._getValueFromRow(row=row, indices=self.LogVersionIndex, concatenator=concatenator, fallback=fallbacks.get('log_version', "0")) + if not isinstance(log_ver, str): + if "log_ver" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set log_version as {type(log_ver)}, but log_version should be a string", logging.WARN) + 
EventTableSchema._conversion_warnings["log_ver"] += 1 + log_ver = str(log_ver) + + offset = self._getValueFromRow(row=row, indices=self.TimeOffsetIndex, concatenator=concatenator, fallback=fallbacks.get('time_offset')) + if isinstance(offset, timedelta): + if "offset" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set offset as {type(offset)}, but offset should be a timezone", logging.WARN) + EventTableSchema._conversion_warnings["offset"] += 1 + offset = timezone(offset) + + uid = self._getValueFromRow(row=row, indices=self.UserIDIndex, concatenator=concatenator, fallback=fallbacks.get('user_id')) + if uid is not None and not isinstance(uid, str): + if "uid" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set user_id as {type(uid)}, but user_id should be a string", logging.WARN) + EventTableSchema._conversion_warnings["uid"] += 1 + uid = str(uid) + + udata = self._getValueFromRow(row=row, indices=self.UserDataIndex, concatenator=concatenator, fallback=fallbacks.get('user_data')) + + state = self._getValueFromRow(row=row, indices=self.GameStateIndex, concatenator=concatenator, fallback=fallbacks.get('game_state')) + + index = self._getValueFromRow(row=row, indices=self.EventSequenceIndexIndex, concatenator=concatenator, fallback=fallbacks.get('event_sequence_index')) + if index is not None and not isinstance(index, int): + if "index" not in EventTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set event_sequence_index as {type(index)}, but event_sequence_index should be an int", logging.WARN) + EventTableSchema._conversion_warnings["index"] += 1 + index = int(index) + + return Event(session_id=sess_id, app_id=app_id, timestamp=tstamp, + event_name=ename, event_data=edata, event_source=esrc, + app_version=app_ver, app_branch=app_br, log_version=log_ver, + time_offset=offset, user_id=uid, user_data=udata, + 
game_state=state, event_sequence_index=index) + + # *** PRIVATE STATICS *** diff --git a/src/ogd/common/schemas/tables/FeatureTableSchema.py b/src/ogd/common/schemas/tables/FeatureTableSchema.py new file mode 100644 index 0000000..b57a676 --- /dev/null +++ b/src/ogd/common/schemas/tables/FeatureTableSchema.py @@ -0,0 +1,339 @@ +"""FeatureTableSchema Module""" +# import standard libraries +import logging +from collections import Counter +from datetime import datetime, timedelta, timezone +from typing import Any, Dict, List, Tuple, Optional + +# import local files +from ogd.common.models.enums.TableType import TableType +from ogd.common.models.FeatureData import FeatureData +from ogd.common.models.Event import Event, EventSource +from ogd.common.schemas.tables.TableSchema import TableSchema, ColumnMapIndex, ColumnMapElement +from ogd.common.schemas.tables.ColumnSchema import ColumnSchema +from ogd.common.utils.typing import Map +from ogd.common.utils.Logger import Logger +from ogd.common.utils.typing import Map, conversions + +## @class FeatureTableSchema +class FeatureTableSchema(TableSchema): + """Dumb struct to hold useful info about the structure of feature data for a particular game in a particular database. + This includes the indices of several important database columns, the names + of the database columns, and a list of + IDs for the game sessions in the given requested date range. + + TODO : right now, this is all just a copy of what's in EventTableSchema, need to implement for feature data. + """ + + # *** BUILT-INS & PROPERTIES *** + + def __init__(self, name, table_type:TableType, column_map:Dict[str, ColumnMapIndex], columns:List[ColumnSchema], other_elements:Optional[Map]): + """Constructor for the TableSchema class. + Given a database connection and a game data request, + this retrieves a bit of information from the database to fill in the + class variables. + + :param schema_name: The filename for the table schema JSON.
+ :type schema_name: str + :param schema_path: Path to find the given table schema file, defaults to "./schemas/table_schemas/" + :type schema_path: str, optional + :param is_legacy: [description], defaults to False + :type is_legacy: bool, optional + """ + super().__init__(name=name, table_type=table_type, column_map=column_map, columns=columns, other_elements=other_elements) + + @property + def TimestampIndex(self) -> ColumnMapIndex: + return self._column_map['timestamp'] + + @property + def EventNameIndex(self) -> ColumnMapIndex: + return self._column_map['event_name'] + + @property + def EventDataIndex(self) -> ColumnMapIndex: + return self._column_map['event_data'] + + @property + def EventSourceIndex(self) -> ColumnMapIndex: + return self._column_map['event_source'] + + @property + def TimeOffsetIndex(self) -> ColumnMapIndex: + return self._column_map['time_offset'] + + @property + def UserDataIndex(self) -> ColumnMapIndex: + return self._column_map['user_data'] + + @property + def GameStateIndex(self) -> ColumnMapIndex: + return self._column_map['game_state'] + + @property + def EventSequenceIndexIndex(self) -> ColumnMapIndex: + return self._column_map['event_sequence_index'] + + @property + def TimestampColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.TimestampIndex, int): + ret_val = self.ColumnNames[self.TimestampIndex] + elif isinstance(self.TimestampIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.TimestampIndex]) + return ret_val + + @property + def EventNameColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.EventNameIndex, int): + ret_val = self.ColumnNames[self.EventNameIndex] + elif isinstance(self.EventNameIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventNameIndex]) + return ret_val + + @property + def EventDataColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.EventDataIndex, int): + ret_val = self.ColumnNames[self.EventDataIndex] + 
elif isinstance(self.EventDataIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventDataIndex]) + return ret_val + + @property + def EventSourceColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.EventSourceIndex, int): + ret_val = self.ColumnNames[self.EventSourceIndex] + elif isinstance(self.EventSourceIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventSourceIndex]) + return ret_val + + @property + def TimeOffsetColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.TimeOffsetIndex, int): + ret_val = self.ColumnNames[self.TimeOffsetIndex] + elif isinstance(self.TimeOffsetIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.TimeOffsetIndex]) + return ret_val + + @property + def UserDataColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.UserDataIndex, int): + ret_val = self.ColumnNames[self.UserDataIndex] + elif isinstance(self.UserDataIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.UserDataIndex]) + return ret_val + + @property + def GameStateColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.GameStateIndex, int): + ret_val = self.ColumnNames[self.GameStateIndex] + elif isinstance(self.GameStateIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.GameStateIndex]) + return ret_val + + @property + def EventSequenceIndexColumn(self) -> Optional[str]: + ret_val = None + if isinstance(self.EventSequenceIndexIndex, int): + ret_val = self.ColumnNames[self.EventSequenceIndexIndex] + elif isinstance(self.EventSequenceIndexIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.EventSequenceIndexIndex]) + return ret_val + + # *** IMPLEMENT ABSTRACT FUNCTIONS *** + + @classmethod + def Default(cls) -> "FeatureTableSchema": + return FeatureTableSchema( + name="DefaultFeatureTableSchema", + table_type=TableType.FEATURE, + column_map={}, + columns=cls._DEFAULT_COLUMNS, + 
other_elements={} + ) + + @classmethod + def _fromDict(cls, name:str, table_type:TableType, raw_map:Dict[str, ColumnMapElement], column_schemas:List[ColumnSchema], logger:Optional[logging.Logger]=None) -> "TableSchema": + _column_map : Dict[str, ColumnMapIndex] = { + "session_id" : None, + "app_id" : None, + "timestamp" : None, + "event_name" : None, + "event_data" : None, + "event_source" : None, + "app_version" : None, + "app_branch" : None, + "log_version" : None, + "time_offset" : None, + "user_id" : None, + "user_data" : None, + "game_state" : None, + "event_sequence_index" : None + } + + column_names = [elem.Name for elem in column_schemas] + if not isinstance(raw_map, dict): + raw_map = {} + _msg = f"For {name} column map schema, raw_map was not a dict, defaulting to empty dict" + if logger: + logger.warning(_msg) + else: + Logger.Log(_msg, logging.WARN) + # for each item in the map above that we expect... + for key in _column_map.keys(): + # if the item was found in the given "column_map" dictionary... + if key in raw_map: + # parse what was mapped to the item. Could get back a string, or a list, or a dict... 
+ element = cls._parseElement(elem=raw_map[key], name=key) + # then if we got a string, we just find it in list of column names + if isinstance(element, str): + _column_map[key] = column_names.index(element) + # but if it's a list, we need to get index of each item in list of column names + elif isinstance(element, list): + _column_map[key] = [column_names.index(listelem) for listelem in element] + # but if it's a dict, we need to make equivalent dict mapping the key (new name) to the index (in list of column names) + elif isinstance(element, dict): + _column_map[key] = {key : column_names.index(listelem) for key,listelem in element.items()} + else: + Logger.Log(f"Column config does not have a '{key}' element, defaulting to {key} : None", logging.WARN) + _leftovers = { key : val for key,val in raw_map.items() if key not in _column_map.keys() } + + return FeatureTableSchema(name=name, table_type=table_type, column_map=_column_map, columns=column_schemas, other_elements=_leftovers) + + # *** PUBLIC STATICS *** + + # *** PUBLIC METHODS *** + + _conversion_warnings = Counter() + def RowToFeatureData(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}) -> FeatureData: + """Function to convert a row to an Event, based on the loaded schema. + In general, columns specified in the schema's column_map are mapped to corresponding elements of the Event. + If the column_map gave a list, rather than a single column name, the values from each column are concatenated in order with '.' character separators. + Finally, the concatenated values (or single value) are parsed according to the type required by Event. + One exception: For event_data, we expect to create a Dict object, so each column in the list will have its value parsed according to the type in 'columns', + and placed into a dict mapping the original column name to the parsed value (unless the parsed value is a dict, then it is merged into the top-level dict). + + :param row: The raw row data for an event.
Generally assumed to be a tuple, though in principle a list would be fine too. + :type row: Tuple[str] + :param concatenator: A string to use as a separator when concatenating multiple columns into a single Event element. + :type concatenator: str + :return: [description] + :rtype: [type] + """ + # define vars to be passed as params + sess_id : str + app_id : str + tstamp : datetime + ename : str + edata : Map + app_ver : str + app_br : str + log_ver : str + offset : Optional[timezone] + uid : Optional[str] + udata : Optional[Map] + state : Optional[Map] + index : Optional[int] + + # 2) Handle event_data parameter, a special case. + # For this case we've got to parse the json, and then fold in whatever other columns were desired. + # 3) Assign vals to our arg vars and pass to Event ctor. + sess_id = self._getValueFromRow(row=row, indices=self.SessionIDIndex, concatenator=concatenator, fallback=fallbacks.get('session_id')) + if not isinstance(sess_id, str): + if "sess_id" not in FeatureTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set session_id as {type(sess_id)}, but session_id should be a string", logging.WARN) + FeatureTableSchema._conversion_warnings["sess_id"] += 1 + sess_id = str(sess_id) + + app_id = self._getValueFromRow(row=row, indices=self.AppIDIndex, concatenator=concatenator, fallback=fallbacks.get('app_id')) + if not isinstance(app_id, str): + if "app_id" not in FeatureTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set app_id as {type(app_id)}, but app_id should be a string", logging.WARN) + FeatureTableSchema._conversion_warnings["app_id"] += 1 + app_id = str(app_id) + + tstamp = self._getValueFromRow(row=row, indices=self.TimestampIndex, concatenator=concatenator, fallback=fallbacks.get('timestamp')) + if not isinstance(tstamp, datetime): + if "timestamp" not in FeatureTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema 
parsed timestamp as {type(tstamp)}, but timestamp should be a datetime", logging.WARN) + FeatureTableSchema._conversion_warnings["timestamp"] += 1 + tstamp = conversions.DatetimeFromString(tstamp) + + ename = self._getValueFromRow(row=row, indices=self.EventNameIndex, concatenator=concatenator, fallback=fallbacks.get('event_name')) + if not isinstance(ename, str): + if "ename" not in FeatureTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set event_name as {type(ename)}, but event_name should be a string", logging.WARN) + FeatureTableSchema._conversion_warnings["ename"] += 1 + ename = str(ename) + + datas : Dict[str, Any] = self._getValueFromRow(row=row, indices=self.EventDataIndex, concatenator=concatenator, fallback=fallbacks.get('event_data')) + + # TODO: go bac to isostring function; need 0-padding on ms first, though + edata = dict(sorted(datas.items())) # Sort keys alphabetically + + esrc = self._getValueFromRow(row=row, indices=self.EventSourceIndex, concatenator=concatenator, fallback=fallbacks.get('event_source', EventSource.GAME)) + if not isinstance(esrc, EventSource): + if "esrc" not in FeatureTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set event_source as {type(esrc)}, but event_source should be an EventSource", logging.WARN) + FeatureTableSchema._conversion_warnings["esrc"] += 1 + esrc = EventSource.GENERATED if esrc == "GENERATED" else EventSource.GAME + + app_ver = self._getValueFromRow(row=row, indices=self.AppVersionIndex, concatenator=concatenator, fallback=fallbacks.get('app_version', "0")) + if not isinstance(app_ver, str): + if "app_ver" not in FeatureTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set app_version as {type(app_ver)}, but app_version should be a string", logging.WARN) + FeatureTableSchema._conversion_warnings["app_ver"] += 1 + app_ver = str(app_ver) + + app_br = self._getValueFromRow(row=row, 
indices=self.AppBranchIndex, concatenator=concatenator, fallback=fallbacks.get('app_branch')) + if not isinstance(app_br, str): + if "app_br" not in FeatureTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set app_branch as {type(app_br)}, but app_branch should be a string", logging.WARN) + FeatureTableSchema._conversion_warnings["app_br"] += 1 + app_br = str(app_br) + + log_ver = self._getValueFromRow(row=row, indices=self.LogVersionIndex, concatenator=concatenator, fallback=fallbacks.get('log_version', "0")) + if not isinstance(log_ver, str): + if "log_ver" not in FeatureTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set log_version as {type(log_ver)}, but log_version should be a string", logging.WARN) + FeatureTableSchema._conversion_warnings["log_ver"] += 1 + log_ver = str(log_ver) + + offset = self._getValueFromRow(row=row, indices=self.TimeOffsetIndex, concatenator=concatenator, fallback=fallbacks.get('time_offset')) + if isinstance(offset, timedelta): + if "offset" not in FeatureTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set offset as {type(offset)}, but offset should be a timezone", logging.WARN) + FeatureTableSchema._conversion_warnings["offset"] += 1 + offset = timezone(offset) + + uid = self._getValueFromRow(row=row, indices=self.UserIDIndex, concatenator=concatenator, fallback=fallbacks.get('user_id')) + if uid is not None and not isinstance(uid, str): + if "uid" not in FeatureTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set user_id as {type(uid)}, but user_id should be a string", logging.WARN) + FeatureTableSchema._conversion_warnings["uid"] += 1 + uid = str(uid) + + udata = self._getValueFromRow(row=row, indices=self.UserDataIndex, concatenator=concatenator, fallback=fallbacks.get('user_data')) + + state = self._getValueFromRow(row=row, indices=self.GameStateIndex, 
concatenator=concatenator, fallback=fallbacks.get('game_state')) + + index = self._getValueFromRow(row=row, indices=self.EventSequenceIndexIndex, concatenator=concatenator, fallback=fallbacks.get('event_sequence_index')) + if index is not None and not isinstance(index, int): + if "index" not in FeatureTableSchema._conversion_warnings: + Logger.Log(f"{self.Name} {self.TableKind} table schema set event_sequence_index as {type(index)}, but event_sequence_index should be an int", logging.WARN) + FeatureTableSchema._conversion_warnings["index"] += 1 + index = int(index) + + return Event(session_id=sess_id, app_id=app_id, timestamp=tstamp, + event_name=ename, event_data=edata, event_source=esrc, + app_version=app_ver, app_branch=app_br, log_version=log_ver, + time_offset=offset, user_id=uid, user_data=udata, + game_state=state, event_sequence_index=index) + + # *** PRIVATE STATICS *** diff --git a/src/ogd/common/schemas/tables/TableSchema.py b/src/ogd/common/schemas/tables/TableSchema.py index 078f01d..a9a60a3 100644 --- a/src/ogd/common/schemas/tables/TableSchema.py +++ b/src/ogd/common/schemas/tables/TableSchema.py @@ -1,35 +1,37 @@ ## import standard libraries -import json +import abc import logging -import re -from datetime import datetime, timedelta, timezone -from json.decoder import JSONDecodeError from pathlib import Path -from typing import Any, Dict, Final, List, Tuple, Optional, Union -## import 3rd-party libraries -from dateutil import parser +from typing import Any, Dict, List, Tuple, Optional, TypeAlias ## import local files -from ogd.core import schemas -from ogd.common.models.Event import Event, EventSource +from ogd.common import schemas +from ogd.common.models.enums.TableType import TableType from ogd.common.schemas.Schema import Schema -from ogd.common.schemas.tables.ColumnMapSchema import ColumnMapSchema from ogd.common.schemas.tables.ColumnSchema import ColumnSchema from ogd.common.utils import fileio from ogd.common.utils.Logger import Logger -from 
ogd.common.utils.typing import Map +from ogd.common.utils.typing import Map, conversions + +ColumnMapIndex : TypeAlias = Optional[int | List[int] | Dict[str,int]] +ColumnMapElement : TypeAlias = Optional[str | List[str] | Dict[str,str]] ## @class TableSchema -# Dumb struct to hold useful info about the structure of database data -# for a particular game. -# This includes the indices of several important database columns, the names -# of the database columns, the max and min levels in the game, and a list of -# IDs for the game sessions in the given requested date range. class TableSchema(Schema): - _DEFAULT_COLUMNS = [] + """Dumb struct to hold info about the structure of data for a particular game, from a particular source. + In particular, it contains an ordered list of columns in the data source table, + and a mapping of those columns to the corresponding elements of a formal OGD structure. + """ + + @classmethod + @abc.abstractmethod + def _fromDict(cls, name:str, table_type:TableType, raw_map:Dict[str, ColumnMapElement], column_schemas:List[ColumnSchema], logger:Optional[logging.Logger]=None) -> "TableSchema": + pass # *** BUILT-INS & PROPERTIES *** - def __init__(self, name, column_map:ColumnMapSchema, columns:List[ColumnSchema], other_elements:Optional[Map]=None): + _DEFAULT_COLUMNS = [] + + def __init__(self, name, table_type:TableType, column_map:Dict[str, ColumnMapIndex], columns:List[ColumnSchema], other_elements:Optional[Map]): """Constructor for the TableSchema class. Given a database connection and a game data request, this retrieves a bit of information from the database to fill in the @@ -44,149 +46,128 @@ class variables.
""" # declare and initialize vars # self._schema : Optional[Dict[str, Any]] = all_elements - self._column_map : ColumnMapSchema = column_map - self._columns : List[ColumnSchema] = columns + self._table_type : TableType = table_type + self._column_map : Dict[str, ColumnMapIndex] = column_map + self._table_columns : List[ColumnSchema] = columns # after loading the file, take the stuff we need and store. super().__init__(name=name, other_elements=other_elements) @property - def ColumnNames(self) -> List[str]: - """Function to get the names of all columns in the schema. + def TableKind(self) -> TableType: + """Property to show whether the given table schema is for events or features. - :return: Names of each column in the schema. - :rtype: List[str] + If this TableSchema was read from a file, this will reflect the type indicated in the file, + *even if the specific TableSchema subclass does not match*. + If this TableSchema was generated through some other means, or no type was indicated in the source file, + this will reflect the type of the instance. + + :return: Either TableType.EVENT or TableType.FEATURE + :rtype: TableType """ - return [col.Name for col in self._columns] + return self._table_type @property def Columns(self) -> List[ColumnSchema]: - return self._columns + return self._table_columns @property - def SessionIDColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.SessionID, int): - ret_val = self.ColumnNames[self._column_map.SessionID] - elif isinstance(self._column_map.SessionID, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.SessionID]) - return ret_val + def ColumnNames(self) -> List[str]: + """Function to get the names of all columns in the schema. + + :return: Names of each column in the schema. 
+ :rtype: List[str] + """ + return [col.Name for col in self._table_columns] @property - def AppIDColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.AppID, int): - ret_val = self.ColumnNames[self._column_map.AppID] - elif isinstance(self._column_map.AppID, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppID]) - return ret_val + def ColumnMap(self) -> Dict[str, ColumnMapIndex]: + """Mapping from Event element names to the indices of the database columns mapped to them. + There may be a single index, indicating a 1-to-1 mapping of a database column to the element; + There may be a list of indices, indicating multiple columns will be concatenated to form the element value; + There may be a further mapping of keys to indicies, indicating multiple columns will be joined into a JSON object, with keys mapped to values found at the columns with given indices. + + :return: The dictionary mapping of element names to indices. + :rtype: Dict[str, Union[int, List[int], Dict[str, int], None]] + """ + return self._column_map @property - def TimestampColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.Timestamp, int): - ret_val = self.ColumnNames[self._column_map.Timestamp] - elif isinstance(self._column_map.Timestamp, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.Timestamp]) - return ret_val + def AppIDIndex(self) -> ColumnMapIndex: + return self._column_map['app_id'] @property - def EventNameColumn(self) -> Optional[str]: + def AppIDColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.EventName, int): - ret_val = self.ColumnNames[self._column_map.EventName] - elif isinstance(self._column_map.EventName, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventName]) + if isinstance(self.AppIDIndex, int): + ret_val = self.ColumnNames[self.AppIDIndex] + elif isinstance(self.AppIDIndex, list): + ret_val = 
", ".join([self.ColumnNames[idx] for idx in self.AppIDIndex]) return ret_val @property - def EventDataColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.EventData, int): - ret_val = self.ColumnNames[self._column_map.EventData] - elif isinstance(self._column_map.EventData, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventData]) - return ret_val + def UserIDIndex(self) -> ColumnMapIndex: + return self._column_map['user_id'] @property - def EventSourceColumn(self) -> Optional[str]: + def UserIDColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.EventSource, int): - ret_val = self.ColumnNames[self._column_map.EventSource] - elif isinstance(self._column_map.EventSource, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventSource]) + if isinstance(self.UserIDIndex, int): + ret_val = self.ColumnNames[self.UserIDIndex] + elif isinstance(self.UserIDIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.UserIDIndex]) return ret_val @property - def AppVersionColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.AppVersion, int): - ret_val = self.ColumnNames[self._column_map.AppVersion] - elif isinstance(self._column_map.AppVersion, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppVersion]) - return ret_val + def SessionIDIndex(self) -> ColumnMapIndex: + return self._column_map['session_id'] @property - def AppBranchColumn(self) -> Optional[str]: + def SessionIDColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.AppBranch, int): - ret_val = self.ColumnNames[self._column_map.AppBranch] - elif isinstance(self._column_map.AppBranch, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.AppBranch]) + if isinstance(self.SessionIDIndex, int): + ret_val = self.ColumnNames[self.SessionIDIndex] + elif 
isinstance(self.SessionIDIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.SessionIDIndex]) return ret_val @property - def LogVersionColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.LogVersion, int): - ret_val = self.ColumnNames[self._column_map.LogVersion] - elif isinstance(self._column_map.LogVersion, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.LogVersion]) - return ret_val + def AppVersionIndex(self) -> ColumnMapIndex: + return self._column_map['app_version'] @property - def TimeOffsetColumn(self) -> Optional[str]: + def AppVersionColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.TimeOffset, int): - ret_val = self.ColumnNames[self._column_map.TimeOffset] - elif isinstance(self._column_map.TimeOffset, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.TimeOffset]) + if isinstance(self.AppVersionIndex, int): + ret_val = self.ColumnNames[self.AppVersionIndex] + elif isinstance(self.AppVersionIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppVersionIndex]) return ret_val @property - def UserIDColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.UserID, int): - ret_val = self.ColumnNames[self._column_map.UserID] - elif isinstance(self._column_map.UserID, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.UserID]) - return ret_val + def AppBranchIndex(self) -> ColumnMapIndex: + return self._column_map['app_branch'] @property - def UserDataColumn(self) -> Optional[str]: + def AppBranchColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.UserData, int): - ret_val = self.ColumnNames[self._column_map.UserData] - elif isinstance(self._column_map.UserData, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.UserData]) + if isinstance(self.AppBranchIndex, int): + ret_val = 
self.ColumnNames[self.AppBranchIndex] + elif isinstance(self.AppBranchIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.AppBranchIndex]) return ret_val @property - def GameStateColumn(self) -> Optional[str]: - ret_val = None - if isinstance(self._column_map.GameState, int): - ret_val = self.ColumnNames[self._column_map.GameState] - elif isinstance(self._column_map.GameState, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.GameState]) - return ret_val + def LogVersionIndex(self) -> ColumnMapIndex: + return self._column_map['log_version'] @property - def EventSequenceIndexColumn(self) -> Optional[str]: + def LogVersionColumn(self) -> Optional[str]: ret_val = None - if isinstance(self._column_map.EventSequenceIndex, int): - ret_val = self.ColumnNames[self._column_map.EventSequenceIndex] - elif isinstance(self._column_map.EventSequenceIndex, list): - ret_val = ", ".join([self.ColumnNames[idx] for idx in self._column_map.EventSequenceIndex]) + if isinstance(self.LogVersionIndex, int): + ret_val = self.ColumnNames[self.LogVersionIndex] + elif isinstance(self.LogVersionIndex, list): + ret_val = ", ".join([self.ColumnNames[idx] for idx in self.LogVersionIndex]) return ret_val # *** IMPLEMENT ABSTRACT FUNCTIONS *** @@ -196,17 +177,17 @@ def AsMarkdown(self) -> str: ret_val = "\n\n".join([ "## Database Columns", "The individual columns recorded in the database for this game.", - "\n".join([item.AsMarkdown for item in self.Columns]), - "## Event Object Elements", + self._columnSetMarkdown, + f"## {self.TableKind} Object Elements", "The elements (member variables) of each Event object, available to programmers when writing feature extractors. 
The right-hand side shows which database column(s) are mapped to a given element.", - self._column_map.AsMarkdown, + self._columnMapMarkdown, ""]) return ret_val @classmethod def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging.Logger]=None)-> "TableSchema": - _column_map : ColumnMapSchema _column_schemas : List[ColumnSchema] + _table_type : TableType if not isinstance(all_elements, dict): all_elements = {} @@ -215,313 +196,91 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging logger.warning(_msg) else: Logger.Log(_msg, logging.WARN) + _table_type_str = all_elements.get('table_type') + _table_type = TableType.FromString(_table_type_str) if _table_type_str is not None else TableType.EVENT _column_json_list = all_elements.get('columns', []) _column_schemas = [ColumnSchema.FromDict(name=column.get("name", "UNKNOWN COLUMN NAME"), all_elements=column) for column in _column_json_list] - _column_map = ColumnMapSchema.FromDict(name="Column Map", all_elements=all_elements.get('column_map', {}), column_names=[col.Name for col in _column_schemas]) - return TableSchema(name=name, column_map=_column_map, columns=_column_schemas) - - @classmethod - def Default(cls) -> "TableSchema": - return TableSchema( - name="DefaultTableSchema", - column_map=ColumnMapSchema.Default(), - columns=cls._DEFAULT_COLUMNS - ) + return cls._fromDict(name=name, table_type=_table_type, raw_map=all_elements.get('column_map', {}), column_schemas=_column_schemas) # *** PUBLIC STATICS *** - @staticmethod - def FromFile(schema_name:str, schema_path:Path = Path(schemas.__file__).parent / "table_schemas/") -> "TableSchema": + @classmethod + def FromFile(cls, schema_name:str, schema_path:Path = Path(schemas.__file__).parent / "table_schemas/") -> "TableSchema": _table_format_name : str = schema_name if not _table_format_name.lower().endswith(".json"): _table_format_name += ".json" _schema = fileio.loadJSONFile(filename=_table_format_name, 
path=schema_path) - return TableSchema.FromDict(name=schema_name, all_elements=_schema) + return cls.FromDict(name=schema_name, all_elements=_schema) # *** PUBLIC METHODS *** - _conversion_warnings = [] - def RowToEvent(self, row:Tuple, concatenator:str = '.', fallbacks:Map={}): - """Function to convert a row to an Event, based on the loaded schema. - In general, columns specified in the schema's column_map are mapped to corresponding elements of the Event. - If the column_map gave a list, rather than a single column name, the values from each column are concatenated in order with '.' character separators. - Finally, the concatenated values (or single value) are parsed according to the type required by Event. - One exception: For event_data, we expect to create a Dict object, so each column in the list will have its value parsed according to the type in 'columns', - and placed into a dict mapping the original column name to the parsed value (unless the parsed value is a dict, then it is merged into the top-level dict). - - :param row: The raw row data for an event. Generally assumed to be a tuple, though in principle a list would be fine too. - :type row: Tuple[str] - :param concatenator: A string to use as a separator when concatenating multiple columns into a single Event element. - :type concatenator: str - :return: [description] - :rtype: [type] - """ - # define vars to be passed as params - MAX_WARNINGS : Final[int] = 10 - sess_id : str - app_id : str - time : datetime - ename : str - edata : Map - app_ver : str - app_br : str - log_ver : str - offset : Optional[timezone] - uid : Optional[str] - udata : Optional[Map] - state : Optional[Map] - index : Optional[int] - - # 2) Handle event_data parameter, a special case. - # For this case we've got to parse the json, and then fold in whatever other columns were desired. - # 3) Assign vals to our arg vars and pass to Event ctor. 
- sess_id = self._getValueFromRow(row=row, indices=self._column_map.SessionID, concatenator=concatenator, fallback=fallbacks.get('session_id')) - if not isinstance(sess_id, str): - if "sess_id" not in TableSchema._conversion_warnings: - Logger.Log(f"{self.Name} table schema set session_id as {type(sess_id)}, but session_id should be a string", logging.WARN) - TableSchema._conversion_warnings.append("sess_id") - sess_id = str(sess_id) - - app_id = self._getValueFromRow(row=row, indices=self._column_map.AppID, concatenator=concatenator, fallback=fallbacks.get('app_id')) - if not isinstance(app_id, str): - if "app_id" not in TableSchema._conversion_warnings: - Logger.Log(f"{self.Name} table schema set app_id as {type(app_id)}, but app_id should be a string", logging.WARN) - TableSchema._conversion_warnings.append("app_id") - app_id = str(app_id) - - time = self._getValueFromRow(row=row, indices=self._column_map.Timestamp, concatenator=concatenator, fallback=fallbacks.get('timestamp')) - if not isinstance(time, datetime): - if "timestamp" not in TableSchema._conversion_warnings: - Logger.Log(f"{self.Name} table schema parsed timestamp as {type(time)}, but timestamp should be a datetime", logging.WARN) - TableSchema._conversion_warnings.append("timestamp") - time = TableSchema._convertDateTime(time) - - ename = self._getValueFromRow(row=row, indices=self._column_map.EventName, concatenator=concatenator, fallback=fallbacks.get('event_name')) - if not isinstance(ename, str): - if "ename" not in TableSchema._conversion_warnings: - Logger.Log(f"{self.Name} table schema set event_name as {type(ename)}, but event_name should be a string", logging.WARN) - TableSchema._conversion_warnings.append("ename") - ename = str(ename) - - datas : Dict[str, Any] = self._getValueFromRow(row=row, indices=self._column_map.EventData, concatenator=concatenator, fallback=fallbacks.get('event_data')) - - # TODO: go bac to isostring function; need 0-padding on ms first, though - edata = 
dict(sorted(datas.items())) # Sort keys alphabetically - - esrc = self._getValueFromRow(row=row, indices=self._column_map.EventSource, concatenator=concatenator, fallback=fallbacks.get('event_source', EventSource.GAME)) - if not isinstance(esrc, EventSource): - if "esrc" not in TableSchema._conversion_warnings: - Logger.Log(f"{self.Name} table schema set event_source as {type(esrc)}, but event_source should be an EventSource", logging.WARN) - TableSchema._conversion_warnings.append("esrc") - esrc = EventSource.GENERATED if esrc == "GENERATED" else EventSource.GAME - - app_ver = self._getValueFromRow(row=row, indices=self._column_map.AppVersion, concatenator=concatenator, fallback=fallbacks.get('app_version', "0")) - if not isinstance(app_ver, str): - if "app_ver" not in TableSchema._conversion_warnings: - Logger.Log(f"{self.Name} table schema set app_version as {type(app_ver)}, but app_version should be a string", logging.WARN) - TableSchema._conversion_warnings.append("app_ver") - app_ver = str(app_ver) - - app_br = self._getValueFromRow(row=row, indices=self._column_map.AppBranch, concatenator=concatenator, fallback=fallbacks.get('app_branch')) - if not isinstance(app_br, str): - if "app_br" not in TableSchema._conversion_warnings: - Logger.Log(f"{self.Name} table schema set app_branch as {type(app_br)}, but app_branch should be a string", logging.WARN) - TableSchema._conversion_warnings.append("app_br") - app_br = str(app_br) - - log_ver = self._getValueFromRow(row=row, indices=self._column_map.LogVersion, concatenator=concatenator, fallback=fallbacks.get('log_version', "0")) - if not isinstance(log_ver, str): - if "log_ver" not in TableSchema._conversion_warnings: - Logger.Log(f"{self.Name} table schema set log_version as {type(log_ver)}, but log_version should be a string", logging.WARN) - TableSchema._conversion_warnings.append("log_ver") - log_ver = str(log_ver) - - offset = self._getValueFromRow(row=row, indices=self._column_map.TimeOffset, 
concatenator=concatenator, fallback=fallbacks.get('time_offset')) - if isinstance(offset, timedelta): - if "offset" not in TableSchema._conversion_warnings: - Logger.Log(f"{self.Name} table schema set offset as {type(offset)}, but offset should be a timezone", logging.WARN) - TableSchema._conversion_warnings.append("offset") - offset = timezone(offset) - - uid = self._getValueFromRow(row=row, indices=self._column_map.UserID, concatenator=concatenator, fallback=fallbacks.get('user_id')) - if uid is not None and not isinstance(uid, str): - if "uid" not in TableSchema._conversion_warnings: - Logger.Log(f"{self.Name} table schema set user_id as {type(uid)}, but user_id should be a string", logging.WARN) - TableSchema._conversion_warnings.append("uid") - uid = str(uid) - - udata = self._getValueFromRow(row=row, indices=self._column_map.UserData, concatenator=concatenator, fallback=fallbacks.get('user_data')) - - state = self._getValueFromRow(row=row, indices=self._column_map.GameState, concatenator=concatenator, fallback=fallbacks.get('game_state')) - - index = self._getValueFromRow(row=row, indices=self._column_map.EventSequenceIndex, concatenator=concatenator, fallback=fallbacks.get('event_sequence_index')) - if index is not None and not isinstance(index, int): - if "index" not in TableSchema._conversion_warnings: - Logger.Log(f"{self.Name} table schema set event_sequence_index as {type(index)}, but event_sequence_index should be an int", logging.WARN) - TableSchema._conversion_warnings.append("index") - index = int(index) - - return Event(session_id=sess_id, app_id=app_id, timestamp=time, - event_name=ename, event_data=edata, event_source=esrc, - app_version=app_ver, app_branch=app_br, log_version=log_ver, - time_offset=offset, user_id=uid, user_data=udata, - game_state=state, event_sequence_index=index) - # *** PRIVATE STATICS *** - @staticmethod - def _parse(input:Any, col_schema:ColumnSchema) -> Any: - """Applies whatever parsing is appropriate based on what type 
the schema said a column contained. - - :param input: _description_ - :type input: str - :param col_schema: _description_ - :type col_schema: ColumnSchema - :return: _description_ - :rtype: Any - """ - if input is None: - return None - if input == "None" or input == "null" or input == "nan": - return None - elif col_schema.ValueType == 'str': - return str(input) - elif col_schema.ValueType == 'int': - return int(input) - elif col_schema.ValueType == 'float': - return float(input) - elif col_schema.ValueType == 'datetime': - return input if isinstance(input, datetime) else TableSchema._convertDateTime(str(input)) - elif col_schema.ValueType == 'timedelta': - return input if isinstance(input, timedelta) else TableSchema._convertTimedelta(str(input)) - elif col_schema.ValueType == 'timezone': - return input if isinstance(input, timezone) else TableSchema._convertTimezone(str(input)) - elif col_schema.ValueType == 'json': - try: - if isinstance(input, dict): - # if input was a dict already, then just give it back. Else, try to load it from string. - return input - elif isinstance(input, str): - if input != 'None' and input != '': # watch out for nasty corner cases. - return json.loads(input) - else: - return None - else: - return json.loads(str(input)) - except JSONDecodeError as err: - Logger.Log(f"Could not parse input '{input}' of type {type(input)} from column {col_schema.Name}, got the following error:\n{str(err)}", logging.WARN) - return {} - elif col_schema.ValueType.startswith('enum'): - # if the column is supposed to be an enum, for now we just stick with the string. 
- return str(input) - - @staticmethod - def _convertDateTime(time_str:str) -> datetime: - """_summary_ - - TODO : Make use of formats options - - :param time_str: _description_ - :type time_str: str - :raises ValueError: _description_ - :raises err: _description_ - :return: _description_ - :rtype: datetime - """ - ret_val : datetime - - if time_str == "None" or time_str == "none" or time_str == "null" or time_str == "nan": - raise ValueError(f"Got a non-timestamp value of {time_str} when converting a datetime column of an Event!") - - # formats = ["%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S.%f"] - - # for fmt in formats: - try: - ret_val = parser.isoparse(time_str) - # ret_val = datetime.strptime(time_str, fmt) - except ValueError as err: - Logger.Log(f"Could not parse time string '{time_str}', got error {err}") - raise err - else: - return ret_val - # raise ValueError(f"Could not parse timestamp {time_str}, it did not match any expected formats.") - - @staticmethod - def _convertTimedelta(time_str:str) -> Optional[timedelta]: - """_summary_ + # *** PRIVATE METHODS *** - TODO : Sort out/document why we do nothing with ValueError and IndexError + @property + def _columnSetMarkdown(self) -> str: + return "\n".join([item.AsMarkdown for item in self.Columns]) - :param time_str: _description_ - :type time_str: str - :raises ValueError: _description_ - :return: _description_ - :rtype: Optional[timedelta] - """ - ret_val : Optional[timedelta] - - if time_str == "None" or time_str == "none" or time_str == "null" or time_str == "nan": - return None - elif re.fullmatch(pattern=r"\d+:\d+:\d+(\.\d+)?", string=time_str): - try: - pieces = time_str.split(':') - seconds_pieces = pieces[2].split('.') - ret_val = timedelta(hours=int(pieces[0]), - minutes=int(pieces[1]), - seconds=int(seconds_pieces[0]), - milliseconds=int(seconds_pieces[1]) if len(seconds_pieces) > 1 else 0) - except ValueError as err: - pass - except IndexError as err: - 
pass - else: - return ret_val - elif re.fullmatch(pattern=r"-?\d+", string=time_str): - try: - ret_val = timedelta(seconds=int(time_str)) - except ValueError as err: - pass + @property + def _columnMapMarkdown(self) -> str: + ret_val : str + + event_column_list = [] + for event_element,columns_mapped in self._column_map.items(): + if columns_mapped is not None: + if isinstance(columns_mapped, str): + event_column_list.append(f"**{event_element}** = Column '*{columns_mapped}*' ") + elif isinstance(columns_mapped, list): + mapped_list = ", ".join([f"'*{item}*'" for item in columns_mapped]) + event_column_list.append(f"**{event_element}** = Columns {mapped_list} ") # figure out how to do one string foreach item in list. + elif isinstance(columns_mapped, int): + event_column_list.append(f"**{event_element}** = Column '*{self.ColumnNames[columns_mapped]}*' (index {columns_mapped}) ") + else: + event_column_list.append(f"**{event_element}** = Column '*{columns_mapped}*' (DEBUG: Type {type(columns_mapped)}) ") else: - return ret_val - raise ValueError(f"Could not parse timedelta {time_str} of type {type(time_str)}, it did not match any expected formats.") - + event_column_list.append(f"**{event_element}** = null ") + ret_val = "\n".join(event_column_list) + return ret_val + @staticmethod - def _convertTimezone(time_str:str) -> Optional[timezone]: + def _parseElement(elem:Any, name:str) -> Optional[ColumnMapElement]: """_summary_ - TODO : Sort out/document why we do nothing with ValueError + TODO : Pick a better name - :param time_str: _description_ - :type time_str: str - :raises ValueError: _description_ + :param elem: _description_ + :type elem: Any + :param name: _description_ + :type name: str :return: _description_ - :rtype: Optional[timezone] + :rtype: Optional[ColumnMapElement] """ - ret_val : Optional[timezone] - - if time_str == "None" or time_str == "none" or time_str == "null" or time_str == "nan": - return None - elif re.fullmatch(pattern=r"UTC[+-]\d+:\d+", 
string=time_str): - try: - pieces = time_str.removeprefix("UTC").split(":") - ret_val = timezone(timedelta(hours=int(pieces[0]), minutes=int(pieces[1]))) - except ValueError as err: - pass + ret_val : Optional[str | List[str] | Dict[str, str]] + if elem is not None: + if isinstance(elem, str): + ret_val = elem + elif isinstance(elem, list): + ret_val = elem + elif isinstance(elem, dict): + ret_val = elem else: - return ret_val - raise ValueError(f"Could not parse timezone {time_str} of type {type(time_str)}, it did not match any expected formats.") - - # *** PRIVATE METHODS *** + ret_val = str(elem) + Logger.Log(f"Column name(s) mapped to {name} was not a string or list, defaulting to str(name) == {ret_val} being mapped to {name}", logging.WARN) + else: + ret_val = None + Logger.Log(f"Column name mapped to {name} was left null, nothing will be mapped to {name}", logging.WARN) + return ret_val - def _getValueFromRow(self, row:Tuple, indices:Union[int, List[int], Dict[str, int], None], concatenator:str, fallback:Any) -> Any: + def _getValueFromRow(self, row:Tuple, indices:Optional[int | List[int] | Dict[str, int]], concatenator:str, fallback:Any) -> Any: ret_val : Any if indices is not None: if isinstance(indices, int): # if there's a single index, use parse to get the value it is stated to be # print(f"About to parse value {row[indices]} as type {self.Columns[indices]},\nFull list from row is {row},\nFull list of columns is {self.Columns},\nwith names {self.ColumnNames}") - ret_val = TableSchema._parse(input=row[indices], col_schema=self.Columns[indices]) + ret_val = conversions.ConvertToType(variable=row[indices], to_type=self.Columns[indices].ValueType) elif isinstance(indices, list): ret_val = concatenator.join([str(row[index]) for index in indices]) elif isinstance(indices, dict): @@ -529,7 +288,7 @@ def _getValueFromRow(self, row:Tuple, indices:Union[int, List[int], Dict[str, in for key,column_index in indices.items(): if column_index > len(row): 
Logger.Log(f"Got column index of {column_index} for column {key}, but row only has {len(row)} columns!", logging.ERROR) - _val = TableSchema._parse(input=row[column_index], col_schema=self._columns[column_index]) + _val = conversions.ConvertToType(variable=row[column_index], to_type=self._table_columns[column_index].ValueType) ret_val.update(_val if isinstance(_val, dict) else {key:_val}) else: ret_val = fallback diff --git a/src/ogd/common/schemas/tables/__init__.py b/src/ogd/common/schemas/tables/__init__.py index e5ef7d4..5ba342e 100644 --- a/src/ogd/common/schemas/tables/__init__.py +++ b/src/ogd/common/schemas/tables/__init__.py @@ -1,5 +1,7 @@ __all__ = [ "TableSchema", + "EventTableSchema" ] -from . import TableSchema \ No newline at end of file +from . import TableSchema +from . import EventTableSchema \ No newline at end of file diff --git a/src/ogd/common/utils/typing.py b/src/ogd/common/utils/typing.py index b420f5f..dfc1a9e 100644 --- a/src/ogd/common/utils/typing.py +++ b/src/ogd/common/utils/typing.py @@ -1,4 +1,136 @@ +## import standard libraries +import json +import logging +import re +from datetime import datetime, timedelta, timezone +from json.decoder import JSONDecodeError from typing import Any, Callable, Dict, List, Optional, TypeAlias +## import 3rd-party libraries +from dateutil import parser +## import local files +from ogd.common.utils.Logger import Logger Map : TypeAlias = Dict[str, Any] # type alias: we'll call any dict using string keys a "Map" ExportRow : TypeAlias = List[Any] + +class conversions: + + @staticmethod + def ConvertToType(variable:Any, to_type:str) -> Any: + """Applies whatever parsing is appropriate based on what type the schema said a column contained. 
+ + :param input: _description_ + :type input: str + :param col_schema: _description_ + :type col_schema: ColumnSchema + :return: _description_ + :rtype: Any + """ + if variable is None: + return None + if variable == "None" or variable == "null" or variable == "nan": + return None + match to_type.upper(): + case 'STR': + return str(variable) + case 'INT': + return int(variable) + case 'FLOAT': + return float(variable) + case 'DATETIME': + return variable if isinstance(variable, datetime) else conversions.DatetimeFromString(str(variable)) + case 'TIMEDELTA': + return variable if isinstance(variable, timedelta) else conversions.TimedeltaFromString(str(variable)) + case 'TIMEZONE': + return variable if isinstance(variable, timezone) else conversions.TimezoneFromString(str(variable)) + case 'JSON': + try: + if isinstance(variable, dict): + # if input was a dict already, then just give it back. Else, try to load it from string. + return variable + elif isinstance(variable, str): + if variable != 'None' and variable != '': # watch out for nasty corner cases. + return json.loads(variable) + else: + return None + else: + return json.loads(str(variable)) + except JSONDecodeError as err: + Logger.Log(f"Could not parse input '{variable}' of type {type(variable)} to type {to_type}, got the following error:\n{str(err)}", logging.WARN) + return {} + case _dummy if _dummy.startswith('ENUM'): + # if the column is supposed to be an enum, for now we just stick with the string. 
+ return str(variable) + case _: + Logger.Log(f"ConvertToType function got an unrecognized type {to_type}, could not complete conversion!", logging.WARNING) + + @staticmethod + def DatetimeFromString(time_str:str) -> datetime: + ret_val : datetime + + if time_str == "None" or time_str == "none" or time_str == "null" or time_str == "nan": + raise ValueError(f"Got a non-timestamp value of {time_str} when converting a datetime column from data source!") + + formats = ["%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S.%f"] + + try: + ret_val = parser.isoparse(time_str) + except ValueError: + # Logger.Log(f"Could not parse time string '{time_str}', got error {err}") + # raise err + pass + else: + return ret_val + for fmt in formats: + try: + ret_val = datetime.strptime(time_str, fmt) + except ValueError: + pass + else: + return ret_val + raise ValueError(f"Could not parse timestamp {time_str}, it did not match any expected formats!") + + @staticmethod + def TimedeltaFromString(time_str:str) -> Optional[timedelta]: + ret_val : Optional[timedelta] + + if time_str == "None" or time_str == "none" or time_str == "null" or time_str == "nan": + return None + elif re.fullmatch(pattern=r"\d+:\d+:\d+(\.\d+)?", string=time_str): + try: + pieces = time_str.split(':') + seconds_pieces = pieces[2].split('.') + ret_val = timedelta(hours=int(pieces[0]), + minutes=int(pieces[1]), + seconds=int(seconds_pieces[0]), + milliseconds=int(seconds_pieces[1]) if len(seconds_pieces) > 1 else 0) + except ValueError as err: + pass + except IndexError as err: + pass + else: + return ret_val + elif re.fullmatch(pattern=r"-?\d+", string=time_str): + try: + ret_val = timedelta(seconds=int(time_str)) + except ValueError as err: + pass + else: + return ret_val + raise ValueError(f"Could not parse timedelta {time_str} of type {type(time_str)}, it did not match any expected formats.") + + @staticmethod + def TimezoneFromString(time_str:str) -> Optional[timezone]: + 
ret_val : Optional[timezone] + + if time_str == "None" or time_str == "none" or time_str == "null" or time_str == "nan": + return None + elif re.fullmatch(pattern=r"UTC[+-]\d+:\d+", string=time_str): + try: + pieces = time_str.removeprefix("UTC").split(":") + ret_val = timezone(timedelta(hours=int(pieces[0]), minutes=int(pieces[1]))) + except ValueError as err: + pass + else: + return ret_val + raise ValueError(f"Could not parse timezone {time_str} of type {type(time_str)}, it did not match any expected formats.") \ No newline at end of file diff --git a/tests/TestDriver.py b/tests/TestDriver.py index 67e0ce2..2bd28f7 100644 --- a/tests/TestDriver.py +++ b/tests/TestDriver.py @@ -11,21 +11,29 @@ import logging from ogd.common.utils.Logger import Logger Logger.InitializeLogger(level=logging.INFO, use_logfile=False) -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from config.t_config import settings -_config = TestConfigSchema.FromDict(name="APIUtilsTestConfig", all_elements=settings, logger=None) +_config = TestConfig.FromDict(name="APIUtilsTestConfig", all_elements=settings, logger=None) # loader = TestLoader() # TODO : At the moment, this is just module-level, should eventually go to class-level selection. 
suite = TestSuite() +if _config.EnabledTests.get('CONFIGS'): + print("***\nAdding configs:") + suite.addTest(defaultTestLoader.discover('./tests/cases/configs/', pattern="t_*.py", top_level_dir="./")) + print("Done\n***") if _config.EnabledTests.get('INTERFACES'): print("***\nAdding interfaces:") suite.addTest(defaultTestLoader.discover('./tests/cases/interfaces/', pattern="t_*.py", top_level_dir="./")) print("Done\n***") +if _config.EnabledTests.get('SCHEMAS'): + print("***\nAdding schemas:") + suite.addTest(defaultTestLoader.discover('./tests/cases/schemas/', pattern="t_*.py", top_level_dir="./")) + print("Done\n***") if _config.EnabledTests.get('UTILS'): - print("***\nAdding APIUtils:") + print("***\nAdding Utils:") suite.addTest(defaultTestLoader.discover('./tests/cases/utils/', pattern="t_*.py", top_level_dir="./")) print("Done\n***") diff --git a/tests/cases/configs/__init__.py b/tests/cases/configs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cases/configs/games/__init__.py b/tests/cases/configs/games/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cases/schemas/games/t_AggregateSchema.py b/tests/cases/configs/games/t_AggregateConfig.py similarity index 92% rename from tests/cases/schemas/games/t_AggregateSchema.py rename to tests/cases/configs/games/t_AggregateConfig.py index 9aa71c8..3b330cf 100644 --- a/tests/cases/schemas/games/t_AggregateSchema.py +++ b/tests/cases/configs/games/t_AggregateConfig.py @@ -3,14 +3,14 @@ import unittest from unittest import TestCase # import ogd libraries. 
-from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.models.enums.ExtractionMode import ExtractionMode from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.AggregateSchema import AggregateSchema +from src.ogd.common.configs.games.AggregateConfig import AggregateConfig from tests.config.t_config import settings -class t_AggregateSchema(TestCase): +class t_AggregateConfig(TestCase): """Testbed for the GameSourceSchema class. TODO : Test more 'enabled' options/combinations. @@ -19,7 +19,7 @@ class t_AggregateSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) @@ -37,7 +37,7 @@ def setUpClass(cls) -> None: } } } - cls.test_schema = AggregateSchema( + cls.test_schema = AggregateConfig( name="ActiveTime Schema", other_elements=_elems ) @@ -113,7 +113,7 @@ def test_FromDict(self): } } _modes = { ExtractionMode.SESSION, ExtractionMode.PLAYER, ExtractionMode.POPULATION, ExtractionMode.DETECTOR } - _schema = AggregateSchema.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) + _schema = AggregateConfig.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "ActiveTime Schema") self.assertIsInstance(_schema.TypeName, str) diff --git a/tests/cases/schemas/games/t_DetectorSchema.py b/tests/cases/configs/games/t_DetectorConfig.py similarity index 89% rename from tests/cases/schemas/games/t_DetectorSchema.py rename to tests/cases/configs/games/t_DetectorConfig.py index 3c83edd..37a0bb2 100644 --- 
a/tests/cases/schemas/games/t_DetectorSchema.py +++ b/tests/cases/configs/games/t_DetectorConfig.py @@ -3,15 +3,15 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.DetectorSchema import DetectorSchema +from src.ogd.common.configs.games.DetectorConfig import DetectorConfig from tests.config.t_config import settings @unittest.skip("Not implemented") -class t_DetectorSchema(TestCase): - """Testbed for the DetectorSchema class. +class t_DetectorConfig(TestCase): + """Testbed for the DetectorConfig class. TODO : Implement tests """ @@ -19,7 +19,7 @@ class t_DetectorSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) @@ -37,7 +37,7 @@ def setUpClass(cls) -> None: } } } - cls.test_schema = DetectorSchema( + cls.test_schema = DetectorConfig( name="ActiveTime Schema", other_elements=_elems ) @@ -101,7 +101,7 @@ def test_FromDict(self): } } } - _schema = DetectorSchema.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) + _schema = DetectorConfig.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "ActiveTime Schema") self.assertIsInstance(_schema.TypeName, str) diff --git a/tests/cases/schemas/games/t_DetectorMapSchema.py b/tests/cases/configs/games/t_DetectorMapConfig.py similarity index 88% rename from tests/cases/schemas/games/t_DetectorMapSchema.py rename to 
tests/cases/configs/games/t_DetectorMapConfig.py index e040cb6..12d031e 100644 --- a/tests/cases/schemas/games/t_DetectorMapSchema.py +++ b/tests/cases/configs/games/t_DetectorMapConfig.py @@ -3,14 +3,14 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.DetectorMapSchema import DetectorMapSchema +from src.ogd.common.configs.games.DetectorMapConfig import DetectorMapConfig from tests.config.t_config import settings @unittest.skip("Not implemented") -class t_DetectorMapSchema(TestCase): +class t_DetectorMapConfig(TestCase): """Testbed for the GameSourceSchema class. TODO : Implement and enable tests. @@ -19,12 +19,12 @@ class t_DetectorMapSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) # 2. 
Set up local instance of testing class - cls.test_schema = DetectorMapSchema( + cls.test_schema = DetectorMapConfig( name="available_building Schema", perlevel_detectors={}, percount_detectors={}, @@ -93,7 +93,7 @@ def test_FromDict(self): }, "description" : "The buildings available for the player to construct" } - _schema = DetectorMapSchema.FromDict(name="available_buildings Schema", all_elements=_dict, logger=None) + _schema = DetectorMapConfig.FromDict(name="available_buildings Schema", all_elements=_dict, logger=None) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "available_buildings Schema") diff --git a/tests/cases/schemas/games/t_FeatureSchema.py b/tests/cases/configs/games/t_FeatureConfig.py similarity index 91% rename from tests/cases/schemas/games/t_FeatureSchema.py rename to tests/cases/configs/games/t_FeatureConfig.py index 2c0bb45..32de688 100644 --- a/tests/cases/schemas/games/t_FeatureSchema.py +++ b/tests/cases/configs/games/t_FeatureConfig.py @@ -3,15 +3,15 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.FeatureSchema import FeatureSchema +from src.ogd.common.configs.games.FeatureConfig import FeatureConfig from tests.config.t_config import settings @unittest.skip("Not implemented") -class t_FeatureSchema(TestCase): - """Testbed for the DetectorSchema class. +class t_FeatureConfig(TestCase): + """Testbed for the DetectorConfig class. TODO : Implement tests """ @@ -19,7 +19,7 @@ class t_FeatureSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. 
Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) @@ -37,7 +37,7 @@ def setUpClass(cls) -> None: } } } - # cls.test_schema = FeatureSchema( + # cls.test_schema = FeatureConfig( # name="ActiveTime Schema", # all_elements=_elems # ) @@ -128,7 +128,7 @@ def test_FromDict(self): } } } - # _schema = DetectorSchema.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) + # _schema = DetectorConfig.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) # self.assertIsInstance(_schema.Name, str) # self.assertEqual(_schema.Name, "ActiveTime Schema") # self.assertIsInstance(_schema.TypeName, str) diff --git a/tests/cases/schemas/games/t_FeatureMapSchema.py b/tests/cases/configs/games/t_FeatureMapConfig.py similarity index 89% rename from tests/cases/schemas/games/t_FeatureMapSchema.py rename to tests/cases/configs/games/t_FeatureMapConfig.py index 17fb436..41f118b 100644 --- a/tests/cases/schemas/games/t_FeatureMapSchema.py +++ b/tests/cases/configs/games/t_FeatureMapConfig.py @@ -3,14 +3,14 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.FeatureMapSchema import FeatureMapSchema +from src.ogd.common.configs.games.FeatureMapConfig import FeatureMapConfig from tests.config.t_config import settings @unittest.skip("Not implemented") -class t_FeatureMapSchema(TestCase): +class t_FeatureMapConfig(TestCase): """Testbed for the GameSourceSchema class. TODO : Implement and enable tests. 
@@ -19,12 +19,12 @@ class t_FeatureMapSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) # 2. Set up local instance of testing class - cls.test_schema = FeatureMapSchema( + cls.test_schema = FeatureMapConfig( name="available_building Schema", legacy_mode=True, legacy_perlevel_feats={}, @@ -101,7 +101,7 @@ def test_FromDict(self): }, "description" : "The buildings available for the player to construct" } - _schema = FeatureMapSchema.FromDict(name="available_buildings Schema", all_elements=_dict, logger=None) + _schema = FeatureMapConfig.FromDict(name="available_buildings Schema", all_elements=_dict, logger=None) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "available_buildings Schema") diff --git a/tests/cases/schemas/games/t_GeneratorSchema.py b/tests/cases/configs/games/t_GeneratorConfig.py similarity index 89% rename from tests/cases/schemas/games/t_GeneratorSchema.py rename to tests/cases/configs/games/t_GeneratorConfig.py index e0574b2..1dcdc6e 100644 --- a/tests/cases/schemas/games/t_GeneratorSchema.py +++ b/tests/cases/configs/games/t_GeneratorConfig.py @@ -3,15 +3,15 @@ import unittest from unittest import TestCase # import ogd libraries. 
-from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.GeneratorSchema import GeneratorSchema +from src.ogd.common.configs.games.GeneratorConfig import GeneratorConfig from tests.config.t_config import settings @unittest.skip("Not implemented") -class t_GeneratorSchema(TestCase): - """Testbed for the DetectorSchema class. +class t_GeneratorConfig(TestCase): + """Testbed for the GeneratorConfig class. TODO : Implement tests TODO : Create a basic testing implementation @@ -20,7 +20,7 @@ class t_GeneratorSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) @@ -38,7 +38,7 @@ def setUpClass(cls) -> None: } } } - # cls.test_schema = GeneratorSchema( + # cls.test_schema = GeneratorConfig( # name="ActiveTime Schema", # all_elements=_elems # ) @@ -108,7 +108,7 @@ def test_FromDict(self): } } } - # _schema = GeneratorSchema.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) + # _schema = GeneratorConfig.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) # self.assertIsInstance(_schema.Name, str) # self.assertEqual(_schema.Name, "ActiveTime Schema") # self.assertIsInstance(_schema.TypeName, str) diff --git a/tests/cases/schemas/games/t_PerCountSchema.py b/tests/cases/configs/games/t_PerCountConfig.py similarity index 91% rename from tests/cases/schemas/games/t_PerCountSchema.py rename to tests/cases/configs/games/t_PerCountConfig.py index 472e5dd..743a3a1 100644 --- a/tests/cases/schemas/games/t_PerCountSchema.py +++
b/tests/cases/configs/games/t_PerCountConfig.py @@ -3,15 +3,15 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.models.enums.ExtractionMode import ExtractionMode from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.games.PerCountSchema import PerCountSchema +from src.ogd.common.configs.games.PerCountConfig import PerCountConfig from tests.config.t_config import settings -class t_PerCountSchema(TestCase): - """Testbed for the PerCountSchema class. +class t_PerCountConfig(TestCase): + """Testbed for the PerCountConfig class. TODO : Test more 'enabled' options/combinations. """ @@ -19,7 +19,7 @@ class t_PerCountSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) @@ -37,7 +37,7 @@ def setUpClass(cls) -> None: } } } - cls.test_schema = PerCountSchema( + cls.test_schema = PerCountConfig( name="ActiveTime Schema", count=5, prefix="lvl", @@ -115,7 +115,7 @@ def test_FromDict(self): } } _modes = { ExtractionMode.SESSION, ExtractionMode.PLAYER, ExtractionMode.POPULATION, ExtractionMode.DETECTOR } - _schema = PerCountSchema.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) + _schema = PerCountConfig.FromDict(name="ActiveTime Schema", all_elements=_dict, logger=None) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "ActiveTime Schema") self.assertIsInstance(_schema.TypeName, str) diff --git a/tests/cases/schemas/config/t_GameSourceSchema.py b/tests/cases/configs/t_GameSourceSchema.py similarity index 81% 
rename from tests/cases/schemas/config/t_GameSourceSchema.py rename to tests/cases/configs/t_GameSourceSchema.py index 862e939..b0fcc7c 100644 --- a/tests/cases/schemas/config/t_GameSourceSchema.py +++ b/tests/cases/configs/t_GameSourceSchema.py @@ -5,12 +5,12 @@ from typing import Any, Dict, Optional from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.data_sources.DataSourceSchema import DataSourceSchema -from ogd.common.schemas.configs.data_sources.BigQuerySourceSchema import BigQuerySchema -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.storage.DataStoreConfig import DataStoreConfig +from ogd.common.configs.storage.BigQueryConfig import BigQueryConfig +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from src.ogd.common.configs.GameSourceSchema import GameSourceSchema from tests.config.t_config import settings class t_GameSourceSchema(TestCase): @@ -20,7 +20,7 @@ class t_GameSourceSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. 
Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) @@ -32,8 +32,9 @@ def setUpClass(cls) -> None: } cls.test_schema = GameSourceSchema( name="Game Source Schema", + game_id="AQUALAB", source_name="AQUALAB_BQ", - source_schema=BigQuerySchema.FromDict(name="AQUALAB_BQ", all_elements=source_elems, logger=None), + source_schema=BigQueryConfig.FromDict(name="AQUALAB_BQ", all_elements=source_elems, logger=None), db_name="aqualab", table_name="aqualab_daily", table_schema="OPENGAMEDATA_BIGQUERY", @@ -72,7 +73,7 @@ def test_TableName(self): self.assertEqual(_str, "aqualab_daily") def test_TableSchema(self): - _str = self.test_schema.TableSchema + _str = self.test_schema.TableSchemaName self.assertIsInstance(_str, str) self.assertEqual(_str, "OPENGAMEDATA_BIGQUERY") @@ -91,7 +92,7 @@ def test_NonStandardElementNames(self): def test_FromDict(self): """Test case for whether the FromDict function is working properly. - TODO : Include assertion(s) for DataSourceSchema, as in implementation of test_Source (whenever that gets implemented) + TODO : Include assertion(s) for DataStoreConfig, as in implementation of test_Source (whenever that gets implemented) TODO : Possibly do additional cases where we check that default replacements for missing elements are correct. 
""" _dict = { @@ -105,20 +106,20 @@ def test_FromDict(self): "PROJECT_ID" : "aqualab-project", "PROJECT_KEY": "./key.txt" } - _sources = { "AQUALAB_BQ" : BigQuerySchema.FromDict(name="AQUALAB_BQ", all_elements=source_elems, logger=None) } + _sources : Dict[str, DataStoreConfig] = { "AQUALAB_BQ" : BigQueryConfig.FromDict(name="AQUALAB_BQ", all_elements=source_elems, logger=None) } _schema = GameSourceSchema.FromDict(name="AQUALAB", all_elements=_dict, logger=None, data_sources=_sources) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "AQUALAB") self.assertIsInstance(_schema.SourceName, str) self.assertEqual(_schema.SourceName, "AQUALAB_BQ") - # self.assertIsInstance(_schema.Source, DataSourceSchema) + # self.assertIsInstance(_schema.Source, DataStoreConfig) # self.assertEqual(_schema.Source, "AQUALAB") self.assertIsInstance(_schema.DatabaseName, str) self.assertEqual(_schema.DatabaseName, "aqualab") self.assertIsInstance(_schema.TableName, str) self.assertEqual(_schema.TableName, "aqualab_daily") - self.assertIsInstance(_schema.TableSchema, str) - self.assertEqual(_schema.TableSchema, "OPENGAMEDATA_BIGQUERY") + self.assertIsInstance(_schema.TableSchemaName, str) + self.assertEqual(_schema.TableSchemaName, "OPENGAMEDATA_BIGQUERY") @unittest.skip("Not yet implemented") def test_parseSource(self): diff --git a/tests/cases/schemas/config/t_IndexingSchema.py b/tests/cases/configs/t_IndexingConfig.py similarity index 89% rename from tests/cases/schemas/config/t_IndexingSchema.py rename to tests/cases/configs/t_IndexingConfig.py index 308793a..ef79c5b 100644 --- a/tests/cases/schemas/config/t_IndexingSchema.py +++ b/tests/cases/configs/t_IndexingConfig.py @@ -4,25 +4,25 @@ from pathlib import Path from unittest import TestCase # import ogd libraries. 
-from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.configs.IndexingSchema import FileIndexingSchema +from src.ogd.common.configs.IndexingConfig import FileIndexingConfig from tests.config.t_config import settings -class t_IndexingSchema(TestCase): +class t_IndexingConfig(TestCase): """Testbed for the GameSourceSchema class. """ @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) # 2. Set up local instance of testing class - cls.test_schema = FileIndexingSchema( + cls.test_schema = FileIndexingConfig( name="Indexing Schema", local_dir=Path("./data/"), remote_url="https://fieldday-web.ad.education.wisc.edu/opengamedata/", @@ -74,7 +74,7 @@ def test_FromDict(self): "REMOTE_URL" : "https://fieldday-web.ad.education.wisc.edu/opengamedata/", "TEMPLATES_URL" : "https://github.com/opengamedata/opengamedata-samples" } - _schema = FileIndexingSchema.FromDict(name="FILE_INDEXING", all_elements=_dict, logger=None) + _schema = FileIndexingConfig.FromDict(name="FILE_INDEXING", all_elements=_dict, logger=None) self.assertIsInstance(_schema.Name, str) self.assertEqual(_schema.Name, "FILE_INDEXING") self.assertIsInstance(_schema.LocalDirectory, Path) diff --git a/tests/cases/schemas/config/t_TestConfigSchema.py b/tests/cases/configs/t_TestConfig.py similarity index 91% rename from tests/cases/schemas/config/t_TestConfigSchema.py rename to tests/cases/configs/t_TestConfig.py index bbd59f1..9e8f65a 100644 --- a/tests/cases/schemas/config/t_TestConfigSchema.py +++ b/tests/cases/configs/t_TestConfig.py @@ -3,20 
+3,20 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals -from src.ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema as TestConfigLocal +from src.ogd.common.configs.TestConfig import TestConfig as TestConfigLocal from tests.config.t_config import settings -class t_TestConfigSchema(TestCase): +class t_TestConfig(TestCase): """Testbed for the GameSourceSchema class. """ @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/interfaces/__init__.py b/tests/cases/interfaces/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cases/interfaces/t_CSVInterface.py b/tests/cases/interfaces/t_CSVInterface.py index 17ad7bc..95a1344 100644 --- a/tests/cases/interfaces/t_CSVInterface.py +++ b/tests/cases/interfaces/t_CSVInterface.py @@ -5,8 +5,8 @@ from unittest import TestCase from zipfile import ZipFile # import locals -from ogd.common.interfaces.CSVInterface import CSVInterface -from ogd.common.schemas.configs.GameSourceSchema import GameSourceSchema +from ogd.common.connectors.interfaces.CSVInterface import CSVInterface +from ogd.common.configs.GameSourceSchema import GameSourceSchema class t_CSVInterface(TestCase): TEST_MIN_DATE : Final[datetime] = datetime(year=2021, month=2, day=1, hour= 0, minute=0, second=0) @@ -64,7 +64,7 @@ def test_IDsFromDates(self): def test_DatesFromIDs(self): with self.zipped_file.open(self.zipped_file.namelist()[0]) as f: - _cfg = GameSourceSchema(name="FILE SOURCE", 
all_elements={"SCHEMA":"OGD_EVENT_FILE", "DB_TYPE":"FILE"}, data_sources={}) + _cfg = GameSourceSchema(name="FILE SOURCE", other_elements={"SCHEMA":"OGD_EVENT_FILE", "DB_TYPE":"FILE"}, data_sources={}) CSVI = CSVInterface(game_id='BACTERIA', config=_cfg, filepath=f, delim='\t', fail_fast=False) if CSVI.Open(): dates = CSVI.DatesFromIDs(self.TEST_SESSION_LIST) diff --git a/tests/cases/schemas/games/t_DataElementSchema.py b/tests/cases/schemas/games/t_DataElementSchema.py index 1457b97..67fe511 100644 --- a/tests/cases/schemas/games/t_DataElementSchema.py +++ b/tests/cases/schemas/games/t_DataElementSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.DataElementSchema import DataElementSchema @@ -18,7 +18,7 @@ class t_DataElementSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/games/t_EventSchema.py b/tests/cases/schemas/games/t_EventSchema.py index 0256c1b..d4e3339 100644 --- a/tests/cases/schemas/games/t_EventSchema.py +++ b/tests/cases/schemas/games/t_EventSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. 
-from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.EventSchema import EventSchema @@ -19,7 +19,7 @@ class t_EventSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/games/t_GameSchema.py b/tests/cases/schemas/games/t_GameSchema.py index f79a230..c37314e 100644 --- a/tests/cases/schemas/games/t_GameSchema.py +++ b/tests/cases/schemas/games/t_GameSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.GameSchema import GameSchema @@ -19,7 +19,7 @@ class t_GameSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/games/t_GameStateSchema.py b/tests/cases/schemas/games/t_GameStateSchema.py index 3e7f862..3d518c6 100644 --- a/tests/cases/schemas/games/t_GameStateSchema.py +++ b/tests/cases/schemas/games/t_GameStateSchema.py @@ -3,7 +3,7 @@ import unittest from unittest import TestCase # import ogd libraries. 
-from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.games.GameStateSchema import GameStateSchema @@ -19,7 +19,7 @@ class t_GameStateSchema(TestCase): @classmethod def setUpClass(cls) -> None: # 1. Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO Logger.std_logger.setLevel(_level) diff --git a/tests/cases/schemas/t_Schema.py b/tests/cases/schemas/t_Schema.py index 3f6645c..927aad4 100644 --- a/tests/cases/schemas/t_Schema.py +++ b/tests/cases/schemas/t_Schema.py @@ -5,7 +5,7 @@ from typing import Any, Dict, Optional from unittest import TestCase # import ogd libraries. -from ogd.common.schemas.configs.TestConfigSchema import TestConfigSchema +from ogd.common.configs.TestConfig import TestConfig from ogd.common.utils.Logger import Logger # import locals from src.ogd.common.schemas.Schema import Schema @@ -36,7 +36,7 @@ def FromDict(cls, name:str, all_elements:Dict[str, Any], logger:Optional[logging @classmethod def setUpClass(cls) -> None: # 1. 
Get testing config - _testing_cfg = TestConfigSchema.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) + _testing_cfg = TestConfig.FromDict(name="SchemaTestConfig", all_elements=settings, logger=None) _level = logging.DEBUG if _testing_cfg.Verbose else logging.INFO _str_level = "DEBUG" if _testing_cfg.Verbose else "INFO" Logger.std_logger.setLevel(_level) diff --git a/tests/config/t_config.py.template b/tests/config/t_config.py.template index 4b2af3f..6b94bbe 100644 --- a/tests/config/t_config.py.template +++ b/tests/config/t_config.py.template @@ -1,6 +1,7 @@ settings = { "VERBOSE" : False, "ENABLED" : { + "CONFIGS":True, "INTERFACES":True, "SCHEMAS":True, "UTILS":True