From b90ff30d7bb2ae2ca4e4a437a3e22aacb0921f2c Mon Sep 17 00:00:00 2001 From: Marcelo Date: Fri, 8 Dec 2023 13:49:07 +0100 Subject: [PATCH] Docs 02/24 [DOG-713] --- .../extractorutils/configtools/__init__.py | 7 ++- .../extractorutils/configtools/elements.py | 25 ++++++++ cognite/extractorutils/exceptions.py | 2 +- cognite/extractorutils/util.py | 29 +++++----- docs/source/conf.py | 2 +- docs/source/docs.rst | 58 ++++++++++++++++--- docs/source/package.rst | 6 +- docs/source/read_csv.rst | 2 +- docs/source/uploader.rst | 30 +++++++--- 9 files changed, 126 insertions(+), 35 deletions(-) diff --git a/cognite/extractorutils/configtools/__init__.py b/cognite/extractorutils/configtools/__init__.py index 6dbcbb0f..a0a570cb 100644 --- a/cognite/extractorutils/configtools/__init__.py +++ b/cognite/extractorutils/configtools/__init__.py @@ -85,10 +85,15 @@ class MyConfig(BaseConfig): However, all of these things will be automatically done for you if you are using the base Extractor class. """ +from cognite.extractorutils.exceptions import InvalidConfigError + from .elements import ( + AuthenticatorConfig, BaseConfig, + CertificateConfig, CogniteConfig, ConfigType, + ConnectionConfig, EitherIdConfig, FileSizeConfig, LocalStateStoreConfig, @@ -99,4 +104,4 @@ class MyConfig(BaseConfig): StateStoreConfig, TimeIntervalConfig, ) -from .loaders import ConfigResolver, load_yaml +from .loaders import ConfigResolver, load_yaml, load_yaml_dict diff --git a/cognite/extractorutils/configtools/elements.py b/cognite/extractorutils/configtools/elements.py index e645a508..11728f0e 100644 --- a/cognite/extractorutils/configtools/elements.py +++ b/cognite/extractorutils/configtools/elements.py @@ -53,6 +53,10 @@ @dataclass class CertificateConfig: + """ + Configuration parameters for certificates + """ + path: str password: Optional[str] authority_url: Optional[str] = None @@ -94,6 +98,11 @@ class ConnectionConfig: @dataclass class EitherIdConfig: + """ + Configuration parameter representing 
an ID in CDF, which can either be an external or internal ID. + An EitherId can only hold one ID type, not both. + """ + id: Optional[int] external_id: Optional[str] @@ -569,23 +578,39 @@ class BaseConfig(_BaseConfig): @dataclass class RawDestinationConfig: + """ + Configuration parameters for a CDF RAW destination (database and table) + """ + database: str table: str @dataclass class RawStateStoreConfig(RawDestinationConfig): + """ + Configuration of the State Store based on CDF RAW + """ + upload_interval: TimeIntervalConfig = TimeIntervalConfig("30s") @dataclass class LocalStateStoreConfig: + """ + Configuration of the State Store when it is stored locally + """ + path: str save_interval: TimeIntervalConfig = TimeIntervalConfig("30s") @dataclass class StateStoreConfig: + """ + Configuration of the State Store, containing ``LocalStateStoreConfig`` or ``RawStateStoreConfig`` + """ + raw: Optional[RawStateStoreConfig] = None local: Optional[LocalStateStoreConfig] = None diff --git a/cognite/extractorutils/exceptions.py b/cognite/extractorutils/exceptions.py index 39cfb893..58e1b055 100644 --- a/cognite/extractorutils/exceptions.py +++ b/cognite/extractorutils/exceptions.py @@ -15,7 +15,7 @@ class InvalidConfigError(Exception): """ - Exception thrown from ``load_yaml`` if config file is invalid. This can be due to + Exception thrown from ``load_yaml`` and ``load_yaml_dict`` if the config file is invalid. This can be due to * Missing fields * Incompatible types diff --git a/cognite/extractorutils/util.py b/cognite/extractorutils/util.py index 8d7c084a..19fda7ed 100644 --- a/cognite/extractorutils/util.py +++ b/cognite/extractorutils/util.py @@ -101,7 +101,7 @@ class EitherId: Args: id: Internal ID - externalId or external_id: external ID + external_id: External ID. Can be passed as either `external_id` or `externalId` Raises: TypeError: If none of both of id types are set.
@@ -197,24 +197,27 @@ def add_extraction_pipeline( added_message: str = "", ) -> Callable[[Callable[..., _T1]], Callable[..., _T1]]: """ - This is to be used as a decorator for extractor functions to add extraction pipeline information + This is to be used as a decorator for extractor functions to add extraction pipeline information. Args: - extraction_pipeline_ext_id: - cognite_client: - heartbeat_waiting_time: - added_message: + extraction_pipeline_ext_id: External ID of the extraction pipeline + cognite_client: CogniteClient instance used to communicate with CDF + heartbeat_waiting_time: Waiting time between each heartbeat sent by the background thread + added_message: Message that will be displayed at the end of the run Usage: If you have a function named "extract_data(*args, **kwargs)" and want to connect it to an extraction pipeline, you can use this decorator function as: - @add_extraction_pipeline( - extraction_pipeline_ext_id=, - cognite_client=, - logger=, - ) - def extract_data(*args, **kwargs): - + + .. code-block:: python + + @add_extraction_pipeline( + extraction_pipeline_ext_id=, + cognite_client=, + logger=, + ) + def extract_data(*args, **kwargs): + """ # TODO 1. Consider refactoring this decorator to share methods with the Extractor context manager in .base.py diff --git a/docs/source/conf.py b/docs/source/conf.py index 01d06b85..64420c64 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -65,7 +65,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/docs/source/docs.rst b/docs/source/docs.rst index 7692d015..dd969ff3 100644 --- a/docs/source/docs.rst +++ b/docs/source/docs.rst @@ -25,6 +25,7 @@ Extractor configurasions are conventionally written in *hyphen-cased YAML*. Thes Config loader ^^^^^^^^^^^^^ ..
autofunction:: cognite.extractorutils.configtools.load_yaml +.. autofunction:: cognite.extractorutils.configtools.load_yaml_dict Base classes @@ -55,6 +56,14 @@ The ``configtools`` module contains several prebuilt config classes for many com :undoc-members: .. autoclass:: cognite.extractorutils.configtools.CogniteConfig :undoc-members: +.. autoclass:: cognite.extractorutils.configtools.EitherIdConfig + :undoc-members: +.. autoclass:: cognite.extractorutils.configtools.ConnectionConfig + :undoc-members: +.. autoclass:: cognite.extractorutils.configtools.AuthenticatorConfig + :undoc-members: +.. autoclass:: cognite.extractorutils.configtools.CertificateConfig + :undoc-members: .. autoclass:: cognite.extractorutils.configtools.LoggingConfig :undoc-members: .. autoclass:: cognite.extractorutils.configtools.MetricsConfig @@ -75,14 +84,14 @@ Exceptions .. autoexception:: cognite.extractorutils.configtools.InvalidConfigError -``throttle`` - Tools for throttling ------------------------------------ +.. ``throttle`` - Tools for throttling +.. ----------------------------------- -.. automodule:: cognite.extractorutils.throttle - :members: - :undoc-members: - :inherited-members: - :show-inheritance: +.. .. automodule:: cognite.extractorutils.throttle +.. :members: +.. :undoc-members: +.. :inherited-members: +.. :show-inheritance: @@ -115,6 +124,41 @@ Exceptions :inherited-members: :show-inheritance: +.. automodule:: cognite.extractorutils.uploader._base + :members: + :undoc-members: + :inherited-members: + :show-inheritance: + +.. automodule:: cognite.extractorutils.uploader.assets + :members: + :show-inheritance: + +.. automodule:: cognite.extractorutils.uploader.events + :members: + :show-inheritance: + +.. automodule:: cognite.extractorutils.uploader.files + :members: + :show-inheritance: + +.. automodule:: cognite.extractorutils.uploader.raw + :members: + :show-inheritance: + +.. 
automodule:: cognite.extractorutils.uploader.time_series + :members: + :show-inheritance: + +.. ``uploader_extractor`` - A module containing a more advanced base extractor class +.. --------------------------------------------------------------------------------- + +.. .. automodule:: cognite.extractorutils.uploader_extractor +.. :members: +.. :undoc-members: +.. :inherited-members: +.. :show-inheritance: + ``util`` - Miscellaneous utilities ---------------------------------- diff --git a/docs/source/package.rst b/docs/source/package.rst index c9a52acc..866cdfd7 100644 --- a/docs/source/package.rst +++ b/docs/source/package.rst @@ -7,11 +7,11 @@ The final step is to ship our extractor. Some times we can depend on a Python in our extractor in, in other cases out extractor needs to be fully self-contained. How we ship our extractor will differ slightly between those scenarios. -When developing the extractor, running it is fairly easy: +When developing the extractor, running it is fairly easy: [#]_ .. code-block:: bash - poetry run + poetry run <extractor_name> <config_file> Sometimes we could just send our project to the production environment like this, for example by cloning the git repo (which would also make updating to future versions very easy). However, when shipping the extractor to a production @@ -68,6 +68,8 @@ executable you have to run ``cogex build`` from Windows, likewise for Linux or M The resulting executable will contain your extractor, all dependencies [#]_ and the Python runtime. +.. [#] Replace ``<extractor_name>`` with the name of your specific extractor, and ``<config_file>`` with the (absolute or + relative) path to a config file. .. [#] Note that this could be in violation of some licenses, particularly GPL or it's derivatives. Make sure that the licenses of your dependencies allows this type of linking.
diff --git a/docs/source/read_csv.rst b/docs/source/read_csv.rst index a71b49b1..ad06b5b3 100644 --- a/docs/source/read_csv.rst +++ b/docs/source/read_csv.rst @@ -40,7 +40,7 @@ If we wanted to make our extractor even more generic, we could have made the ``d We now have a ``reader`` object. This is an iterator that will return each row in the CSV file as a dictionary where column names are keys and the row are values. We can now iterate over this reader and add all the rows to the upload -queue using the :meth:`add_to_upload_queue ` method. +queue using the :meth:`add_to_upload_queue ` method. .. code-block:: python diff --git a/docs/source/uploader.rst b/docs/source/uploader.rst index c9fec780..002617ca 100644 --- a/docs/source/uploader.rst +++ b/docs/source/uploader.rst @@ -13,8 +13,9 @@ Using an upload queue --------------------- We begin by looking at the upload queue. Since this extractor will write to CDF RAW, we will use the -:meth:`RawUploadQueue `. Similar queues exists for time series data -points, events, sequence rows and files. +:meth:`RawUploadQueue `. Similar queues exist for +:meth:`time series data points `, +:meth:`events `, sequence rows and :meth:`files `. The reason for using an upload queue is to batch together data into larger requests to CDF. This will increase performance, some times quite dramatically since network latencies can often be a bottleneck for extractors. We can add @@ -27,7 +28,7 @@ manager (with a ``with`` statement). The advantage is that we can then set one o happen (such as every *x* seconds or when the queue has *y* elements in it), and don't think about it again. If the queue is not empty when we exit the context, a final upload will be made to make sure no data is left behind. -To create a :meth:`RawUploadQueue `, we write therefore start with +To create a :meth:`RawUploadQueue `, we therefore start with .. code-block:: python @@ -47,7 +48,7 @@ function.
This function is called from the ``__main__.py`` file, and is provided * ``cognite`` is an initiated ``CogniteClient`` that is set up to to use the CDF project that the user configured in their config file. -* ``states`` is a state store object, we will not cover these in this tutorial, but in short it allows us to keep track +* ``states`` is a :meth:`State Store ` object, we will not cover these in this tutorial, but in short it allows us to keep track of extraction state between runs to avoid duplicate work * ``config`` is the config file the user have provided, which has been loaded and stored as an instance of the Config class we made in the :ref:`Read CSV files` chapter. @@ -72,6 +73,9 @@ this: extract_file(file, queue) +This will call the ``start()`` and ``stop()`` methods from :meth:`AbstractUploadQueue ` +class automatically once all files are processed or the limit of the queue, defined by the ``max_queue_size`` keyword argument, is reached. + Extraction pipeline runs ------------------------ @@ -104,8 +108,16 @@ section of the config file, containing either an ``external-id`` or (internal) ` .. code-block:: yaml cognite: - project: publicdata - api-key: ${COGNITE_API_KEY} - - extraction-pipeline: - external-id: abc123 + # Read these from environment variables + host: ${COGNITE_BASE_URL} + project: ${COGNITE_PROJECT} + + idp-authentication: + token-url: ${COGNITE_TOKEN_URL} + client-id: ${COGNITE_CLIENT_ID} + secret: ${COGNITE_CLIENT_SECRET} + scopes: + - ${COGNITE_BASE_URL}/.default + + extraction-pipeline: + external-id: abc123