Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

global: add class based registry to extend idutils schemes #106

Merged
merged 4 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ scholarly communication.
Features
========

- Addition of custom schemes supporting all features of predefined schemes.
- Validation and normalization of persistent identifiers.
- Detection of persistent identifier scheme.
- Generation of resolving links for persistent identifiers.
Expand All @@ -46,11 +47,18 @@ API
.. automodule:: idutils
:members: is_isbn10, is_isbn13, is_isbn, is_issn, is_istc, is_doi, is_handle, is_ean8, is_ean13, is_ean, is_isni, is_orcid, is_purl, is_url, is_lsid, is_urn, is_ads, is_arxiv_post_2007, is_arxiv_pre_2007, is_arxiv, is_pmid, is_pmcid, is_gnd, is_sra, is_bioproject, is_biosample, is_ensembl, is_uniprot, is_refseq, is_genome, is_geo, is_arrayexpress_array, is_arrayexpress_experiment, detect_identifier_schemes, normalize_doi, normalize_handle, normalize_ads, normalize_orcid, normalize_gnd, normalize_pmid, normalize_arxiv, normalize_pid, to_url


.. include:: ../CHANGES.rst

.. include:: ../CONTRIBUTING.rst


How to add your own schemes
===========================

.. automodule:: idutils.ext
:members:


License
=======

Expand Down
37 changes: 1 addition & 36 deletions idutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,42 +14,7 @@
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.

"""Small library for persistent identifiers used in scholarly communication.
Setting up custom schemes
-------------------------
In order to define your own custom schemes you can use the following entrypoint to
register them
.. code-block:: python
[options.entry_points]
idutils.custom_schemes =
my_new_scheme = my_module.get_scheme_config_func
The entry point ``'my_new_scheme = my_module.get_scheme_config_func'`` defines an entry
point named ``my_new_scheme`` pointing to the function ``my_module.get_scheme_config_func``
which returns the config for your new registered scheme.
That function must return a dictionary with the following format:
.. code-block:: python
def get_scheme_config_func():
return {
# See examples in `idutils.validators` file.
"validator": lambda value: True else False,
# Used in `idutils.normalizers.normalize_pid` function.
"normalizer": lambda value: normalized_value,
# See examples in `idutils.detectors.IDUTILS_SCHEME_FILTER` config.
"filter": ["list_of_schemes_to_filter_out"],
# Used in `idutils.normalizers.to_url` function.
"url_generator": lambda scheme, normalized_pid: "normalized_url",
}
Each key is optional and if not provided a default value is defined in
`idutils.ext._set_default_custom_scheme_config()` function.
"""
"""Small library for persistent identifiers used in scholarly communication."""

import importlib
import pkgutil
Expand Down
68 changes: 9 additions & 59 deletions idutils/detectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,70 +14,18 @@
"""Functions for detecting the persistent identifier."""

from . import validators
from .proxies import current_idutils
from .proxies import custom_schemes_registry
from .schemes import IDUTILS_PID_SCHEMES as _IDUTILS_PID_SCHEMES
from .schemes import IDUTILS_SCHEME_FILTER as _IDUTILS_SCHEME_FILTER

IDUTILS_PID_SCHEMES = [
("doi", validators.is_doi),
("ark", validators.is_ark),
("handle", validators.is_handle),
("purl", validators.is_purl),
("lsid", validators.is_lsid),
("urn", validators.is_urn),
("ads", validators.is_ads),
("arxiv", validators.is_arxiv),
("ascl", validators.is_ascl),
("hal", validators.is_hal),
("pmcid", validators.is_pmcid),
("isbn", validators.is_isbn),
("issn", validators.is_issn),
("orcid", validators.is_orcid),
("isni", validators.is_isni),
("ean13", validators.is_ean13),
("ean8", validators.is_ean8),
("istc", validators.is_istc),
("gnd", validators.is_gnd),
("ror", validators.is_ror),
("pmid", validators.is_pmid),
("url", validators.is_url),
("sra", validators.is_sra),
("bioproject", validators.is_bioproject),
("biosample", validators.is_biosample),
("ensembl", validators.is_ensembl),
("uniprot", validators.is_uniprot),
("refseq", validators.is_refseq),
("genome", validators.is_genome),
("geo", validators.is_geo),
("arrayexpress_array", validators.is_arrayexpress_array),
("arrayexpress_experiment", validators.is_arrayexpress_experiment),
("swh", validators.is_swh),
("viaf", validators.is_viaf),
]
IDUTILS_PID_SCHEMES = _IDUTILS_PID_SCHEMES
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Kept for backwards compatibility when importing.

"""Definition of scheme name and associated test function.

Order of list is important, as identifier scheme detection will test in the
order given by this list."""


IDUTILS_SCHEME_FILTER = [
(
"url",
# None these can have URLs, in which case we exclude them
["isbn", "istc", "urn", "lsid", "issn", "ean8", "viaf"],
),
("ean8", ["gnd", "pmid", "viaf"]),
("ean13", ["gnd", "pmid"]),
("isbn", ["gnd", "pmid"]),
("orcid", ["gnd", "pmid"]),
("isni", ["gnd", "pmid"]),
(
"issn",
[
"gnd",
"viaf",
],
),
("pmid", ["viaf"]),
]
IDUTILS_SCHEME_FILTER = _IDUTILS_SCHEME_FILTER
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Kept for backwards compatibility when importing.

"""(present_scheme, [list of schemes to remove if present_scheme found])."""


Expand All @@ -87,7 +35,7 @@ def detect_identifier_schemes(val):
.. note:: Some schemes like PMID are very generic.
"""
schemes = []
scheme_validators = IDUTILS_PID_SCHEMES + current_idutils.pick_scheme_key(
scheme_validators = IDUTILS_PID_SCHEMES + custom_schemes_registry().pick_scheme_key(
"validator"
)
for scheme, test in scheme_validators:
Expand All @@ -111,7 +59,9 @@ def detect_identifier_schemes(val):
if val.startswith(viaf_url):
schemes.remove("handle")

scheme_filter = IDUTILS_SCHEME_FILTER + current_idutils.pick_scheme_key("filter")
scheme_filter = IDUTILS_SCHEME_FILTER + custom_schemes_registry().pick_scheme_key(
"filter"
)
for first, remove_schemes in scheme_filter:
if first in schemes:
schemes = list(filter(lambda x: x not in remove_schemes, schemes))
Expand Down
137 changes: 83 additions & 54 deletions idutils/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,48 @@
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.

"""Invenio IDUtils module for managing persistent identifiers used in scholarly communication."""
"""Extension class to collect and register new schemes via entrypoints.

In order to define your own custom schemes, you can use the following entry point to
register them:

.. code-block:: ini

[options.entry_points]
idutils.custom_schemes =
my_new_scheme = my_module.get_scheme_config_func

The entry point ``'my_new_scheme = my_module.get_scheme_config_func'`` defines an entry
point named ``my_new_scheme`` pointing to the function ``my_module.get_scheme_config_func``
which returns the config for your new registered scheme.

That function must return a dictionary with the following format:

.. code-block:: python

def get_scheme_config_func():
return {
# See examples in `idutils.validators` file.
"validator": lambda value: True if value_is_valid else False,
# Used in `idutils.normalizers.normalize_pid` function.
"normalizer": lambda value: normalized_value,
# See examples in `idutils.detectors.IDUTILS_SCHEME_FILTER` config.
"filter": ["list_of_schemes_to_filter_out"],
# Used in `idutils.normalizers.to_url` function.
"url_generator": lambda scheme, normalized_pid: "normalized_url",
}

Each key is optional; if a key is not provided, a default value is supplied by the
`idutils.ext._set_default_custom_scheme_config()` function.

Note: You can only add new schemes but not override existing ones.
"""

from threading import Lock

from importlib_metadata import entry_points

from .detectors import IDUTILS_PID_SCHEMES
from .proxies import current_idutils
from .schemes import IDUTILS_PID_SCHEMES


def _set_default_custom_scheme_config(scheme_config):
Expand All @@ -33,43 +69,50 @@ def _set_default_custom_scheme_config(scheme_config):
scheme_key in default_config.keys() for scheme_key in scheme_config.keys()
)

# Merge the provided scheme config with defaults
return {**default_config, **scheme_config}


class IDUtils(object):
"""Invenio extension."""
class CustomSchemesRegistry:
"""Singleton class for loading and storing custom schemes from entry points."""

def __init__(self, app=None):
"""Extension initialization."""
if app:
self.init_app(app)
_instance = None
_lock = Lock() # To ensure thread-safe singleton creation

def init_app(self, app):
"""Flask application initialiation."""
self.init_idutils_registry()
app.extensions["idutils"] = self
def __new__(cls):
"""Create a new instance."""
with cls._lock:
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance._custom_schemes_registry = (
{}
) # Internal dictionary to store schemes
cls._instance._load_entry_points("idutils.custom_schemes")
return cls._instance

@property
def custom_schemes(self):
"""Return the registered custom schemes.

Each item of the registry is of the format:
{
"custom_scheme": {
# See examples in `idutils.validators` file.
"validator": lambda value: True else False,
# Used in `idutils.normalizers.normalize_pid` function.
"normalizer": lambda value: normalized_value,
# See examples in `idutils.detectors.IDUTILS_SCHEME_FILTER` config.
"filter": ["list_of_schemes_to_filter_out"],
# Used in `idutils.normalizers.to_url` function.
"url_generator": lambda scheme, normalized_pid: "normalized_url",
{
"custom_scheme": {

# See examples in `idutils.validators` file.
"validator": lambda value: True else False,
# Used in `idutils.normalizers.normalize_pid` function.
"normalizer": lambda value: normalized_value,
# See examples in `idutils.detectors.IDUTILS_SCHEME_FILTER` config.
"filter": ["list_of_schemes_to_filter_out"],
# Used in `idutils.normalizers.to_url` function.
"url_generator": lambda scheme, normalized_pid: "normalized_url"

}

}
}

See examples in `idutils.validators` file.
"""
return self._custom_schemes
return self._custom_schemes_registry

def pick_scheme_key(self, key):
"""Serialize the registered custom schemes by key.
Expand All @@ -78,40 +121,26 @@ def pick_scheme_key(self, key):
"""
return [(scheme, config[key]) for scheme, config in self.custom_schemes.items()]

def init_idutils_registry(self):
"""Initialize custom schemes registries."""
self._custom_schemes = {}
self._load_entry_point(
self._custom_schemes,
"idutils.custom_schemes",
)

def _load_entry_point(self, registry, ep_name):
"""Load entry points inton the given registry."""
def _load_entry_points(self, ep_name):
"""Load entry points into the internal registry."""
existing_id_names = set(scheme[0] for scheme in IDUTILS_PID_SCHEMES)

# Load entry points from the specified group
for ep in set(entry_points(group=ep_name)):
name = ep.name
# Assert that the custom scheme is not overriding any existing scheme
assert name not in existing_id_names

# Ensure no custom scheme overrides existing ones
assert name not in existing_id_names, f"Scheme {name} already exists!"

# Load the function from entry point
scheme_register_func = ep.load()
assert callable(scheme_register_func)
assert callable(scheme_register_func), f"{name} must be callable!"

# Call the function to get the scheme config
scheme_config = scheme_register_func()
scheme_config = _set_default_custom_scheme_config(scheme_config)
registry.setdefault(name, scheme_config)


def finalize_app(app):
"""Finalize app."""
init(app)


def api_finalize_app(app):
"""Finalize app."""
init(app)

# Set default config values if needed
scheme_config = _set_default_custom_scheme_config(scheme_config)

def init(app):
"""Init app."""
ext = app.extensions["idutils"]
# Store in the registry
self._custom_schemes_registry.setdefault(name, scheme_config)
8 changes: 5 additions & 3 deletions idutils/normalizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import isbnlib

from .proxies import current_idutils
from .proxies import custom_schemes_registry
from .utils import *
from .validators import is_arxiv_post_2007, is_arxiv_pre_2007

Expand Down Expand Up @@ -172,7 +172,9 @@ def normalize_pid(val, scheme):
elif scheme == "viaf":
return normalize_viaf(val)
else:
for custom_scheme, normalizer in current_idutils.pick_scheme_key("normalizer"):
for custom_scheme, normalizer in custom_schemes_registry().pick_scheme_key(
"normalizer"
):
if scheme == custom_scheme:
return normalizer(val)
return val
Expand Down Expand Up @@ -234,7 +236,7 @@ def to_url(val, scheme, url_scheme="http"):
elif scheme in ["purl", "url"]:
return pid
else:
for custom_scheme, url_generator in current_idutils.pick_scheme_key(
for custom_scheme, url_generator in custom_schemes_registry().pick_scheme_key(
"url_generator"
):
if scheme == custom_scheme:
Expand Down
7 changes: 3 additions & 4 deletions idutils/proxies.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@

"""Proxy definitions."""

from flask import current_app
from werkzeug.local import LocalProxy
from .ext import CustomSchemesRegistry

current_idutils = LocalProxy(lambda: current_app.extensions["idutils"])
"""Proxy to the extension."""
custom_schemes_registry = lambda: CustomSchemesRegistry()
"""Proxy to the custom scheme registry."""
Loading
Loading