Skip to content

Commit

Permalink
global: add class based registry to extend idutils schemes
Browse files Browse the repository at this point in the history
  • Loading branch information
zzacharo committed Oct 16, 2024
1 parent aded7b9 commit 5734251
Show file tree
Hide file tree
Showing 9 changed files with 162 additions and 129 deletions.
80 changes: 80 additions & 0 deletions idutils/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
#
# This file is part of IDUtils
# Copyright (C) 2024 CERN.
#
# IDUtils is free software; you can redistribute it and/or modify
# it under the terms of the Revised BSD License; see LICENSE file for
# more details.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.

"""Functions for detecting the persistent identifier."""

from . import validators

IDUTILS_PID_SCHEMES_CONFIG = [
    ("doi", validators.is_doi),
    ("ark", validators.is_ark),
    ("handle", validators.is_handle),
    ("purl", validators.is_purl),
    ("lsid", validators.is_lsid),
    ("urn", validators.is_urn),
    ("ads", validators.is_ads),
    ("arxiv", validators.is_arxiv),
    ("ascl", validators.is_ascl),
    ("hal", validators.is_hal),
    ("pmcid", validators.is_pmcid),
    ("isbn", validators.is_isbn),
    ("issn", validators.is_issn),
    ("orcid", validators.is_orcid),
    ("isni", validators.is_isni),
    ("ean13", validators.is_ean13),
    ("ean8", validators.is_ean8),
    ("istc", validators.is_istc),
    ("gnd", validators.is_gnd),
    ("ror", validators.is_ror),
    ("pmid", validators.is_pmid),
    ("url", validators.is_url),
    ("sra", validators.is_sra),
    ("bioproject", validators.is_bioproject),
    ("biosample", validators.is_biosample),
    ("ensembl", validators.is_ensembl),
    ("uniprot", validators.is_uniprot),
    ("refseq", validators.is_refseq),
    ("genome", validators.is_genome),
    ("geo", validators.is_geo),
    ("arrayexpress_array", validators.is_arrayexpress_array),
    ("arrayexpress_experiment", validators.is_arrayexpress_experiment),
    ("swh", validators.is_swh),
    ("viaf", validators.is_viaf),
]
"""Scheme names paired with the test function that recognises them.

The order of the list is important: identifier scheme detection runs the
test functions in the order given here."""


IDUTILS_SCHEME_FILTER_CONFIG = [
    # None of these can have URLs, so they are excluded whenever "url" is
    # among the detected schemes.
    ("url", ["isbn", "istc", "urn", "lsid", "issn", "ean8", "viaf"]),
    ("ean8", ["gnd", "pmid", "viaf"]),
    ("ean13", ["gnd", "pmid"]),
    ("isbn", ["gnd", "pmid"]),
    ("orcid", ["gnd", "pmid"]),
    ("isni", ["gnd", "pmid"]),
    ("issn", ["gnd", "viaf"]),
    ("pmid", ["viaf"]),
]
"""Pairs of ``(present_scheme, schemes_to_remove)``.

When ``present_scheme`` is found, every scheme in ``schemes_to_remove`` is
dropped from the detection result."""
67 changes: 8 additions & 59 deletions idutils/detectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,70 +14,17 @@
"""Functions for detecting the persistent identifier."""

from . import validators
from .proxies import current_idutils
from .config import IDUTILS_PID_SCHEMES_CONFIG, IDUTILS_SCHEME_FILTER_CONFIG
from .proxies import custom_schemes_registry

IDUTILS_PID_SCHEMES = [
("doi", validators.is_doi),
("ark", validators.is_ark),
("handle", validators.is_handle),
("purl", validators.is_purl),
("lsid", validators.is_lsid),
("urn", validators.is_urn),
("ads", validators.is_ads),
("arxiv", validators.is_arxiv),
("ascl", validators.is_ascl),
("hal", validators.is_hal),
("pmcid", validators.is_pmcid),
("isbn", validators.is_isbn),
("issn", validators.is_issn),
("orcid", validators.is_orcid),
("isni", validators.is_isni),
("ean13", validators.is_ean13),
("ean8", validators.is_ean8),
("istc", validators.is_istc),
("gnd", validators.is_gnd),
("ror", validators.is_ror),
("pmid", validators.is_pmid),
("url", validators.is_url),
("sra", validators.is_sra),
("bioproject", validators.is_bioproject),
("biosample", validators.is_biosample),
("ensembl", validators.is_ensembl),
("uniprot", validators.is_uniprot),
("refseq", validators.is_refseq),
("genome", validators.is_genome),
("geo", validators.is_geo),
("arrayexpress_array", validators.is_arrayexpress_array),
("arrayexpress_experiment", validators.is_arrayexpress_experiment),
("swh", validators.is_swh),
("viaf", validators.is_viaf),
]
IDUTILS_PID_SCHEMES = IDUTILS_PID_SCHEMES_CONFIG
"""Definition of scheme name and associated test function.
Order of list is important, as identifier scheme detection will test in the
order given by this list."""


IDUTILS_SCHEME_FILTER = [
(
"url",
# None these can have URLs, in which case we exclude them
["isbn", "istc", "urn", "lsid", "issn", "ean8", "viaf"],
),
("ean8", ["gnd", "pmid", "viaf"]),
("ean13", ["gnd", "pmid"]),
("isbn", ["gnd", "pmid"]),
("orcid", ["gnd", "pmid"]),
("isni", ["gnd", "pmid"]),
(
"issn",
[
"gnd",
"viaf",
],
),
("pmid", ["viaf"]),
]
IDUTILS_SCHEME_FILTER = IDUTILS_SCHEME_FILTER_CONFIG
"""(present_scheme, [list of schemes to remove if present_scheme found])."""


Expand All @@ -87,7 +34,7 @@ def detect_identifier_schemes(val):
.. note:: Some schemes like PMID are very generic.
"""
schemes = []
scheme_validators = IDUTILS_PID_SCHEMES + current_idutils.pick_scheme_key(
scheme_validators = IDUTILS_PID_SCHEMES + custom_schemes_registry().pick_scheme_key(
"validator"
)
for scheme, test in scheme_validators:
Expand All @@ -111,7 +58,9 @@ def detect_identifier_schemes(val):
if val.startswith(viaf_url):
schemes.remove("handle")

scheme_filter = IDUTILS_SCHEME_FILTER + current_idutils.pick_scheme_key("filter")
scheme_filter = IDUTILS_SCHEME_FILTER + custom_schemes_registry().pick_scheme_key(
"filter"
)
for first, remove_schemes in scheme_filter:
if first in schemes:
schemes = list(filter(lambda x: x not in remove_schemes, schemes))
Expand Down
78 changes: 34 additions & 44 deletions idutils/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@

"""Invenio IDUtils module for managing persistent identifiers used in scholarly communication."""

from threading import Lock

from importlib_metadata import entry_points

from .detectors import IDUTILS_PID_SCHEMES
from .proxies import current_idutils
from .config import IDUTILS_PID_SCHEMES_CONFIG


def _set_default_custom_scheme_config(scheme_config):
Expand All @@ -33,21 +34,26 @@ def _set_default_custom_scheme_config(scheme_config):
scheme_key in default_config.keys() for scheme_key in scheme_config.keys()
)

# Merge the provided scheme config with defaults
return {**default_config, **scheme_config}


class IDUtils(object):
"""Invenio extension."""
class CustomSchemesRegistry:
"""Singleton class for loading and storing custom schemes from entry points."""

def __init__(self, app=None):
"""Extension initialization."""
if app:
self.init_app(app)
_instance = None
_lock = Lock() # To ensure thread-safe singleton creation

def init_app(self, app):
"""Flask application initialiation."""
self.init_idutils_registry()
app.extensions["idutils"] = self
def __new__(cls):
    """Return the shared registry, creating and loading it on first use.

    The first call builds the instance, gives it an empty scheme
    dictionary and loads the ``idutils.custom_schemes`` entry point group
    into it. Later calls return the instance stored on the class.

    :returns: The instance stored in ``cls._instance``.
    :raises: Whatever ``_load_entry_points`` raises. In that case nothing
        is stored on the class, so the next call retries the load instead
        of handing out a partially loaded registry.
    """
    # Creation happens under the class-level lock so that concurrent
    # first calls cannot each build their own instance.
    with cls._lock:
        if cls._instance is None:
            instance = super().__new__(cls)
            # Internal dictionary to store schemes
            instance._custom_schemes_registry = {}
            instance._load_entry_points("idutils.custom_schemes")
            # Publish only after loading succeeded; assigning earlier
            # would cache a half-initialised object if loading raised.
            cls._instance = instance
    return cls._instance

@property
def custom_schemes(self):
Expand All @@ -66,10 +72,8 @@ def custom_schemes(self):
"url_generator": lambda scheme, normalized_pid: "normalized_url",
}
}
See examples in `idutils.validators` file.
"""
return self._custom_schemes
return self._custom_schemes_registry

def pick_scheme_key(self, key):
"""Serialize the registered custom registered schemes by key.
Expand All @@ -78,40 +82,26 @@ def pick_scheme_key(self, key):
"""
return [(scheme, config[key]) for scheme, config in self.custom_schemes.items()]

def init_idutils_registry(self):
"""Initialize custom schemes registries."""
self._custom_schemes = {}
self._load_entry_point(
self._custom_schemes,
"idutils.custom_schemes",
)

def _load_entry_point(self, registry, ep_name):
"""Load entry points inton the given registry."""
existing_id_names = set(scheme[0] for scheme in IDUTILS_PID_SCHEMES)
def _load_entry_points(self, ep_name):
"""Load entry points into the internal registry."""
existing_id_names = set(scheme[0] for scheme in IDUTILS_PID_SCHEMES_CONFIG)

# Load entry points from the specified group
for ep in set(entry_points(group=ep_name)):
name = ep.name
# Assert that the custom scheme is not overriding any existing scheme
assert name not in existing_id_names

# Ensure no custom scheme overrides existing ones
assert name not in existing_id_names, f"Scheme {name} already exists!"

# Load the function from entry point
scheme_register_func = ep.load()
assert callable(scheme_register_func)
assert callable(scheme_register_func), f"{name} must be callable!"

# Call the function to get the scheme config
scheme_config = scheme_register_func()
scheme_config = _set_default_custom_scheme_config(scheme_config)
registry.setdefault(name, scheme_config)


def finalize_app(app):
"""Finalize app."""
init(app)


def api_finalize_app(app):
"""Finalize app."""
init(app)

# Set default config values if needed
scheme_config = _set_default_custom_scheme_config(scheme_config)

def init(app):
"""Init app."""
ext = app.extensions["idutils"]
# Store in the registry
self._custom_schemes_registry.setdefault(name, scheme_config)
8 changes: 5 additions & 3 deletions idutils/normalizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import isbnlib

from .proxies import current_idutils
from .proxies import custom_schemes_registry
from .utils import *
from .validators import is_arxiv_post_2007, is_arxiv_pre_2007

Expand Down Expand Up @@ -172,7 +172,9 @@ def normalize_pid(val, scheme):
elif scheme == "viaf":
return normalize_viaf(val)
else:
for custom_scheme, normalizer in current_idutils.pick_scheme_key("normalizer"):
for custom_scheme, normalizer in custom_schemes_registry().pick_scheme_key(
"normalizer"
):
if scheme == custom_scheme:
return normalizer(val)
return val
Expand Down Expand Up @@ -234,7 +236,7 @@ def to_url(val, scheme, url_scheme="http"):
elif scheme in ["purl", "url"]:
return pid
else:
for custom_scheme, url_generator in current_idutils.pick_scheme_key(
for custom_scheme, url_generator in custom_schemes_registry().pick_scheme_key(
"url_generator"
):
if scheme == custom_scheme:
Expand Down
7 changes: 3 additions & 4 deletions idutils/proxies.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@

"""Proxy definitions."""

from flask import current_app
from werkzeug.local import LocalProxy
from .ext import CustomSchemesRegistry

current_idutils = LocalProxy(lambda: current_app.extensions["idutils"])
"""Proxy to the extension."""
def custom_schemes_registry():
    """Proxy to the custom scheme registry.

    Calls ``CustomSchemesRegistry()`` and returns the result. The lookup is
    done on every call rather than once at import time, so importing this
    module does not by itself instantiate the registry.

    :returns: The object produced by ``CustomSchemesRegistry()``.
    """
    return CustomSchemesRegistry()
1 change: 0 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ install_requires =

[options.extras_require]
tests =
invenio-app>=1.4.0
pytest-black-ng>=0.4.0
pytest-cache>=1.0
pytest-runner>=2.6.2
Expand Down
14 changes: 0 additions & 14 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,6 @@
"""Pytest configuration."""

import pytest
from invenio_app.factory import create_api


@pytest.fixture(scope="module")
def create_app(entry_points):
"""Application factory fixture."""
return create_api


@pytest.fixture(scope="module")
def base_app(base_app):
"""Application factory fixture."""
with base_app.app_context():
yield base_app


@pytest.fixture(scope="module")
Expand Down
27 changes: 27 additions & 0 deletions tests/test_custom_scheme_registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
#
# This file is part of IDUtils
# Copyright (C) 2024 CERN.
#
# IDUtils is free software; you can redistribute it and/or modify
# it under the terms of the Revised BSD License; see LICENSE file for
# more details.
#
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.

"""Persistent identifier utilities tests."""

import pytest

from idutils.proxies import custom_schemes_registry


def test_custom_registry_singleton(entry_points):
    """Check that two registry lookups hand back the very same object."""
    first_lookup, second_lookup = (
        custom_schemes_registry(),
        custom_schemes_registry(),
    )

    assert first_lookup is second_lookup
Loading

0 comments on commit 5734251

Please sign in to comment.