From 6449ddf52079ea4e4092bffe0c3e74c767f0f326 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 16 Sep 2023 21:57:20 +0200 Subject: [PATCH] bibcode/ads: normalize unicode * Closes #85. --- idutils/normalizers.py | 2 ++ idutils/validators.py | 1 + tests/test_idutils.py | 6 ++++++ 3 files changed, 9 insertions(+) diff --git a/idutils/normalizers.py b/idutils/normalizers.py index baeb3eb..2c624ea 100644 --- a/idutils/normalizers.py +++ b/idutils/normalizers.py @@ -13,6 +13,7 @@ """ID normalizer helper functions.""" +import unicodedata import isbnlib from .proxies import custom_schemes_registry @@ -34,6 +35,7 @@ def normalize_handle(val): def normalize_ads(val): """Normalize an ADS bibliographic code.""" + val = unicodedata.normalize("NFKD", val) m = ads_regexp.match(val) return m.group(2) diff --git a/idutils/validators.py b/idutils/validators.py index 364279f..e478a52 100644 --- a/idutils/validators.py +++ b/idutils/validators.py @@ -187,6 +187,7 @@ def is_urn(val): def is_ads(val): """Test if argument is an ADS bibliographic code.""" + val = unicodedata.normalize("NFKD", val) return ads_regexp.match(val) diff --git a/tests/test_idutils.py b/tests/test_idutils.py index c5eeaa0..1da757c 100644 --- a/tests/test_idutils.py +++ b/tests/test_idutils.py @@ -268,6 +268,12 @@ "2017zndo....495787v", "http://ui.adsabs.harvard.edu/#abs/2017zndo....495787v", ), + ( + "1992ApJ…400L…1W", + ["ads"], + "1992ApJ...400L...1W", + "http://ui.adsabs.harvard.edu/#abs/1992ApJ...400L...1W", + ), ( "0000000218250097", ["orcid", "isni"],