Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

component_templates: allow the usage of component_templates instead of mappings for the definition of the records. #248

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 17 additions & 13 deletions invenio_oaiserver/percolator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,32 @@
from invenio_oaiserver.query import query_string_parser


def _build_percolator_index_name(index):
def _build_percolator_index_name(index, mapping_path=None):
"""Build percolator index name."""
suffix = "-percolators"
return build_index_name(index, suffix=suffix, app=current_app)
if index in current_search.mappings:
# We have some mappings. Let's create a new index
index_name = build_index_name(index, suffix="-percolators", app=current_app)
if not current_search_client.indices.exists(index_name):
_create_percolator_mapping(index, index_name, mapping_path)
else:
# No mappings are registered for this index, so we assume it is defined through component templates that declare the percolator field `query`.
# If the index does not use such a component template, indexing the percolator query fails with an error like:
# field 'query' not defined
index_name = build_index_name(index, suffix="", app=current_app)
return index_name


def _create_percolator_mapping(index, mapping_path=None):
def _create_percolator_mapping(index, percolator_index, mapping_path=None):
"""Update mappings with the percolator field.

.. note::

This is only needed from ElasticSearch v5 onwards, because percolators
are now just a special type of field inside mappings.

This is not needed at all if the record has been defined using component templates and includes the
``oairecord`` component template.
"""
percolator_index = _build_percolator_index_name(index)
if not mapping_path:
mapping_path = current_search.mappings[index]
if not current_search_client.indices.exists(percolator_index):
Expand All @@ -57,12 +68,9 @@ def _new_percolator(spec, search_pattern):
# Skip indices/mappings not used by OAI-PMH
if not index.startswith(oai_records_index):
continue
# Create the percolator doc_type in the existing index for >= ES5
# TODO: Consider doing this only once in app initialization
try:
_create_percolator_mapping(index, mapping_path)
current_search_client.index(
index=_build_percolator_index_name(index),
index=_build_percolator_index_name(index, mapping_path),
id="oaiset-{}".format(spec),
body={"query": query},
)
Expand Down Expand Up @@ -166,13 +174,9 @@ def sets_search_all(records):
return []

record_index = str(current_app.config["OAISERVER_RECORD_INDEX"])
# TODO: We shouldn't have to always create the percolator mapping here
_create_percolator_mapping(record_index)
percolator_index = _build_percolator_index_name(record_index)
record_sets = [[] for _ in range(len(records))]

result = percolate_query(percolator_index, documents=records)

prefix = "oaiset-"
prefix_len = len(prefix)

Expand Down
9 changes: 9 additions & 0 deletions invenio_oaiserver/search/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2016-2018 CERN.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""Module with JSON schemas for an internal ``_oai`` field."""
9 changes: 9 additions & 0 deletions invenio_oaiserver/search/component_templates/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2016-2018 CERN.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""Module with JSON schemas for an internal ``oai`` field."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2016-2018 CERN.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""Module with JSON schemas for an internal ``oai`` field."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"template": {
"mappings": {
"properties": {
"pids": {
"properties": {
"oai": {
"properties": {
"id": {
"type": "keyword"
}
}
}
}
},
"query": {
"type": "percolator"
}
}
}
}
}
2 changes: 1 addition & 1 deletion invenio_oaiserver/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def sanitize_unicode(value):
Following the W3C recommendation: https://www.w3.org/TR/REC-xml/#charsets
Based on https://lsimons.wordpress.com/2011/03/17/stripping-illegal-characters-out-of-xml-in-python/ # noqa
"""
return re.sub("[\x00-\x08\x0B\x0C\x0E-\x1F\uD800-\uDFFF\uFFFE\uFFFF]", "", value)
return re.sub("[\x00-\x08\x0b\x0c\x0e-\x1f\ud800-\udfff\ufffe\uffff]", "", value)


def record_sets_fetcher(record):
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ invenio_pidstore.minters =
oaiid = invenio_oaiserver.minters:oaiid_minter
invenio_pidstore.fetchers =
oaiid = invenio_oaiserver.fetchers:oaiid_fetcher
invenio_search.component_templates =
oairecord = invenio_oaiserver.search.component_templates

[build_sphinx]
source-dir = docs/
Expand Down