Skip to content

Commit

Permalink
Merge pull request #4146 from michamos/hal-dont-match-francesco
Browse files Browse the repository at this point in the history
workflows: improve France match for HAL candidates
  • Loading branch information
michamos authored Dec 20, 2021
2 parents c3f1288 + ef1e47a commit 6a472df
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 2 deletions.
4 changes: 2 additions & 2 deletions inspirehep/modules/workflows/tasks/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1193,8 +1193,8 @@ def check_if_france_in_fulltext(obj, eng):
fulltext = get_fulltext(obj)
if not fulltext:
return
fulltext_lower = fulltext.lower()
return 'france' in fulltext_lower or 'in2p3' in fulltext_lower
regex = re.compile(r"\bfrance\b|in2p3", re.UNICODE | re.IGNORECASE)
return regex.search(fulltext)


def check_if_france_in_raw_affiliations(obj, eng):
Expand Down
32 changes: 32 additions & 0 deletions tests/unit/workflows/test_workflows_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,38 @@ def test_check_if_france_in_fulltext_when_france_in_header(mocked_get_document,
assert france_in_fulltext


@patch("inspirehep.modules.workflows.tasks.actions.get_document_in_workflow")
def test_check_if_france_in_fulltext_doesnt_include_francesco(mocked_get_document, app):
    """Check that "Francesco" alone is not reported as a France match.

    The mocked GROBID endpoint returns a document whose only text is an
    author called "Francesco, Papa"; ``check_if_france_in_fulltext`` is
    expected to give a falsy result for it.
    """
    # Minimal stand-in for the workflow object handed to the task.
    workflow_obj = MagicMock()
    workflow_obj.data = {
        'authors': [
            {"full_name": "author 1"},
            {"full_name": "author 2"},
            {"full_name": "author 3"}
        ]
    }
    workflow_obj.extra_data = {}

    grobid_xml = "<author>Francesco, Papa</author>"
    config_overrides = {"GROBID_URL": "http://grobid_url.local"}

    # Override the GROBID URL in the app config and intercept HTTP traffic,
    # so the POST to the fake GROBID endpoint returns the canned XML above.
    with patch.dict(current_app.config, config_overrides), \
            requests_mock.Mocker() as http_mock:
        http_mock.register_uri(
            'POST', 'http://grobid_url.local/api/processFulltextDocument',
            text=grobid_xml,
            headers={'content-type': 'application/xml'},
            status_code=200,
        )
        with tempfile.NamedTemporaryFile() as document:
            # The patched context manager yields the path of an (empty)
            # temporary file as the workflow document.
            mocked_get_document.return_value.__enter__.return_value = document.name
            result = check_if_france_in_fulltext(workflow_obj, None)

    assert not result


def test_check_if_france_in_affiliations(app):
obj = MagicMock()
obj.data = {
Expand Down

0 comments on commit 6a472df

Please sign in to comment.