From a359d1057ce23ce83bd67d5901b89155071e9597 Mon Sep 17 00:00:00 2001 From: Isaac Schifferer Date: Tue, 12 Dec 2023 14:50:03 -0600 Subject: [PATCH] Create more robust check for old vs new book selection syntax (#87) * Create more robust check for old vs new book selection syntax and give more descriptive error messages when syntaxes are mixed * Fix comment formatting * Give correct error message when a range of books is used with the comma-separated syntax after the first selection --- machine/scripture/parse.py | 31 ++++++++++++++++++++++++++----- tests/scripture/test_parse.py | 15 +++++++++++---- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/machine/scripture/parse.py b/machine/scripture/parse.py index a0ff8dc..73611a9 100644 --- a/machine/scripture/parse.py +++ b/machine/scripture/parse.py @@ -1,9 +1,14 @@ +import re from typing import Dict, List, Set, Union from .canon import book_id_to_number from .constants import ORIGINAL_VERSIFICATION from .verse_ref import Versification +COMMA_SEPARATED_BOOKS = re.compile(r"([A-Z\d]{3}|OT|NT)(, ?([A-Z\d]{3}|OT|NT))*") +BOOK_RANGE = re.compile(r"[A-Z\d]{3}-[A-Z\d]{3}") +CHAPTER_SELECTION = re.compile(r"[A-Z\d]{3} ?(\d+|\d+-\d+)(, ?(\d+|\d+-\d+))*") + def get_books(books: Union[str, List[str]]) -> Set[int]: if isinstance(books, str): @@ -43,13 +48,29 @@ def get_chapters( chapters = {} if isinstance(chapter_selections, str): - if ";" not in chapter_selections and not any( - s.isdigit() and (i == len(chapter_selections) - 1 or not chapter_selections[i + 1].isalpha()) - for i, s in enumerate(chapter_selections) - ): # Backwards compatibility with get_books syntax: + chapter_selections = chapter_selections.strip() + + if ";" in chapter_selections: + chapter_selections = chapter_selections.split(";") + elif re.fullmatch(COMMA_SEPARATED_BOOKS, chapter_selections) is not None: chapter_selections = chapter_selections.split(",") + elif chapter_selections.startswith("-"): + raise ValueError(f"Cannot subtract before adding sections: {chapter_selections}") + elif re.search(BOOK_RANGE, chapter_selections): + if len(chapter_selections) == 7: + chapter_selections = [chapter_selections] + else: + raise ValueError( + "Invalid syntax. If one of your selections is a range of books, \ + selections must be seprated with semicolons." + ) + elif re.fullmatch(CHAPTER_SELECTION, chapter_selections) is None: + raise ValueError( + "Invalid syntax. If one of your selections includes specific chapters or subtraction, \ + selections must be separated with semicolons." + ) else: - chapter_selections = chapter_selections.split(";") + chapter_selections = [chapter_selections] for section in chapter_selections: section = section.strip() diff --git a/tests/scripture/test_parse.py b/tests/scripture/test_parse.py index fa9e800..be11f63 100644 --- a/tests/scripture/test_parse.py +++ b/tests/scripture/test_parse.py @@ -43,10 +43,6 @@ def test_get_chapters() -> None: whole_bible = {i: [] for i in range(1, 67)} assert get_chapters("NT,OT") == whole_bible - del whole_bible[2] # EXO - del whole_bible[41] # MRK - assert get_chapters("NT,OT,-MRK,-EXO") == whole_bible - assert get_chapters("MAT;MRK") == {40: [], 41: []} assert get_chapters("MAT; MRK") == {40: [], 41: []} assert get_chapters("MAT1,2,3") == {40: [1, 2, 3]} @@ -57,7 +53,10 @@ def test_get_chapters() -> None: assert get_chapters("2JN-3JN;EXO1,8,3-5;GEN") == {1: [], 2: [1, 3, 4, 5, 8], 63: [], 64: []} assert get_chapters("1JN 1;1JN 2;1JN 3-5") == {62: []} assert get_chapters("MAT-ROM;-ACT4-28") == {40: [], 41: [], 42: [], 43: [], 44: [1, 2, 3], 45: []} + assert get_chapters("2JN;-2JN 1") == {} + del whole_bible[2] # EXO + del whole_bible[41] # MRK assert get_chapters("NT;OT;-MRK;-EXO") == whole_bible test_bible = {i: [] for i in range(40, 67)} test_chapters_mat = [1, 2] + [i for i in range(6, 17)] + [i for i in range(18, 29)] @@ -110,3 +109,11 @@ def test_get_chapters() -> None: get_chapters("NT;OT;-ABC") with raises(ValueError): get_chapters("MAT;-ABC 1") + + # mixing old (comma-separated) and new syntax + with raises(ValueError): + get_chapters("NT,OT,-MRK,-EXO") + with raises(ValueError): + get_chapters("OT,MAT1") + with raises(ValueError): + get_chapters("OT,MAT-LUK")