diff --git a/src/mijnbib/mijnbibliotheek.py b/src/mijnbib/mijnbibliotheek.py index fd29d54..75c5f83 100644 --- a/src/mijnbib/mijnbibliotheek.py +++ b/src/mijnbib/mijnbibliotheek.py @@ -72,6 +72,9 @@ def __init__(self, username: str, password: str, city: str | None = None, login_ self._br.set_handle_robots(False) self._br.set_header("User-Agent", USER_AGENT) + # Open the door for overriding parsers (but still keep private for now) + self._loans_page_parser = LoansListPageParser() + # *** PUBLIC METHODS *** def login(self) -> None: @@ -113,7 +116,7 @@ def get_loans(self, account_id: str) -> list[Loan]: url = self.BASE_URL + f"/mijn-bibliotheek/lidmaatschappen/{account_id}/uitleningen" html_string = self._open_account_loans_page(url) try: - loans = LoansListPageParser(html_string, self.BASE_URL, account_id).parse() + loans = self._loans_page_parser.parse(html_string, self.BASE_URL, account_id) except TemporarySiteError as e: raise e except Exception as e: diff --git a/src/mijnbib/parsers.py b/src/mijnbib/parsers.py index 0c8ecae..e73260c 100644 --- a/src/mijnbib/parsers.py +++ b/src/mijnbib/parsers.py @@ -31,13 +31,14 @@ def parse(self): pass -class LoansListPageParser(Parser): - def __init__(self, html: str, base_url: str, account_id: str): - self._html = html - self._base_url = base_url - self._acc_id = account_id +class ParserNew(ABC): + @abstractmethod + def parse(self, html: str, *args, **kwargs): + pass + - def parse(self) -> list[Loan]: +class LoansListPageParser(ParserNew): + def parse(self, html: str, base_url: str, account_id: str) -> list[Loan]: """Return loans. >>> html_string=''' @@ -87,7 +88,7 @@ def parse(self) -> list[Loan]: ... ... ... 
''' - >>> LoansListPageParser(html_string,"https://city.bibliotheek.be","123456").parse() # doctest: +NORMALIZE_WHITESPACE + >>> LoansListPageParser().parse(html_string,"https://city.bibliotheek.be","123456") # doctest: +NORMALIZE_WHITESPACE [Loan(title='Erebus', loan_from=datetime.date(2023, 11, 25), loan_till=datetime.date(2023, 12, 23), author='Palin, Michael', type='Boek', extendable=True, extend_url='https://city.bibliotheek.be/mijn-bibliotheek/lidmaatschappen/374052/uitleningen/verlengen?loan-ids=6207416', @@ -96,7 +97,6 @@ def parse(self) -> list[Loan]: cover_url='https://webservices.bibliotheek.be/index.php?func=cover&ISBN=9789000359325&VLACCnr=10157217&CDR=&EAN=&ISMN=&EBS=&coversize=medium', account_id='123456')] """ - html = self._html loans = [] soup = BeautifulSoup(html, "html.parser") @@ -129,7 +129,9 @@ def parse(self) -> list[Loan]: elif child.name == "div": # loan div # we convert child soup object to string, so called function # can be used also easily for unit tests - loan = self._get_loan_info_from_div(str(child), branch_name) + loan = self._get_loan_info_from_div( + str(child), base_url, branch_name, account_id + ) loans.append(loan) else: # should not happen, fail gracefully for now. 
@@ -137,7 +139,9 @@ def parse(self) -> list[Loan]: _log.debug("Number of loans found: %s", len(loans)) return loans - def _get_loan_info_from_div(self, loan_div_html: str, branch: str) -> Loan: + def _get_loan_info_from_div( + self, loan_div_html: str, base_url: str, branch: str, acc_id: str + ) -> Loan: """Return loan from html loan_div blob.""" loan_div = BeautifulSoup(loan_div_html, "html.parser") loan = {} @@ -196,7 +200,7 @@ def _get_loan_info_from_div(self, loan_div_html: str, branch: str) -> Loan: else: loan["extendable"] = True extend_url = extend_loan_div.a["href"] # type:ignore - extend_url = urllib.parse.urljoin(self._base_url, extend_url) # type:ignore + extend_url = urllib.parse.urljoin(base_url, extend_url) # type:ignore loan["extend_url"] = extend_url loan["extend_id"] = extend_loan_div.input.get("id") except AttributeError: @@ -219,7 +223,7 @@ def _get_loan_info_from_div(self, loan_div_html: str, branch: str) -> Loan: id=loan.get("id", ""), url=loan.get("url", ""), cover_url=loan.get("cover_url", ""), - account_id=self._acc_id, + account_id=acc_id, ) diff --git a/tests/test_mijnbibliotheek.py b/tests/test_mijnbibliotheek.py index 93a1728..331f8c1 100644 --- a/tests/test_mijnbibliotheek.py +++ b/tests/test_mijnbibliotheek.py @@ -9,6 +9,7 @@ from mijnbib import MijnBibliotheek from mijnbib.errors import AuthenticationError from mijnbib.login_handlers import LoginByForm, LoginByOAuth +from mijnbib.models import Loan CONFIG_FILE = "mijnbib.ini" @@ -25,6 +26,12 @@ class X(str): class FakeMechanizeBrowser: + """Fake Browser for easier testing. + + Set the string to be returned upon form submission using `form_response`. + Customize the string for faking (in)valid login responses. 
+ """ + def __init__(self, form_response: str) -> None: self._form_response = form_response.encode("utf8") # trick for nested prop, from https://stackoverflow.com/a/35190607/50899 @@ -146,3 +153,21 @@ def test_login_by_oauth_already_logged_in(self, creds_config, caplog): assert "already logged in" in caplog.text # to verify we do take fast lane assert mb._logged_in + + +class TestCustomParser: + def test_loan_page_parser_can_be_overridden(self): + # Arrange + class MyCustomLoanParser: + def parse(self, _html, _base_url, _account_id): + return [Loan("some title")] + + mb = MijnBibliotheek("user", "pwd") + # Fake both (a) valid login, and (b) some response on fetching loans page + mb._br = FakeMechanizeBrowser(form_response="Profiel") # type: ignore + + # Act + mb._loans_page_parser = MyCustomLoanParser() # type:ignore + + # Assert + assert mb.get_loans(account_id="whatever") == [Loan("some title")] diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 96cf8d8..c4f1d37 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -21,8 +21,8 @@ def test_parse_item_count_from_li(self): class TestLoansListPageParser: def test_parse_account_loans_page(self): # Happy flow test --> see doctest - assert LoansListPageParser("", "", "").parse() == [] - assert LoansListPageParser("bogus", "", "").parse() == [] + assert LoansListPageParser().parse(html="", base_url="", account_id="") == [] + assert LoansListPageParser().parse(html="bogus", base_url="", account_id="") == [] class TestReservationsPageParser: