From bc7f056f86f8fac523ca3838851274f5bf4e120a Mon Sep 17 00:00:00 2001 From: Ward Van Heddeghem Date: Fri, 28 Jun 2024 19:31:30 +0200 Subject: [PATCH] Refactor Accounts parsers (params from instance to method) --- src/mijnbib/mijnbibliotheek.py | 3 ++- src/mijnbib/parsers.py | 18 +++++++----------- tests/test_mijnbibliotheek.py | 22 ++++++++++++++++++++-- tests/test_parsers.py | 2 +- 4 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/mijnbib/mijnbibliotheek.py b/src/mijnbib/mijnbibliotheek.py index 75c5f83..b47a5f9 100644 --- a/src/mijnbib/mijnbibliotheek.py +++ b/src/mijnbib/mijnbibliotheek.py @@ -74,6 +74,7 @@ def __init__(self, username: str, password: str, city: str | None = None, login_ # Open the door for overriding parsers (but still keep private for now) self._loans_page_parser = LoansListPageParser() + self._accounts_page_parser = AccountsListPageParser() # *** PUBLIC METHODS *** @@ -163,7 +164,7 @@ def get_accounts(self) -> list[Account]: response = self._br.open(url, timeout=TIMEOUT) # pylint: disable=assignment-from-none html_string = response.read().decode("utf-8") # type:ignore try: - accounts = AccountsListPageParser(html_string, self.BASE_URL).parse() + accounts = self._accounts_page_parser.parse(html_string, self.BASE_URL) except Exception as e: raise IncompatibleSourceError( f"Problem scraping accounts ({str(e)})", html_body="" diff --git a/src/mijnbib/parsers.py b/src/mijnbib/parsers.py index e73260c..48807f4 100644 --- a/src/mijnbib/parsers.py +++ b/src/mijnbib/parsers.py @@ -227,12 +227,8 @@ def _get_loan_info_from_div( ) -class AccountsListPageParser(Parser): - def __init__(self, html: str, base_url: str): - self._html = html - self._base_url = base_url - - def parse(self) -> list[Account]: +class AccountsListPageParser(ParserNew): + def parse(self, html: str, base_url: str) -> list[Account]: """Return list of accounts. >>> html_string = ''' @@ -266,13 +262,13 @@ def parse(self) -> list[Account]: ... ... ... ... 
''' - >>> AccountsListPageParser(html_string,"https://example.com").parse() # doctest: +NORMALIZE_WHITESPACE + >>> AccountsListPageParser().parse(html_string,"https://example.com") # doctest: +NORMALIZE_WHITESPACE [Account(library_name='Dijk92', user='Johny', id='374047', loans_count=0, loans_url='https://example.com/mijn-bibliotheek/lidmaatschappen/374047/uitleningen', reservations_count=5, reservations_url='https://example.com/mijn-bibliotheek/lidmaatschappen/384767/reservaties', open_amounts=0, open_amounts_url='')] """ accounts = [] - soup = BeautifulSoup(self._html, "html.parser") + soup = BeautifulSoup(html, "html.parser") library_divs = soup.find_all( "div", class_="my-library-user-library-account-list__library" @@ -320,7 +316,7 @@ def parse(self) -> list[Account]: ) try: - loans_url = self._base_url + acc_div.find( + loans_url = base_url + acc_div.find( "a", href=re.compile("uitleningen") ).get("href") except AttributeError: @@ -331,7 +327,7 @@ def parse(self) -> list[Account]: ) try: - holds_url = self._base_url + acc_div.find( + holds_url = base_url + acc_div.find( "a", href=re.compile("reservaties") ).get("href") except AttributeError: @@ -357,7 +353,7 @@ def parse(self) -> list[Account]: open_amounts = 0 try: - open_amounts_url = self._base_url + acc_div.find( + open_amounts_url = base_url + acc_div.find( "a", href=re.compile("betalen") ).get("href") except AttributeError: diff --git a/tests/test_mijnbibliotheek.py b/tests/test_mijnbibliotheek.py index 331f8c1..dd815df 100644 --- a/tests/test_mijnbibliotheek.py +++ b/tests/test_mijnbibliotheek.py @@ -9,7 +9,7 @@ from mijnbib import MijnBibliotheek from mijnbib.errors import AuthenticationError from mijnbib.login_handlers import LoginByForm, LoginByOAuth -from mijnbib.models import Loan +from mijnbib.models import Account, Loan CONFIG_FILE = "mijnbib.ini" @@ -156,7 +156,7 @@ def test_login_by_oauth_already_logged_in(self, creds_config, caplog): class TestCustomParser: - def 
test_loan_page_parser_can_be_overridden(self): + def test_loans_page_parser_can_be_overridden(self): # Arrange class MyCustomLoanParser: def parse(self, _html, _base_url, _account_id): @@ -171,3 +171,21 @@ def parse(self, _html, _base_url, _account_id): # Assert assert mb.get_loans(account_id="whatever") == [Loan("some title")] + + def test_accounts_page_parser_can_be_overridden(self): + # Arrange + acc = Account("libname", "user", "id", 1, "loans_url", 1, "res_url", 1, "oa_url") + + class MyCustomAccountsParser: + def parse(self, _html, _base_url): + return [acc] + + mb = MijnBibliotheek("user", "pwd") + # Fake both (a) valid login, and (b) some response on fetching accounts page + mb._br = FakeMechanizeBrowser(form_response="Profiel") # type: ignore + + # Act + mb._accounts_page_parser = MyCustomAccountsParser() # type:ignore + + # Assert + assert mb.get_accounts() == [acc] diff --git a/tests/test_parsers.py b/tests/test_parsers.py index c4f1d37..2bc9cfd 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -11,7 +11,7 @@ class TestAccountsListPageParser: def test_parse_accounts_list_page(self): # Happy flow test --> see doctest - assert AccountsListPageParser("", "https://example.com").parse() == [] + assert AccountsListPageParser().parse("", "https://example.com") == [] def test_parse_item_count_from_li(self): assert AccountsListPageParser._parse_item_count_from_li("", "") is None