From 5d0e6ff5455dbb25e58c82e4e0b4064c1442cc1a Mon Sep 17 00:00:00 2001 From: Ward Date: Sun, 3 Dec 2023 22:31:50 +0100 Subject: [PATCH] Initial commit v0.0.1 --- .github/workflows/python-app.yml | 35 ++ .gitignore | 11 + LICENSE | 21 + Makefile | 29 + README.md | 73 +++ examples/example.py | 57 ++ pyproject.toml | 34 ++ src/mijnbib/__init__.py | 13 + src/mijnbib/mijnbibliotheek.py | 903 +++++++++++++++++++++++++++++++ src/mijnbib/plugin_errors.py | 53 ++ tests/__init__.py | 0 tests/save_testref.py | 68 +++ tests/test_mijnbibliotheek.py | 125 +++++ tests/tst_mijnbibliotheek.py | 61 +++ 14 files changed, 1483 insertions(+) create mode 100644 .github/workflows/python-app.yml create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README.md create mode 100644 examples/example.py create mode 100644 pyproject.toml create mode 100644 src/mijnbib/__init__.py create mode 100644 src/mijnbib/mijnbibliotheek.py create mode 100644 src/mijnbib/plugin_errors.py create mode 100644 tests/__init__.py create mode 100644 tests/save_testref.py create mode 100644 tests/test_mijnbibliotheek.py create mode 100644 tests/tst_mijnbibliotheek.py diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml new file mode 100644 index 0000000..52cbf36 --- /dev/null +++ b/.github/workflows/python-app.yml @@ -0,0 +1,35 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python application + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +permissions: + contents: read + +jobs: + test: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.8 + uses: actions/setup-python@v3 + with: + python-version: "3.8" + - name: Install app & dependencies + run: | + python -m pip install --upgrade pip 
+ python -m pip install .[dev] + - name: Validate code formatted + run: | + make blackcheck + - name: Run tests + run: | + make test diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b7e59cb --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +*.dat +*venv* +**/test_config.py + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +*.egg-info/ +dist/ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0d0c814 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Ward + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1f8dfec --- /dev/null +++ b/Makefile @@ -0,0 +1,29 @@ +test: + pytest -v + python -m doctest src/mijnbib/*.py + +black: + isort --skip-glob="**/venv*" \ + --profile=black \ + . + + black -l 95 --exclude "venv*" . 
+ +# For CI/CD pipeline +blackcheck: + isort --skip-glob="**/venv*" \ + --profile=black \ + --check \ + . + black -l 95 --exclude "venv*" \ + --check \ + . + +clean: + rm -rf dist + rm -rf src/*.egg-info + +build: + pip install --upgrade pip + pip install --upgrade build + python -m build \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..9c0fd03 --- /dev/null +++ b/README.md @@ -0,0 +1,73 @@ +# mijnbib + +Python API voor (mijn.) bibliotheek.be + +Met deze Python library kan je jouw ontleende items, reservaties en +accountinfo opvragen indien je een account hebt op . + +## Installatie + +Installeer via: + + pip install mijnbib + +## Gebruik + +Bijvoorbeeld, het opvragen van je ontleende items kan als volgt (na installatie): + + from mijnbib import MijnBibliotheek + + city = "gent" # jouw gemeente of stad + username = "johndoe" + password = "12345678" + account_id = "12345" # zie het getal in de URL, of via mb.get_accounts() + + mb = MijnBibliotheek(username, password, city) + loans = mb.get_loans(account_id) + print(loans) + +Voor meer voorbeelden, zie de code in de folder `examples`. +Daarin wordt ook `pprint` gebruikt voor een meer leesbare output. + +## Opmerkingen + +Deze Python API haalt zijn gegevens via webscraping van de bibliotheek.be website. +Daardoor is ze afhankelijk van de structuur van de website. Bij een wijziging aan +de structuur van de website is het dus heel waarschijnlijk dat alle of bepaalde +functionaliteit plots niet meer werkt. + +In dat geval is het wachten tot deze Python library geupdate is om met de nieuwe +structuur om te gaan. + +## Alternatieven + +De Home Assistant plugin scraped +op een gelijkaardige manier de bibliotheek.be website. 
+ +## Development + +To install all dependencies for development, install (in a virtualenv) via: + + pip install -e .[dev] # 'dev' is defined in pyproject.toml + +Running the tests and applying code formatting can be done via: + + make test + make black + +To work around the challenge of testing a web scraper, the following *snapshot +testing* approach can be used to get some confidence when applying refactoring: + +1. Create a file `test_config.py` in the project root folder, and make it contain + the `city`, `username`, `password` and `account_id` (see the next file for + expected variable naming and format) +2. Run `python tests/save_testref.py` to capture and store the current output + (a couple of files will be created) +3. Perform refactoring as needed +4. Run `pytest tests/tst_mijnbibliotheek.py` (note: it's `pytest` here!) to check + if the output still matches the earlier captured output + +Creating a distribution archive: + + make clean + make build diff --git a/examples/example.py b/examples/example.py new file mode 100644 index 0000000..aeb2f7b --- /dev/null +++ b/examples/example.py @@ -0,0 +1,57 @@ +import logging +import pprint +from dataclasses import asdict + +from mijnbib import MijnBibliotheek + +logging.basicConfig(format="%(levelname)s %(message)s") +logging.getLogger().setLevel(logging.DEBUG) +pp = pprint.PrettyPrinter() + +try: + import test_config as test_config +except ModuleNotFoundError: + print("First, create a file 'test_config.py' with the required data") + exit(-1) + +# Create a test_config file with the following variables +# Or assign directly here +city = test_config.city +username = test_config.mijnbib_user.split("#")[0] +password = test_config.mijnbib_pass +account_id = test_config.mijnbib_user.split("#")[1] + +print("\nFetching accounts...") +mb = MijnBibliotheek(username, password, city) +accounts = mb.get_accounts() +pp.pprint([asdict(acc) for acc in accounts]) + +print("\nFetching loans...") +mb = MijnBibliotheek(username,
password, city) +loans = mb.get_loans(account_id) +pp.pprint([asdict(loan) for loan in loans]) + +print("\nFetching reservations...") +mb = MijnBibliotheek(username, password, city) +reservations = mb.get_reservations(account_id) +pp.pprint([asdict(res) for res in reservations]) + +print("\nFetching all info...") +mb = MijnBibliotheek(username, password, city) +info = mb.get_all_info(all_as_dicts=True) +pp.pprint(info) + +print("\nExtendable loans are:") +extendable_loans = [] +for _key, acc in info.items(): + extendable_loans.extend([loan for loan in acc["loans"] if loan["extendable"]]) +pp.pprint(extendable_loans) + +# print("Extending loan...") +# mb = MijnBibliotheek(username, password, city) +# success, details = mb.extend_loans( +# "", # adapt this +# False, # set tot True, to actually extend a loan +# ) +# pp.pprint(f"Extending loans success = {success}") +# pp.pprint(details) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..e3aaa1a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,34 @@ +[build-system] +requires = ["setuptools>=61.0.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "mijnbib" +version = "0.0.1" +description = "Python API voor de website mijn.bibliotheek.be" +readme = "README.md" +authors = [{ name = "Ward Van Heddeghem", email = "wardvh@fastmail.fm" }] +license = { file = "LICENSE" } +classifiers = [ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", +] +keywords = ["mijn bibliotheek", "bibliotheek"] +dependencies = ["mechanize", "bs4"] +requires-python = ">=3.8" + +[project.optional-dependencies] +dev = ["black", "isort", "pytest", "ruff"] + +[project.urls] +Homepage = "https://github.com/wvanhed/mijnbib" + +[tool.pytest.ini_options] +minversion = "7.0" +python_files = ["tests.py", "test_*.py", "*_tests.py"] +# addopts = "--doctest-modules --doctest-continue-on-failure" + +[tool.isort] +profile = "black" 
+known_first_party = ["test_config"] # if placed in the root diff --git a/src/mijnbib/__init__.py b/src/mijnbib/__init__.py new file mode 100644 index 0000000..a69d902 --- /dev/null +++ b/src/mijnbib/__init__.py @@ -0,0 +1,13 @@ +# So user can do e.g. +# from mijnbib import MijnBibliotheek, Loan + +from mijnbib.mijnbibliotheek import Account, Loan, MijnBibliotheek, Reservation +from mijnbib.plugin_errors import ( + AccessError, + AuthenticationError, + CanNotConnectError, + ExtendLoanError, + GeneralPluginError, + IncompatibleSourceError, + PluginError, +) diff --git a/src/mijnbib/mijnbibliotheek.py b/src/mijnbib/mijnbibliotheek.py new file mode 100644 index 0000000..9490f71 --- /dev/null +++ b/src/mijnbib/mijnbibliotheek.py @@ -0,0 +1,903 @@ +""" +Webscraper module, for interacting with the mijn.bibliotheek.be website. +Created on July 14, 2015 + +For example usage of this module, see the script examples/example.py +""" + +from __future__ import annotations + +import logging +import re +import urllib.error +import urllib.parse +from dataclasses import asdict, dataclass +from datetime import date, datetime + +import mechanize +from bs4 import BeautifulSoup + +from mijnbib.plugin_errors import ( + AccessError, + AuthenticationError, + CanNotConnectError, + ExtendLoanError, + IncompatibleSourceError, +) + +_log = logging.getLogger(__name__) + +# CHEAT SHEET - BeautifulSoup (aka, things I always forget) +# +# .get_text() It returns all the text in a document or beneath a tag, as a +# single Unicode string. Probably preferred over .string +# .text Seems to be shorthand for .get_text(). +# .string Typically returns the single string within a tag, but can also +# return None, or the string from the single-containing tag.
It's +# complicated, see also https://stackoverflow.com/a/25328374/50899 + + +class MijnBibliotheek: + """API for interacting with the mijn.bibliotheek.be website.""" + + BASE_DOMAIN = "bibliotheek.be" + DATE_FORMAT = "%d/%m/%Y" + + def __init__(self, username: str, password: str, city: str) -> None: + self._username = username + self._pwd = password + + self.BASE_URL = f"https://{city.lower().strip()}.{self.BASE_DOMAIN}" + self._logged_in = False + + self._br = mechanize.Browser() + self._br.set_handle_robots(False) + + # *** PUBLIC METHODS *** + + def login(self) -> None: + """Log in. Is auto-called by other methods if needed. + + Raises: + CanNotConnectError + AuthenticationError + IncompatibleSourceError + """ + url = self.BASE_URL + "/mijn-bibliotheek/aanmelden" + _log.debug(f"Will log in at url : {url}") + _log.debug(f" with id : {self._username}") + + response = self._log_in(url) + self._validate_logged_in(response) # raises AuthenticationError if not ok + + self._logged_in = True + + def get_loans(self, account_id: str) -> list[Loan]: + """Return list of loans. Will login first if needed. + + Raises: + AccessError: something went wrong fetching loans + AuthenticationError + IncompatibleSourceError + """ + if not self._logged_in: + self.login() + + url = self.BASE_URL + f"/mijn-bibliotheek/lidmaatschappen/{account_id}/uitleningen" + html_string = self._open_account_loans_page(url) + try: + loans = self._parse_account_loan_page(html_string, self.BASE_URL) + except Exception as e: + raise IncompatibleSourceError(f"Problem scraping loans ({str(e)})", "") from e + return loans + + def get_reservations(self, account_id: str) -> list[Reservation]: + """Return list of reservations. Will login first if needed. 
+ + Raises: + AccessError: something went wrong fetching reservations + AuthenticationError + IncompatibleSourceError + """ + if not self._logged_in: + self.login() + + url = self.BASE_URL + f"/mijn-bibliotheek/lidmaatschappen/{account_id}/reservaties" + html_string = self._open_account_loans_page(url) # same structure as for loans + try: + holds = self._parse_account_reservations_page(html_string) + except Exception as e: + raise IncompatibleSourceError( + f"Problem scraping reservations ({str(e)})", "" + ) from e + return holds + + def get_accounts(self) -> list[Account]: + """Return list of accounts. Will login first if needed. + + Raises: + IncompatibleSourceError + """ + if not self._logged_in: + self.login() + + url = self.BASE_URL + "/mijn-bibliotheek/lidmaatschappen" + _log.debug("Opening page 'lidmaatschappen' ... ") + response = self._br.open(url) # pylint: disable=assignment-from-none + html_string = response.read().decode("utf-8") # type:ignore + try: + accounts = self._parse_accounts_list_page(html_string, self.BASE_URL) + except Exception as e: + raise IncompatibleSourceError(f"Problem scraping accounts ({str(e)})", "") from e + return accounts + + def get_all_info(self, all_as_dicts=False) -> dict: + """Returns all available information, for all accounts. + + Information is returned as a dict, with account ids as keys. + + Args: + all_as_dicts When True, do not return dataclass objects, but dicts + instead. 
+ Raises: + AccessError: something went wrong fetching loans or reservations + AuthenticationError + IncompatibleSourceError + """ + info = {} + accounts = self.get_accounts() + for a in accounts: + loans = self.get_loans(a.id) if a.loans_count != 0 else [] + holds = self.get_reservations(a.id) if a.reservations_count != 0 else [] + info[a.id] = { + "account_details": a if not all_as_dicts else asdict(a), + "loans": loans if not all_as_dicts else [asdict(loan) for loan in loans], + "reservations": ( + holds if not all_as_dicts else [asdict(hold) for hold in holds] + ), + } + return info + + def extend_loans(self, extend_url: str, execute: bool = False) -> tuple[bool, dict]: + """Extend given loan(s) via extend_url. Will login first if needed. + + The url looks like + https://city.bibliotheek.be/mijn-bibliotheek/lidmaatschappen/123/uitleningen/verlengen?loan-ids=456%2C789 + Multiple ids can be given for the loan-ids query parameter, separated by + a comma (which is url-encoded as '%2C'). In the example above the IDs 456 + and 789 will be extended. + + Evaluating if a loan extension was successful, is currently a bit of black + wizardry. You should consider both the `success` response value (True/False) + as well as the absence or occurrence of an error as /suggesting/ success. + This is partially due to the ambiguity of the server response; however + there is also room for handling it more consistently (e.g. returning + `success==False`, rather than raising an ExtendLoanError) + + Args: + execute: A development flag; set to True to actually perform loan extension + Returns: + A result tuple (success, details). + The `success` element is True if extension was successful, False otherwise. + The `details` element contains a dictionary with more details; consider + it for debugging purposes.
+ Raises: + ExtendLoanError: raised when a loan could not be extended + IncompatibleSourceError + """ + # TODO: would make more sense to return loan list (since final page is loan page) + if not self._logged_in: + self.login() + + _log.debug(f"Will extend loan via url: {extend_url}") + try: + response = self._br.open(extend_url) # pylint: disable=assignment-from-none + except mechanize.HTTPError as e: + if e.code == 500: + raise ExtendLoanError(f"Probably invalid extend loan URL: {extend_url}") + else: + raise e + + try: + self._br.select_form(id="my-library-extend-loan-form") + except mechanize.FormNotFoundError: + raise IncompatibleSourceError("Can not find extend loan form", html_body="") + + if not execute: + _log.warning("SIMULATING extending the loan. Will stop now.") + return False, {} + + try: + response = self._br.submit() # pylint: disable=assignment-from-none + except mechanize.HTTPError as e: + if e.code == 500: + # duh, server crashes on unexpected id or id combinations + # (e.g. nonexisting id, ids that belong to different library accounts) + # However, if multiple id's, some of them *might* have been extended, + # even if 500 response + raise ExtendLoanError(f"Could not extend loans using url: {extend_url}") + else: + raise e + + # disclaimer: not sure if other codes are realistic + success = True if response.code == 200 else False + + # Try to add result details, but don't fail if we fail to parse details, it's tricky :-) + try: + # On submit, we arrive at "uitleningen" (loans) page, which lists the result + html_string = response.read().decode("utf-8") + # Path("response.html").write_text("html_string") # for debugging + details = self._parse_extend_response_page(html_string) + if "likely_success" in details and details["likely_success"] is False: + # Probably valid page (http=200) but with 'Foutmelding' + success = False + except Exception as e: + _log.warning(f"Could not parse loan extending result. 
Error: {e}") + details = {} + + return success, details + + # *** INTERNAL METHODS *** + + def _log_in(self, url): + # NOTE:consider replacing with oauth-based authentication flow + + html_string_start_page = "not yet set" # placeholder for troubleshooting + try: + _log.debug("Opening login page ... ") + response = self._br.open(url) # pylint: disable=assignment-from-none + html_string_start_page = response.read().decode("utf-8") # type:ignore + self._br.select_form(nr=0) + self._br["email"] = self._username + self._br["password"] = self._pwd + response = self._br.submit() # pylint: disable=assignment-from-none + except mechanize.FormNotFoundError: + raise IncompatibleSourceError( + "Can not find login form", html_body=html_string_start_page + ) + except urllib.error.URLError as e: + raise CanNotConnectError(f"Error while trying to log in at: {url} ({str(e)})") + return response + + def _validate_logged_in(self, response): + _log.debug("Checking if login is successful ...") + html_string = response.read().decode("utf-8") if response is not None else "" + if "Profiel" not in html_string: + if ( + "privacyverklaring is gewijzigd" in html_string + or "akkoord met de privacyverklaring" in html_string + ): + raise AuthenticationError( + "Login not accepted (likely need to accept privacy statement again)" + ) + else: + raise AuthenticationError("Login not accepted") + _log.debug("Login was successful") + + def _open_account_loans_page(self, acc_url: str) -> str: + _log.debug(f"Opening page ({acc_url}) ... ") + try: + response = self._br.open(acc_url) # pylint: disable=assignment-from-none + except mechanize.HTTPError as e: + if e.code == 500: + # duh, server crashes on incorrect or nonexisting ID in the link + raise AccessError( + f"Loans url can not be opened. Likely incorrect or " + f"nonexisting account ID in the url '{acc_url}'" + ) from e + raise AccessError( + f"Loans url can not be opened. Reason unknown. 
Error: {e}" + ) from e + + html = response.read().decode("utf-8") if response is not None else "" + return html + + @classmethod + def _parse_accounts_list_page(cls, html: str, base_url: str) -> list[Account]: + """Return list of accounts + + >>> html_string = ''' + ... ... + ... + ... ... + ... ''' + >>> MijnBibliotheek._parse_accounts_list_page(html_string,"https://example.com") # doctest: +NORMALIZE_WHITESPACE + [Account(library_name='Dijk92', user='Johny', id='374047', loans_count=0, loans_url='https://example.com/mijn-bibliotheek/lidmaatschappen/374047/uitleningen', + reservations_count=5, reservations_url='https://example.com/mijn-bibliotheek/lidmaatschappen/384767/reservaties', + open_amounts=0, open_amounts_url='')] + >>> MijnBibliotheek._parse_accounts_list_page("","https://example.com") + [] + """ + accounts = [] + soup = BeautifulSoup(html, "html.parser") + + library_divs = soup.find_all( + "div", class_="my-library-user-library-account-list__library" + ) + if not library_divs: + _log.warning("No library accounts detected. 
Weird; expected at least 1.") + + for lib_div in library_divs: + lib_title = ( + lib_div.find( + "div", class_="my-library-user-library-account-list__title-content" + ) + .find(string=True, recursive=False) + .get_text() + .strip() + ) + + # Get accounts + acc_divs = lib_div.find_all( + "div", class_="my-library-user-library-account-list__account" + ) + for acc_div in acc_divs: + # TODO: get details from json object, see https://github.com/myTselection/bibliotheek_be/blob/fec95c3481f78d98062c1117627da652ec8d032d/custom_components/bibliotheek_be/utils.py#L145C53-L145C75 + # Get id from + acc_id = acc_div.a["href"].strip().split("/")[3] + + acc_user = ( + acc_div.find("div", class_="my-library-user-library-account-list__name") + .get_text() + .strip() + ) + + loans_count = cls._parse_item_count_from_li( + acc_div, "my-library-user-library-account-list__loans-link" + ) + + try: + loans_url = base_url + acc_div.find( + "a", href=re.compile("uitleningen") + ).get("href") + except AttributeError: + loans_url = "" + + holds_count = cls._parse_item_count_from_li( + acc_div, "my-library-user-library-account-list__holds-link" + ) + + try: + holds_url = base_url + acc_div.find( + "a", href=re.compile("reservaties") + ).get("href") + except AttributeError: + holds_url = "" + + try: + open_amounts = acc_div.find( + "li", class_="my-library-user-library-account-list__open-amount-link" + ).a.get_text() + if "geen" in open_amounts.lower(): + open_amounts = 0 + else: + # Copied from https://github.com/myTselection/bibliotheek_be + open_amounts = float( + open_amounts.lower() + .replace(" openstaande bedragen", "") + .replace(" openstaand bedrag", "") + .replace(" openstaande kosten", "") + .replace("€", "") + .replace(",", ".") + ) + except AttributeError: + open_amounts = 0 + + try: + open_amounts_url = base_url + acc_div.find( + "a", href=re.compile("betalen") + ).get("href") + except AttributeError: + open_amounts_url = "" + + account = Account( + id=acc_id, + 
library_name=lib_title, + user=acc_user, + loans_count=loans_count, + loans_url=loans_url, + reservations_count=holds_count, + reservations_url=holds_url, + open_amounts=open_amounts, + open_amounts_url=open_amounts_url, + ) + accounts.append(account) + return accounts + + @staticmethod + def _parse_item_count_from_li(acc_div, class_: str) -> int | None: + """Return None if no info found, otherwise return item count (potentially 0)""" + item_count = None + try: + acc_a_text = acc_div.find("li", class_=class_).a.get_text().strip() + if "Geen" in acc_a_text: # 'Geen uitleningen' or 'Geen reservaties' + item_count = 0 + else: + numbers = [int(s) for s in acc_a_text.split() if s.isdigit()] + if numbers: + item_count = numbers[0] + except Exception: + _log.warning("Unexpected html structure. Ignore item count") + return item_count + + @classmethod + def _parse_account_loan_page(cls, html: str, base_url: str) -> list[Loan]: + """Return loans + + >>> html_string=''' + ... + ... ''' + >>> MijnBibliotheek._parse_account_loan_page(html_string,"https://city.bibliotheek.be") # doctest: +NORMALIZE_WHITESPACE + [Loan(title='Erebus', loan_from=datetime.date(2023, 11, 25), loan_till=datetime.date(2023, 12, 23), + author='Palin, Michael', type='Boek', extendable=True, + extend_url='https://city.bibliotheek.be/mijn-bibliotheek/lidmaatschappen/374052/uitleningen/verlengen?loan-ids=6207416', + extend_id='6207416', branchname='Gent Hoofdbibiliotheek', id='1324927', + url='https://city.bibliotheek.be/resolver.ashx?extid=%7Cwise-oostvlaanderen%7C1324927', + cover_url='https://webservices.bibliotheek.be/index.php?func=cover&ISBN=9789000359325&VLACCnr=10157217&CDR=&EAN=&ISMN=&EBS=&coversize=medium')] + """ + loans = [] + soup = BeautifulSoup(html, "html.parser") + + loansection_div = soup.find( + "div", class_="my-library-user-library-account-loans__loan-wrapper" + ) + if not loansection_div: + error_msg = ( + "Er is een fout opgetreden bij het ophalen van informatie uit het " + 
"bibliotheeksysteem. Probeer het later opnieuw." + ) + # Sometimes, this error is present + if soup.find(string=re.compile(error_msg)) is not None: + _log.warning( + f"Loans or reservations can not be retrieved. Site reports: {error_msg}" + ) + return loans + + # Unfortunately, the branch names are interwoven siblings of the loans, + # so we have to parse all items as we go along, and track branch name + children = loansection_div.find_all(recursive=False) # type:ignore + branch_name = "??" + for child in children: + if child.name == "h2": # we expect this to be the first child + branch_name = child.get_text().strip() + # TODO: check if this resolves to the same https://github.com/myTselection/bibliotheek_be/blob/fec95c3481f78d98062c1117627da652ec8d032d/custom_components/bibliotheek_be/utils.py#L306 + elif child.name == "div": # loan div + # we convert child soup object to string, so called function + # can be used also easily for unit tests + loan = cls._get_loan_info_from_div(str(child), branch_name, base_url) + loans.append(loan) + else: + # should not happen, fail gracefully for now. + _log.warning("Unexpected html structure. 
Did not find loan nor branch.") + _log.debug("Number of loans found: %s", len(loans)) + return loans + + @classmethod + def _get_loan_info_from_div(cls, loan_div_html: str, branch: str, base_url: str) -> Loan: + """Return loan from html loan_div blob""" + loan_div = BeautifulSoup(loan_div_html, "html.parser") + loan = {} + + try: + loan_a = loan_div.find( + "h3", class_="my-library-user-library-account-loans__loan-title card--title" + ).a + loan["title"] = loan_a.get_text().strip() + loan["url"] = loan_a["href"] + # Since id is only used to differentiate between titles, use last id-like part from url + # URL looks like 'https://city.bibliotheek.be/resolver.ashx?extid=%7Cwise-oostvlaanderen%7C1144255' + loan["id"] = loan_a["href"].encode("utf-8").split(b"%7C")[-1].decode("utf-8") + except AttributeError: + _log.warning("Unexpected html structure. Ignoring loan title, url and id") + + try: + loan["author"] = loan_div.find("div", class_="author").get_text().strip() + except AttributeError: + loan["author"] = "" # Likely, not all loans have an author + + try: + loan["type"] = ( + loan_div.find( + "div", class_="my-library-user-library-account-loans__loan-type-label" + ) + .get_text() + .strip() + ) + except AttributeError: + loan["type"] = "" # Not all loans have a type + + try: + loan["cover_url"] = loan_div.find( + "img", class_="my-library-user-library-account-loans__loan-cover-img" + )["src"] + except AttributeError: + loan["cover_url"] = "" + + try: + fromto_div = loan_div.find( + "div", + class_="my-library-user-library-account-loans__loan-from-to", + ) + from_ = fromto_div.find_all("span")[1].get_text().strip() # type:ignore + to_ = fromto_div.find_all("span")[3].get_text().strip() # type:ignore + loan["loan_from"] = datetime.strptime(from_, cls.DATE_FORMAT).date() + loan["loan_till"] = datetime.strptime(to_, cls.DATE_FORMAT).date() + except AttributeError: + _log.warning("Unexpected html structure. 
Ignoring loan start and end date") + + try: + extend_loan_div = loan_div.find("div", class_="card--extend-loan") + if extend_loan_div.get_text().strip() == "Verlengen niet mogelijk": + loan["extendable"] = False + else: + loan["extendable"] = True + extend_url = extend_loan_div.a["href"] # type:ignore + extend_url = urllib.parse.urljoin(base_url, extend_url) # type:ignore + loan["extend_url"] = extend_url + loan["extend_id"] = extend_loan_div.input.get("id") + except AttributeError: + loan["extendable"] = None + loan["extend_url"] = "" + loan["extend_id"] = "" + + loan["branchname"] = branch + + return Loan( + title=loan.get("title", ""), + loan_from=loan.get("loan_from", None), + loan_till=loan.get("loan_till", None), + author=loan.get("author", ""), + type=loan.get("type", ""), + extendable=loan.get("extendable", None), + extend_url=loan.get("extend_url", ""), + extend_id=loan.get("extend_id", ""), + branchname=loan.get("branchname", ""), + id=loan.get("id", ""), + url=loan.get("url", ""), + cover_url=loan.get("cover_url", ""), + ) + + @classmethod + def _parse_account_reservations_page(cls, html_string: str) -> list[Reservation]: + """Return list of holds + + >>> html_string=''' + ... + ... 
''' + >>> MijnBibliotheek._parse_account_reservations_page(html_string) # doctest: +NORMALIZE_WHITESPACE + [Reservation(title='Vastberaden!', type='', url='https://city.bibliotheek.be/resolver.ashx?extid=%7Cwise-oostvlaanderen%7C12345', + author='John Doe', location='MyCity', available=False, available_till=None, + request_on=datetime.date(2023, 11, 25), valid_till=datetime.date(2024, 11, 24))] + >>> MijnBibliotheek._parse_account_reservations_page("") # doctest: +NORMALIZE_WHITESPACE + [] + """ + holds = [] + soup = BeautifulSoup(html_string, "html.parser") + + holds_section_div = soup.find( + "div", class_="my-library-user-library-account-holds__hold-wrapper" + ) + if not holds_section_div: + return holds + + children = holds_section_div.find_all(recursive=False) # type:ignore + # child is "class==my-library-user-library-account-holds__hold card" + for child in children: + hold = {} + + try: + hold["type"] = ( + child.find("div", class_="catalog-item__content") + .find("span") + .get_text() + .strip() + ) + except AttributeError: + pass # some holds don't have a type + + try: + hold["request_on"] = ( + child.find("p", string=re.compile("Aangevraagd op")) + .get_text() + .replace("Aangevraagd op ", "") + .strip() + ) + hold["request_on"] = datetime.strptime( + hold["request_on"], cls.DATE_FORMAT + ).date() + except AttributeError: + _log.warning("Unexpected html structure. Ignoring hold request date") + + try: + hold["valid_till"] = ( + child.find("p", string=re.compile("Aanvraag geldig tot")) + .get_text() + .replace("Aanvraag geldig tot ", "") + .strip() + ) + hold["valid_till"] = datetime.strptime( + hold["valid_till"], cls.DATE_FORMAT + ).date() + except AttributeError: + pass # once available, date not present anymore + + try: + hold_a = child.find("h2", class_="catalog-item__title").a + hold["title"] = hold_a.get_text().strip() + hold["url"] = hold_a["href"] + except AttributeError: + _log.warning("Unexpected html structure. 
Ignoring hold title and url") + + try: + hold["author"] = ( + child.find("div", class_="catalog-item__authors").get_text().strip() + ) + except AttributeError: + pass # likely, not all items have an author + + try: + hold_div = child.find( + "div", + class_="my-library-user-library-account-holds__hold-third card--third-section", + ) + hold["location"] = hold_div.find("strong").get_text().strip() + except AttributeError: + _log.warning("Unexpected html structure. Ignoring hold location.") + + try: + hold_div = child.find( + "div", + class_="my-library-user-library-account-holds__hold-fourth card--fourth-section", + ) + hold["available"] = ( + "Klaar om af te halen" in hold_div.find("h3").get_text().strip() + ) + if hold["available"] is True: + date_info_end = hold_div.find("strong").get_text().strip() + hold["endavailable"] = datetime.strptime( + date_info_end, cls.DATE_FORMAT + ).date() + except AttributeError: + _log.warning("Unexpected html structure. Ignoring hold availability.") + + reservation = Reservation( + title=hold.get("title", ""), + type=hold.get("type", ""), + url=hold.get("url", ""), + author=hold.get("author", ""), + location=hold.get("location", ""), + available=hold.get("available", False), + available_till=hold.get("endavailable", None), + request_on=hold.get("request_on", None), + valid_till=hold.get("valid_till", None), + ) + holds.append(reservation) + _log.debug("Number of holds found: %s", len(holds)) + return holds + + @classmethod + def _parse_extend_response_page(cls, html_string: str) -> dict: + """For dict structure, see the called method""" + html_blob = cls._extract_html_from_response_script_tag(html_string) + return cls._parse_extend_response_status_blob(html_blob) + + @staticmethod + def _extract_html_from_response_script_tag(raw_html: str): + """ + The extending loan response contains the result in a ajax script thingy. + This function extracts the part we are interested in and returns the decoded html. 
@classmethod
def _parse_extend_response_status_blob(cls, html_string: str) -> dict:
    """Return details on loans that were extended successfully.

    The html fragment is expected to contain a `ul` element with class
    `messages__list`. Its first `li` holds the overall status message; every
    following `li` describes one extended loan (title inside an `em`, new due
    date as the last word of the item text, parsed with `cls.DATE_FORMAT`).

    Returns a dict with the keys:
        likely_success -- True only when the success message was found and
                          every detail item could be parsed
        count -- number of extended loans
        details -- list of `{"title": str, "until": date}`, one per loan

    Parsing problems (unexpected structure, unparseable date) are logged and
    reported as "no success" with zero extensions; nothing is raised. See the
    unit tests for happy-flow examples.

    >>> MijnBibliotheek._parse_extend_response_status_blob("")
    {'likely_success': False, 'count': 0, 'details': []}
    """
    # NOTE: Unclear when & what response when no success (500 server crash on most tests with
    # different IDs and combinations)
    # If trying to extend loan which has already been extended, there is a red message saying
    # "Er ging iets fout bij het verlengen"
    failure = {"likely_success": False, "count": 0, "details": []}
    soup = BeautifulSoup(html_string, "html.parser")
    try:
        msg_lis = soup.find("ul", class_="messages__list").find_all("li")  # type:ignore
        if not msg_lis:
            return failure

        status_text = msg_lis[0].get_text()
        if "Er ging iets fout bij het verlengen" in status_text:
            # Probably, messages could be mix of success and failures. However, unclear.
            # So, just play safe and report it was no success at all
            return failure
        if "werden succesvol verlengd" not in status_text:
            return failure

        # Collect into a local list first, so that a problem halfway through
        # never leaks a partially filled result to the caller.
        details = []
        for li in msg_lis[1:]:
            until = li.get_text().rsplit(" ", 1)[-1].strip(".")
            until = datetime.strptime(until, cls.DATE_FORMAT).date()
            details.append({"title": li.em.get_text(), "until": until})
    except (AttributeError, ValueError):
        # AttributeError: an expected tag is missing (`find` / `.em` gave None)
        # ValueError: the trailing word of an item is not a date in DATE_FORMAT
        _log.warning("Unexpected html structure. Reporting 0 extensions; could be wrong")
        return failure

    return {"likely_success": True, "count": len(details), "details": details}
class IncompatibleSourceError(PluginError):
    """Exception raised for any general errors in parsing the source.

    Attributes:
        msg -- explanation of the error
        html_body -- html source that was used in parsing and caused error
    """

    def __init__(self, msg, html_body: str):
        # Hand both values to the base class so that `args` is populated.
        # This keeps `repr()` informative and allows the exception to be
        # re-created from `args` (as done when copying or pickling).
        super().__init__(msg, html_body)
        self.msg = msg
        self.html_body = html_body

    def __str__(self):
        # Only the message; the (potentially large) html body is left out
        return str(self.msg)
def _save(data, filename):
    """Pickle `data` into the file `filename` (created or overwritten)."""
    target = Path(filename)
    with target.open(mode="wb") as sink:
        pickle.Pickler(sink).dump(data)
def test_mijnbib_available_imports():
    """Guard the set of names that the `mijnbib` package exposes."""
    import mijnbib

    # make sure we don't expose too few, or too much
    expected_names = {
        "MijnBibliotheek",
        "Loan",
        "Reservation",
        "Account",
        "AccessError",
        "AuthenticationError",
        "CanNotConnectError",
        "ExtendLoanError",
        "IncompatibleSourceError",
        "GeneralPluginError",
        "PluginError",
        "plugin_errors",
        "mijnbibliotheek",
    }
    exposed_names = {name for name in dir(mijnbib) if not name.startswith("__")}
    assert exposed_names == expected_names
+ +
+ """ + + def clean_whitespace(s: str) -> str: + return s.replace(" ", "").replace("\n", "") + + actual_result = MijnBibliotheek._extract_html_from_response_script_tag(raw_html) + assert clean_whitespace(actual_result) == clean_whitespace(expected_result) + + +def test_parse_extend_response_status_blob__success_case(): + html_string = """ +
+ +
+ """ + + actual_result = MijnBibliotheek._parse_extend_response_status_blob(html_string) + assert actual_result == { + "likely_success": True, + "count": 1, + "details": [{"title": "Het schip der doden", "until": datetime.date(2024, 1, 8)}], + } + + +def test_parse_extend_response_status_blob__foutmelding_case(): + html_string = """ +
+ +
@pytest.mark.skipif(
    not Path(REF_ALLINFO).exists(),
    reason="No ref file. Create using save_testref script",
)
def test_get_allinfo():
    """Compare freshly fetched all-info data with the pickled reference set."""
    reference_file = Path(REF_ALLINFO)
    with reference_file.open("rb") as fh:
        expected = pickle.load(fh)
    client = MijnBibliotheek(username, password, city)
    fetched = client.get_all_info()
    assert expected == fetched