From 026d98a93d37b4f9fa2ef472c9a9c9aa62960993 Mon Sep 17 00:00:00 2001 From: Ward Van Heddeghem Date: Sat, 9 Mar 2024 11:08:35 +0100 Subject: [PATCH 1/7] Fix (and simplify) oauth login --- src/mijnbib/login_handlers.py | 155 +++++++++++----------------------- 1 file changed, 47 insertions(+), 108 deletions(-) diff --git a/src/mijnbib/login_handlers.py b/src/mijnbib/login_handlers.py index da4f26a..c426346 100644 --- a/src/mijnbib/login_handlers.py +++ b/src/mijnbib/login_handlers.py @@ -14,7 +14,6 @@ AuthenticationError, CanNotConnectError, IncompatibleSourceError, - TemporarySiteError, ) _log = logging.getLogger(__name__) @@ -92,120 +91,60 @@ def __init__(self, *args, **kwargs): # Set some general request parameters, see https://stackoverflow.com/a/59317604/50899 self._s.request = functools.partial(self._s.request, timeout=TIMEOUT) # type: ignore self._s.headers["User-Agent"] = USER_AGENT - self._s.headers["Content-Type"] = "application/json" def login(self) -> mechanize.Browser: - self._log_in() + response = self._log_in() + html = response.text if response is not None else "" + self._validate_logged_in(html) # raises AuthenticationError if not ok self._br.set_cookiejar(self._s.cookies) # Transfer from requests to mechanize session return self._br def _log_in(self): - # Note: flow adapted from https://github.com/myTselection/bibliotheek_be - - # (1) Get OAuth2 state / nonce - # GET https://gent.bibliotheek.be/mijn-bibliotheek/aanmelden - # example response: - # header Location: https://mijn.bibliotheek.be/openbibid/rest/auth/authorize?hint=login&oauth_callback=https://gent.bibliotheek.be/my-library/login/callback&oauth_token=5abee3c0f5c04beead64d8e625ead0e7&uilang=nl - _log.debug("----") - response = self._s.get(self._url, allow_redirects=False) - _log.debug(f"login (1) status code : {response.status_code}") - _log.debug(f"login (1) headers : {response.headers}") - _log.debug(f"login (1) cookies : {response.cookies}") - oauth_location_url = response.headers.get("location", "") - oauth_locationurl_parsed = urlsplit(oauth_location_url) - query_params = parse_qs(oauth_locationurl_parsed.query) - oauth_callback_url = query_params.get("oauth_callback") - oauth_token = query_params.get("oauth_token") - hint = query_params.get("hint") - _log.debug(f"login (1) oauth_location_url: {oauth_location_url}") - _log.debug(f"login (1) oauth_callback_url: {oauth_callback_url}") - _log.debug(f"login (1) oauth_token : {oauth_token}") - _log.debug(f"login (1) hint : {hint}") - if response.status_code >= 500: # we've observed 500, 502 and 503: - raise TemporarySiteError( - f"Expected status code 302 during log in. Got '{response.status_code}'" - ) - if response.status_code != 302: - raise IncompatibleSourceError( - f"Expected status code 302 during log in. Got '{response.status_code}'", - response.text, - ) - if "/mijn-bibliotheek/overzicht" in oauth_location_url: - _log.info("Already authenticated. No need to log in again.") - return response # better for extensibility (i.e. sOlid) - - # (2) Authorize based on Location url (get session id) - _log.debug("----") - response = self._s.get(oauth_location_url, allow_redirects=False) - _log.debug(f"login (2) status code : {response.status_code}") - _log.debug(f"login (2) headers : {response.headers}") - _log.debug(f"login (2) cookies : {response.cookies}") - if response.status_code != 200: - raise IncompatibleSourceError( - f"Expected status code 200 during log in. Got '{response.status_code}'", - response.text, - ) - - # (3) Login with username, password & token - # example response: - # header Location: https://gent.bibliotheek.be/my-library/login/callback?oauth_token=*********&oauth_verifier=*********&uilang=nl - url = "https://mijn.bibliotheek.be/openbibid/rest/auth/login" + # Flow is: + # (1) GET https://bibliotheek.be/mijn-bibliotheek/aanmelden ?destination=/mijn-bibliotheek/lidmaatschappen + # then, via 302 auto-redirect + # GET https://mijn.bibliotheek.be/openbibid/rest/auth/authorize ?hint=login&oauth_callback=...&oauth_token=...&uilang=nl + # (2) POST https://mijn.bibliotheek.be/openbibid/rest/auth/login + # then, via 303 auto-redirect + # GET https://bibliotheek.be/my-library/login/callback ?oauth_token=...&oauth_verifier=...&uilang=nl + # then, via 302 auto-redirect (from destination param) + # GET https://bibliotheek.be/mijn-bibliotheek/lidmaatschappen + + _log.debug("Opening login page ... ") + response = self._s.get(self._url) + # Will perform auto-redirect to + # https://mijn.bibliotheek.be/openbibid/rest/auth/authorize? + # hint=login&oauth_callback=https%3A//bibliotheek.be/my-library/login/callback + # &oauth_token=*******&uilang=nl + auth_url = response.url + auth_url_parsed = urlsplit(auth_url) + qp = parse_qs(auth_url_parsed.query) + _log.debug(f"auth_url : {auth_url}") + _log.debug(f"query params : {qp}") + + _log.debug("Doing login call ... ") data = { - "hint": hint, - "token": oauth_token, - "callback": oauth_callback_url, + "hint": qp.get("hint"), # "login" + "token": qp.get("oauth_token"), # 32-char string + "callback": qp.get("oauth_callback"), # "https://bibliotheek.be/my-library/login/callback" # fmt:skip "email": self._username, "password": self._pwd, } - _log.debug("----") - response = self._s.post(url, data=data, allow_redirects=False) - _log.debug(f"login (3) status code : {response.status_code}") - _log.debug(f"login (3) headers : {response.headers}") - login_location_url = response.headers.get("location", "") - login_locationurl_parsed = urlsplit(login_location_url) - login_query_params = parse_qs(login_locationurl_parsed.query) - oauth_verifier = login_query_params.get("oauth_verifier") - oauth_token = query_params.get("oauth_token") - hint = query_params.get("hint") - _log.debug(f"login (3) login_location_url: {login_location_url}") - _log.debug(f"login (3) oauth_verifier : {oauth_verifier}") - _log.debug(f"login (3) oauth_token : {oauth_token}") - _log.debug(f"login (3) hint : {hint}") - if response.status_code == 200: - raise AuthenticationError("Login not accepted. Correct credentials?") - if response.status_code >= 500: # we've observed 500 - raise TemporarySiteError( - f"Expected status code 303 during log in. Got '{response.status_code}'" - ) - if response.status_code != 303: - raise IncompatibleSourceError( - f"Expected status code 303 during log in. Got '{response.status_code}'", - response.text, - ) - - # (4) Call login callback based on Location url - _log.debug("----") - response = self._s.get(login_location_url, allow_redirects=False) - _log.debug(f"login (4) status code : {response.status_code}") - _log.debug(f"login (4) headers : {response.headers}") - _log.debug(f"login (4) cookies : {response.cookies}") - # _log.debug(f"login (4) text : {response.text}") - - # Note: the above request gives a 302 redirection, but we don't follow it (too costly) - - # Soft verification if we are logged in - if ("mijn-bibliotheek/overzicht" not in response.headers.get("location", "")) and ( - "mijn-bibliotheek/lidmaatschappen" not in response.headers.get("location", "") - ): - _log.warning( - f"Not clear if properly logged in (as '{self._username}'). Was expecting " - "'mijn-bibliotheek/overzicht' or 'mijn-bibliotheek/lidmaatschappen' " - "in location header, but couldn't find it" - ) - _log.warning( - "Related troubleshooting info. " - f"Status code: '{response.status_code}'. " - f"Headers: '{response.headers}'" - ) - - return response # better for extensibility (i.e. sOlid) + url = "https://mijn.bibliotheek.be/openbibid/rest/auth/login" + response = self._s.post(url, data=data) + return response + + @staticmethod + def _validate_logged_in(html: str): + _log.debug("Checking if login is successful ...") + if "Profiel" not in html: + if ( + "privacyverklaring is gewijzigd" in html + or "akkoord met de privacyverklaring" in html + ): + raise AuthenticationError( + "Login not accepted (likely need to accept privacy statement again)" + ) + else: + raise AuthenticationError("Login not accepted") + _log.debug("Login was successful") From 95d0ec593f33c3e866324517c393d193230fc8cc Mon Sep 17 00:00:00 2001 From: Ward Van Heddeghem Date: Sat, 9 Mar 2024 11:20:22 +0100 Subject: [PATCH 2/7] Handle already logged in case (oauth) better (=faster) --- src/mijnbib/login_handlers.py | 5 ++++- tests/test_mijnbibliotheek.py | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/mijnbib/login_handlers.py b/src/mijnbib/login_handlers.py index c426346..bd75e6a 100644 --- a/src/mijnbib/login_handlers.py +++ b/src/mijnbib/login_handlers.py @@ -87,7 +87,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._s = requests.Session() - # self._s.cookies = self._br.cookiejar # load cookies from earlier session(s) + self._s.cookies = self._br.cookiejar # load cookies from earlier session(s) # Set some general request parameters, see https://stackoverflow.com/a/59317604/50899 self._s.request = functools.partial(self._s.request, timeout=TIMEOUT) # type: ignore self._s.headers["User-Agent"] = USER_AGENT @@ -121,6 +121,9 @@ def _log_in(self): qp = parse_qs(auth_url_parsed.query) _log.debug(f"auth_url : {auth_url}") _log.debug(f"query params : {qp}") + if qp == {}: + _log.debug("Looks like we are still or already logged in. Skip auth/login call") + return response _log.debug("Doing login call ... ") data = { diff --git a/tests/test_mijnbibliotheek.py b/tests/test_mijnbibliotheek.py index 5fd0304..93a1728 100644 --- a/tests/test_mijnbibliotheek.py +++ b/tests/test_mijnbibliotheek.py @@ -1,5 +1,6 @@ import configparser import io +import logging from pathlib import Path from typing import BinaryIO @@ -135,3 +136,13 @@ def test_login_by_oauth_wrong_creds(self, creds_config): with pytest.raises(AuthenticationError, match=r".*Login not accepted.*"): mb.login() assert mb._logged_in is False + + def test_login_by_oauth_already_logged_in(self, creds_config, caplog): + d = creds_config + mb = MijnBibliotheek(d["username"], d["password"], login_by="oauth") + caplog.set_level(logging.DEBUG) + mb.login() + mb.login() # should be faster, and emit debug message + + assert "already logged in" in caplog.text # to verify we do take fast lane + assert mb._logged_in From e71458cda8f09c1eb5d96cd61ab537b36db50e41 Mon Sep 17 00:00:00 2001 From: Ward Van Heddeghem Date: Sat, 9 Mar 2024 11:25:00 +0100 Subject: [PATCH 3/7] Refactor login validation --- src/mijnbib/login_handlers.py | 48 +++++++++++++---------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/src/mijnbib/login_handlers.py b/src/mijnbib/login_handlers.py index bd75e6a..891d77a 100644 --- a/src/mijnbib/login_handlers.py +++ b/src/mijnbib/login_handlers.py @@ -35,7 +35,7 @@ class LoginByForm(LoginHandler): def login(self) -> mechanize.Browser: response = self._log_in() html = response.read().decode("utf-8") if response is not None else "" - self._validate_logged_in(html) # raises AuthenticationError if not ok + _validate_logged_in(html) # raises AuthenticationError if not ok return self._br def _log_in(self): @@ -66,21 +66,6 @@ def _log_in(self): ) from e return response - @staticmethod - def _validate_logged_in(html: str): - _log.debug("Checking if login is successful ...") - if "Profiel" not in html: - if ( - "privacyverklaring is gewijzigd" in html - or "akkoord met de privacyverklaring" in html - ): - raise AuthenticationError( - "Login not accepted (likely need to accept privacy statement again)" - ) - else: - raise AuthenticationError("Login not accepted") - _log.debug("Login was successful") - class LoginByOAuth(LoginHandler): def __init__(self, *args, **kwargs): @@ -95,7 +80,7 @@ def __init__(self, *args, **kwargs): def login(self) -> mechanize.Browser: response = self._log_in() html = response.text if response is not None else "" - self._validate_logged_in(html) # raises AuthenticationError if not ok + _validate_logged_in(html) # raises AuthenticationError if not ok self._br.set_cookiejar(self._s.cookies) # Transfer from requests to mechanize session return self._br @@ -137,17 +122,18 @@ def _log_in(self): response = self._s.post(url, data=data) return response - @staticmethod - def _validate_logged_in(html: str): - _log.debug("Checking if login is successful ...") - if "Profiel" not in html: - if ( - "privacyverklaring is gewijzigd" in html - or "akkoord met de privacyverklaring" in html - ): - raise AuthenticationError( - "Login not accepted (likely need to accept privacy statement again)" - ) - else: - raise AuthenticationError("Login not accepted") - _log.debug("Login was successful") + +def _validate_logged_in(html: str) -> None: + """Raises AuthenticationError if login failed.""" + _log.debug("Checking if login is successful ...") + if "Profiel" not in html: + if ( + "privacyverklaring is gewijzigd" in html + or "akkoord met de privacyverklaring" in html + ): + raise AuthenticationError( + "Login not accepted (likely need to accept privacy statement again)" + ) + else: + raise AuthenticationError("Login not accepted") + _log.debug("Login was successful") From fa57d4dd53b0314af89a7c6beffb0252a705ae7a Mon Sep 17 00:00:00 2001 From: Ward Van Heddeghem Date: Sat, 9 Mar 2024 11:29:55 +0100 Subject: [PATCH 4/7] Remove readme remark 'oauth broken" --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 7a48741..0dc7b29 100644 --- a/README.md +++ b/README.md @@ -64,8 +64,6 @@ Tenslotte, via de commandline kan je de module ook als volgt aanroepen: mb = MijnBibliotheek(username, password, login_by="oauth") accounts = mb.get_accounts() - ! Opmerking: De oauth flow is sinds maart 2024 broken, en vereist nog een aanpassing. - - **Foutafhandeling**. Afhankelijk van de toepassing, kan het aangeraden zijn om foutafhandeling te voorzien. Het bestand `errors.py` bevat de lijst van Mijnbib-specifieke exceptions. De docstrings van de publieke methods bevatten From 8089669af03d3b8f1fcfb921fb190df5cafb0e46 Mon Sep 17 00:00:00 2001 From: Ward Van Heddeghem Date: Sat, 9 Mar 2024 19:50:07 +0100 Subject: [PATCH 5/7] Add docstrings --- src/mijnbib/login_handlers.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/mijnbib/login_handlers.py b/src/mijnbib/login_handlers.py index 891d77a..9a21664 100644 --- a/src/mijnbib/login_handlers.py +++ b/src/mijnbib/login_handlers.py @@ -32,6 +32,8 @@ def login(self) -> mechanize.Browser: class LoginByForm(LoginHandler): + """Uses mechanize library to log in""" + def login(self) -> mechanize.Browser: response = self._log_in() html = response.read().decode("utf-8") if response is not None else "" @@ -68,6 +70,12 @@ def _log_in(self): class LoginByOAuth(LoginHandler): + """Uses requests library to log in. + + This login handler is planned to replace the LoginByForm handler completely, + to allow to get rid of the mechanize external dependency. + """ + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) From 64db59096601bb8b6b8208cfcd84ddd94f010c51 Mon Sep 17 00:00:00 2001 From: Ward Van Heddeghem Date: Sat, 9 Mar 2024 19:53:18 +0100 Subject: [PATCH 6/7] Enable SIM linting --- pyproject.toml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5bc2857..26a231b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,11 @@ line-length = 95 # same as Black [tool.ruff.lint] extend-select = [ - "W", # warning - "B", # flake8-bugbear - # "SIM", # flake8-simplify, for simplified code + "W", # warning + "B", # flake8-bugbear + "SIM", # flake8-simplify, for simplified code +] +ignore = [ + "SIM105", # suppressible-exception (contextlib usage) + "SIM910", # dict-get-with-none-default ] From 2a138cbb9e2738a7173713da2701e38489374f52 Mon Sep 17 00:00:00 2001 From: Ward Van Heddeghem Date: Sat, 9 Mar 2024 19:55:22 +0100 Subject: [PATCH 7/7] Bump version to v0.5.6 --- changelog.md | 6 +++++- pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/changelog.md b/changelog.md index fc72f3b..bddfa72 100644 --- a/changelog.md +++ b/changelog.md @@ -2,9 +2,13 @@ new: new feature / impr: improvement / fix: bug fix +## v0.5.6 - 2024-03-09 + +- fix: broken login (oauth) + ## v0.5.5 - 2024-03-05 -- fix: broken login (form) because of change at site. +- fix: broken login (form) because of change at site. Note: alternative oauth login still broken. - impr: raise TemporarySiteError at oauth login when 5xx (part 3) diff --git a/pyproject.toml b/pyproject.toml index 26a231b..dc37ae9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mijnbib" -version = "0.5.5" +version = "0.5.6" description = "Python API voor de website mijn.bibliotheek.be" readme = "README.md" authors = [{ name = "Ward Van Heddeghem", email = "wardvh@fastmail.fm" }]