From fedc61333989548dc879b76f1f40b1a86dcd7b7c Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Mon, 18 Mar 2024 21:18:19 +0100 Subject: [PATCH] GoogleOAuthenticator.hosted_domain: check against hd, not email domain Co-authored-by: Simon Li Co-authored-by: Min RK --- oauthenticator/google.py | 80 +++++++++++++------- oauthenticator/tests/test_google.py | 109 +++++++++++++++++----------- 2 files changed, 121 insertions(+), 68 deletions(-) diff --git a/oauthenticator/google.py b/oauthenticator/google.py index 0fad0152..6f4ea807 100644 --- a/oauthenticator/google.py +++ b/oauthenticator/google.py @@ -111,19 +111,27 @@ def _userdata_url_default(self): help=""" This config has two functions. - 1. Restrict sign-in to a list of email domain names, such as - `["mycollege.edu"]` or `["college1.edu", "college2.edu"]`. - 2. If a single domain is specified, the username will be stripped to exclude the `@domain` part. + 1. Restrict sign-in to users part of Google organizations/workspaces + managing domains, such as `["mycollege.edu"]` or `["college1.edu", + "college2.edu"]`. + 2. If a single domain is specified, usernames with that domain will be + stripped to exclude the `@domain` part. - Note that users with email domains in this list must still be allowed - via another config, such as `allow_all`, `allowed_users`, or - `allowed_google_groups`. + Users not restricted by this configuration must still be explicitly + allowed by a configuration intended to allow users, like `allow_all`, + `allowed_users`, or `allowed_google_groups`. + + .. warning:: + + Changing this config either to or from having a single entry is a + disruptive change as the same Google user will get a new username, + either without or with a domain name included. + ::: - ```{warning} Disruptive config changes - Changing this config either to or from having a single entry is a - disruptive change as the same Google user will get a new username, - either without or with a domain name included. - ``` + .. versionchanged:: 16.1 + + Now restricts sign-in based on the hd claim, not the domain in the + user's email. """, ) @@ -174,8 +182,19 @@ def user_info_to_username(self, user_info): user_email = user_info["email"] user_domain = user_info["domain"] = user_email.split("@")[1].lower() - if len(self.hosted_domain) == 1 and self.hosted_domain[0] == user_domain: - # unambiguous domain, use only base name + # NOTE: This is not an authorization check, it just about username + # derivation. Decoupling hosted_domain from this is considered in + # https://github.com/jupyterhub/oauthenticator/issues/733. + # + # NOTE: This code is written with without knowing for sure if the user + # email's domain could be different from the domain in hd, so we + # assume it could be even though it seems like it can't be. If a + # Google organization/workspace manages users in a "primary + # domain" and a "secondary domain", users with respective email + # domain have their hd field set respectively. + # + if len(self.hosted_domain) == 1 and user_domain == self.hosted_domain[0]: + # strip the domain in this situation username = username.split("@")[0] return username @@ -214,6 +233,28 @@ async def update_auth_model(self, auth_model): return auth_model + def check_blocked_users(self, username, auth_model): + """ + Overrides `Authenticator.check_blocked_users` to not only block users in + `Authenticator.blocked_users`, but to also enforce + `GoogleOAuthenticator.hosted_domain` if its configured. + + When hosted_domain is configured, users are required to be part of + listed Google organizations/workspaces. + + Returns False if the user is blocked, otherwise True. + """ + user_info = auth_model["auth_state"][self.user_auth_state_key] + + # hd ref: https://developers.google.com/identity/openid-connect/openid-connect#id_token-hd + hd = user_info.get("hd", "") + + if self.hosted_domain and hd not in self.hosted_domain: + self.log.warning(f"Blocked {username} with 'hd={hd}' not in hosted_domain") + return False + + return super().check_blocked_users(username, auth_model) + async def check_allowed(self, username, auth_model): """ Overrides the OAuthenticator.check_allowed to also allow users part of @@ -236,19 +277,6 @@ async def check_allowed(self, username, auth_model): self.log.warning(message) raise HTTPError(403, message) - # NOTE: If hosted_domain is configured as ["a.com", "b.com"], and - # allowed_google_groups is declared as {"a.com": {"a-group"}}, a - # "b.com" user won't be authorized unless allowed in another way. - # - # This means that its not possible to allow all users of a given - # domain if one wants to restrict another. - # - if self.hosted_domain: - if user_domain not in self.hosted_domain: - message = f"Login with domain @{user_domain} is not allowed" - self.log.warning(message) - raise HTTPError(403, message) - if await super().check_allowed(username, auth_model): return True diff --git a/oauthenticator/tests/test_google.py b/oauthenticator/tests/test_google.py index 28adeb0c..3aeb652d 100644 --- a/oauthenticator/tests/test_google.py +++ b/oauthenticator/tests/test_google.py @@ -5,22 +5,23 @@ from unittest import mock from pytest import fixture, mark, raises -from tornado.web import HTTPError from traitlets.config import Config from ..google import GoogleOAuthenticator from .mocks import setup_oauth_mock -def user_model(email, username="user1"): +def user_model(email, username="user1", hd=None): """Return a user model""" - return { + model = { 'sub': hashlib.md5(email.encode()).hexdigest(), 'email': email, 'custom': username, - 'hd': email.split('@')[1], 'verified_email': True, } + if hd: + model['hd'] = hd + return model @fixture @@ -187,37 +188,49 @@ async def test_google( assert auth_model == None -async def test_hosted_domain_single_entry(google_client): +@mark.parametrize( + "test_variation_id,user_email,user_hd,expect_username,expect_allowed,expect_admin", + [ + ("01", "user1@ok-hd.orG", "ok-hd.org", "user1", True, True), + ("02", "user2@ok-hd.orG", "ok-hd.org", "user2", True, None), + ("03", "blocked@ok-hd.org", "ok-hd.org", None, False, None), + ("04", "user2@ok-hd.org", "", None, False, None), + ("05", "user1@not-ok.org", "", None, False, None), + # Test variation 06 below isn't believed to be possible, but since we + # aren't sure this test clarifies what we expect to happen. + ("06", "user1@other.org", "ok-hd.org", "user1@other.org", True, None), + ], +) +async def test_hosted_domain_single_entry( + google_client, + test_variation_id, + user_email, + user_hd, + expect_username, + expect_allowed, + expect_admin, +): """ Tests that sign in is restricted to the listed domain and that the username represents the part before the `@domain.com` as expected when hosted_domain contains a single entry. """ c = Config() - c.GoogleOAuthenticator.hosted_domain = ["In-Hosted-Domain.com"] + c.GoogleOAuthenticator.hosted_domain = ["ok-hd.org"] c.GoogleOAuthenticator.admin_users = {"user1"} - c.GoogleOAuthenticator.allowed_users = {"user2"} + c.GoogleOAuthenticator.allowed_users = {"user2", "blocked", "user1@other.org"} + c.GoogleOAuthenticator.blocked_users = {"blocked"} authenticator = GoogleOAuthenticator(config=c) - handled_user_model = user_model("user1@iN-hosteD-domaiN.com") + handled_user_model = user_model(user_email, hd=user_hd) handler = google_client.handler_for_user(handled_user_model) auth_model = await authenticator.get_authenticated_user(handler, None) - assert auth_model - assert auth_model["name"] == "user1" - assert auth_model["admin"] == True - - handled_user_model = user_model("user2@iN-hosteD-domaiN.com") - handler = google_client.handler_for_user(handled_user_model) - auth_model = await authenticator.get_authenticated_user(handler, None) - assert auth_model - assert auth_model["name"] == "user2" - assert auth_model["admin"] == None - - handled_user_model = user_model("user1@not-in-hosted-domain.com") - handler = google_client.handler_for_user(handled_user_model) - with raises(HTTPError) as exc: - await authenticator.get_authenticated_user(handler, None) - assert exc.value.status_code == 403 + if expect_allowed: + assert auth_model + assert auth_model["name"] == expect_username + assert auth_model["admin"] == expect_admin + else: + assert auth_model == None @mark.parametrize( @@ -235,7 +248,27 @@ async def test_check_allowed_no_auth_state(google_client, name, allowed): assert await authenticator.check_allowed(name, None) -async def test_hosted_domain_multiple_entries(google_client): +@mark.parametrize( + "test_variation_id,user_email,user_hd,expect_username,expect_allowed", + [ + ("01", "user1@ok-hd1.orG", "ok-hd1.org", "user1@ok-hd1.org", True), + ("02", "user2@ok-hd2.orG", "ok-hd2.org", "user2@ok-hd2.org", True), + ("03", "blocked@ok-hd1.org", "ok-hd1.org", None, False), + ("04", "user3@ok-hd1.org", "", None, False), + ("05", "user1@not-ok.org", "", None, False), + # Test variation 06 below isn't believed to be possible, but since we + # aren't sure this test clarifies what we expect to happen. + ("06", "user1@other.org", "ok-hd1.org", "user1@other.org", True), + ], +) +async def test_hosted_domain_multiple_entries( + google_client, + test_variation_id, + user_email, + user_hd, + expect_username, + expect_allowed, +): """ Tests that sign in is restricted to the listed domains and that the username represents the full email as expected when hosted_domain contains multiple @@ -243,29 +276,21 @@ async def test_hosted_domain_multiple_entries(google_client): """ c = Config() c.GoogleOAuthenticator.hosted_domain = [ - "In-Hosted-Domain1.com", - "In-Hosted-Domain2.com", + "ok-hd1.org", + "ok-hd2.ORG", ] + c.GoogleOAuthenticator.blocked_users = ["blocked@ok-hd1.org"] c.GoogleOAuthenticator.allow_all = True authenticator = GoogleOAuthenticator(config=c) - handled_user_model = user_model("user1@iN-hosteD-domaiN1.com") + handled_user_model = user_model(user_email, hd=user_hd) handler = google_client.handler_for_user(handled_user_model) auth_model = await authenticator.get_authenticated_user(handler, None) - assert auth_model - assert auth_model["name"] == "user1@in-hosted-domain1.com" - - handled_user_model = user_model("user2@iN-hosteD-domaiN2.com") - handler = google_client.handler_for_user(handled_user_model) - auth_model = await authenticator.get_authenticated_user(handler, None) - assert auth_model - assert auth_model["name"] == "user2@in-hosted-domain2.com" - - handled_user_model = user_model("user1@not-in-hosted-domain.com") - handler = google_client.handler_for_user(handled_user_model) - with raises(HTTPError) as exc: - await authenticator.get_authenticated_user(handler, None) - assert exc.value.status_code == 403 + if expect_allowed: + assert auth_model + assert auth_model["name"] == expect_username + else: + assert auth_model == None @mark.parametrize(