From 4ac3e46e1565355a464417d443077ed4cb933233 Mon Sep 17 00:00:00 2001 From: LilDojd Date: Wed, 18 Sep 2024 16:13:46 +0400 Subject: [PATCH 01/12] refactor(auth): replace passlib CryptContext with bcrypt-based handler --- alchemiscale/security/auth.py | 62 ++++++++++++++++++++++-- alchemiscale/tests/unit/test_security.py | 7 +++ 2 files changed, 64 insertions(+), 5 deletions(-) diff --git a/alchemiscale/security/auth.py b/alchemiscale/security/auth.py index f4782ea1..a0a8403b 100644 --- a/alchemiscale/security/auth.py +++ b/alchemiscale/security/auth.py @@ -4,21 +4,73 @@ """ -from datetime import datetime, timedelta -from typing import Union, Optional import secrets +from datetime import datetime, timedelta +from typing import Optional, Union +import bcrypt from fastapi import HTTPException, status from fastapi.security import OAuth2PasswordBearer from jose import JWTError, jwt -from passlib.context import CryptContext from pydantic import BaseModel -from .models import Token, TokenData, CredentialedEntity +from .models import CredentialedEntity, Token, TokenData + +MAX_PASSWORD_SIZE = 4096 +_dummy_secret = "dummy" + + +class BcryptPasswordHandler(object): + rounds: int = 12 + ident: str = "$2b$" + salt: str = "" + checksum: str = "" + + def __init__(self, rounds: int = 12, ident: str = "$2b$"): + self.rounds = rounds + self.ident = ident + + def _get_config(self) -> bytes: + config = bcrypt.gensalt( + self.rounds, prefix=self.ident.strip("$").encode("ascii") + ) + self.salt = config.decode("ascii")[len(self.ident) + 3 :] + return config + + def to_string(self) -> str: + return "%s%02d$%s%s" % (self.ident, self.rounds, self.salt, self.checksum) + + def hash(self, key: str) -> str: + validate_secret(key) + config = self._get_config() + hash_ = bcrypt.hashpw(key.encode("utf-8"), config) + if not hash_.startswith(config) or len(hash_) != len(config) + 31: + raise ValueError("bcrypt.hashpw returned an invalid hash") + self.checksum = hash_[-31:].decode("ascii") + return self.to_string() + + def verify(self, key: str, hash: str) -> bool: + validate_secret(key) + + if hash is None: + self.hash(_dummy_secret) + return False + + return bcrypt.checkpw(key.encode("utf-8"), hash.encode("utf-8")) -pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") +pwd_context = BcryptPasswordHandler() + + +def validate_secret(secret): + """ensure secret has correct type & size""" + if not isinstance(secret, (str, bytes)): + raise TypeError("secret must be a string or bytes") + if len(secret) > MAX_PASSWORD_SIZE: + raise ValueError( + f"secret is too long, maximum length is {MAX_PASSWORD_SIZE} characters" + ) def generate_secret_key(): diff --git a/alchemiscale/tests/unit/test_security.py b/alchemiscale/tests/unit/test_security.py index fca015e3..0f120414 100644 --- a/alchemiscale/tests/unit/test_security.py +++ b/alchemiscale/tests/unit/test_security.py @@ -30,3 +30,10 @@ def test_token_data(secret_key): token_data = auth.get_token_data(token=token, secret_key=secret_key) assert token_data.scopes == ["*-*-*"] + + +def test_bcrypt_password_handler(): + handler = auth.BcryptPasswordHandler() + hash_ = handler.hash("test") + assert handler.verify("test", hash_) + assert not handler.verify("deadbeef", hash_) From 71e0efb19c31e5e4337294f2aaa46525623a5017 Mon Sep 17 00:00:00 2001 From: LilDojd Date: Wed, 18 Sep 2024 16:15:32 +0400 Subject: [PATCH 02/12] :recycle:(deps): remove passlib dependency fixes OpenFreeEnergy/alchemiscale/#304 --- devtools/conda-envs/alchemiscale-server.yml | 1 - devtools/conda-envs/test.yml | 3 +-- docs/conf.py | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/devtools/conda-envs/alchemiscale-server.yml b/devtools/conda-envs/alchemiscale-server.yml index a175762f..2b764e8c 100644 --- a/devtools/conda-envs/alchemiscale-server.yml +++ b/devtools/conda-envs/alchemiscale-server.yml @@ -29,7 +29,6 @@ dependencies: - uvicorn - gunicorn - python-jose - - passlib - bcrypt - python-multipart - starlette diff --git a/devtools/conda-envs/test.yml b/devtools/conda-envs/test.yml index a23320c2..144051cb 100644 --- a/devtools/conda-envs/test.yml +++ b/devtools/conda-envs/test.yml @@ -17,7 +17,7 @@ dependencies: - monotonic - docker-py # for grolt - # user client printing + # user client printing - rich ## object store @@ -28,7 +28,6 @@ dependencies: - uvicorn - gunicorn - python-jose - - passlib - bcrypt - python-multipart - starlette diff --git a/docs/conf.py b/docs/conf.py index 895c4f15..fdf43e2b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -43,7 +43,6 @@ "jose", "networkx", "numpy", - "passlib", "py2neo", "pydantic", "starlette", From 37095e677d7ed117c0b7d44c51650b9b5989f2ff Mon Sep 17 00:00:00 2001 From: David Dotson Date: Mon, 18 Nov 2024 22:32:43 -0700 Subject: [PATCH 03/12] Simplified bcrypt-based password handling --- alchemiscale/security/auth.py | 48 +++++++++++++++-------------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/alchemiscale/security/auth.py b/alchemiscale/security/auth.py index a0a8403b..cf442bb7 100644 --- a/alchemiscale/security/auth.py +++ b/alchemiscale/security/auth.py @@ -5,6 +5,8 @@ """ import secrets +import base64 +import hashlib from datetime import datetime, timedelta from typing import Optional, Union @@ -17,46 +19,36 @@ from .models import CredentialedEntity, Token, TokenData MAX_PASSWORD_SIZE = 4096 -_dummy_secret = "dummy" class BcryptPasswordHandler(object): - rounds: int = 12 - ident: str = "$2b$" - salt: str = "" - checksum: str = "" - def __init__(self, rounds: int = 12, ident: str = "$2b$"): + def __init__(self, rounds: int = 12, ident: str = "2b"): self.rounds = rounds self.ident = ident - def _get_config(self) -> bytes: - config = bcrypt.gensalt( - self.rounds, prefix=self.ident.strip("$").encode("ascii") - ) - self.salt = config.decode("ascii")[len(self.ident) + 3 :] - return config - - def to_string(self) -> str: - return "%s%02d$%s%s" % (self.ident, self.rounds, self.salt, self.checksum) - def hash(self, key: str) -> str: validate_secret(key) - config = self._get_config() - hash_ = bcrypt.hashpw(key.encode("utf-8"), config) - if not hash_.startswith(config) or len(hash_) != len(config) + 31: - raise ValueError("bcrypt.hashpw returned an invalid hash") - self.checksum = hash_[-31:].decode("ascii") - return self.to_string() - - def verify(self, key: str, hash: str) -> bool: + + # generate a salt unique to this key + salt = bcrypt.gensalt(rounds=self.rounds, prefix=self.ident.encode("ascii")) + + # bcrypt can handle up to 72 characters + # to go beyond this, we first perform sha256 hashing, + # then base64 encode to avoid NULL byte problems + # details: https://github.com/pyca/bcrypt/?tab=readme-ov-file#maximum-password-length + hashed = base64.b64encode(hashlib.sha256(key.encode("utf-8")).digest()) + hashed_salted = bcrypt.hashpw(hashed, salt) + + return hashed_salted + + def verify(self, key: str, hashed_salted: str) -> bool: validate_secret(key) - if hash is None: - self.hash(_dummy_secret) - return False + # see note above on why we perform sha256 hashing first + key_hashed = base64.b64encode(hashlib.sha256(key.encode("utf-8")).digest()) - return bcrypt.checkpw(key.encode("utf-8"), hash.encode("utf-8")) + return bcrypt.checkpw(key_hashed, hashed_salted.encode("utf-8")) oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") From d86eac5784bb7246fbedd934a64af2827dc1e6bb Mon Sep 17 00:00:00 2001 From: David Dotson Date: Mon, 18 Nov 2024 22:41:03 -0700 Subject: [PATCH 04/12] Added note on max password size --- alchemiscale/security/auth.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/alchemiscale/security/auth.py b/alchemiscale/security/auth.py index cf442bb7..c0b94ade 100644 --- a/alchemiscale/security/auth.py +++ b/alchemiscale/security/auth.py @@ -18,7 +18,13 @@ from .models import CredentialedEntity, Token, TokenData -MAX_PASSWORD_SIZE = 4096 + +# we set a max size to avoid denial-of-service attacks +# since an extremely large secret attempted by an attacker can take +# increasing amounts of time or memory to validate; +# this is deliberately higher than any reasonable key length +# this is the same max size that `passlib` defaults to +MAX_SECRET_SIZE = 4096 class BcryptPasswordHandler(object): @@ -59,9 +65,9 @@ def validate_secret(secret): """ensure secret has correct type & size""" if not isinstance(secret, (str, bytes)): raise TypeError("secret must be a string or bytes") - if len(secret) > MAX_PASSWORD_SIZE: + if len(secret) > MAX_SECRET_SIZE: raise ValueError( - f"secret is too long, maximum length is {MAX_PASSWORD_SIZE} characters" + f"secret is too long, maximum length is {MAX_SECRET_SIZE} characters" ) From 994fdafbe63ba88edbd848b361c6318ae982c89e Mon Sep 17 00:00:00 2001 From: David Dotson Date: Mon, 18 Nov 2024 23:02:19 -0700 Subject: [PATCH 05/12] Make sure we return a string from BcryptPasswordHandler.hash --- alchemiscale/security/auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alchemiscale/security/auth.py b/alchemiscale/security/auth.py index c0b94ade..fdfc7872 100644 --- a/alchemiscale/security/auth.py +++ b/alchemiscale/security/auth.py @@ -46,7 +46,7 @@ def hash(self, key: str) -> str: hashed = base64.b64encode(hashlib.sha256(key.encode("utf-8")).digest()) hashed_salted = bcrypt.hashpw(hashed, salt) - return hashed_salted + return hashed_salted.decode('utf-8') def verify(self, key: str, hashed_salted: str) -> bool: validate_secret(key) From 82d8c4bd3c905f19d86c4c5b220525a8cfed92dc Mon Sep 17 00:00:00 2001 From: David Dotson Date: Mon, 18 Nov 2024 23:06:21 -0700 Subject: [PATCH 06/12] Black! --- alchemiscale/security/auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alchemiscale/security/auth.py b/alchemiscale/security/auth.py index fdfc7872..5e1f8c3b 100644 --- a/alchemiscale/security/auth.py +++ b/alchemiscale/security/auth.py @@ -46,7 +46,7 @@ def hash(self, key: str) -> str: hashed = base64.b64encode(hashlib.sha256(key.encode("utf-8")).digest()) hashed_salted = bcrypt.hashpw(hashed, salt) - return hashed_salted.decode('utf-8') + return hashed_salted.decode("utf-8") def verify(self, key: str, hashed_salted: str) -> bool: validate_secret(key) From e2bd4fc86d1ee5644dd977766fae7722cc943e7b Mon Sep 17 00:00:00 2001 From: David Dotson Date: Mon, 18 Nov 2024 23:29:51 -0700 Subject: [PATCH 07/12] Only apply sha256 to passwords longer than 64 characters This reproduces passlib behavior. --- alchemiscale/security/auth.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/alchemiscale/security/auth.py b/alchemiscale/security/auth.py index 5e1f8c3b..81f8b226 100644 --- a/alchemiscale/security/auth.py +++ b/alchemiscale/security/auth.py @@ -43,8 +43,14 @@ def hash(self, key: str) -> str: # to go beyond this, we first perform sha256 hashing, # then base64 encode to avoid NULL byte problems # details: https://github.com/pyca/bcrypt/?tab=readme-ov-file#maximum-password-length - hashed = base64.b64encode(hashlib.sha256(key.encode("utf-8")).digest()) - hashed_salted = bcrypt.hashpw(hashed, salt) + + # to reproduce `passlib` behavior, we only perform sha256 hashing if + # key is longer than the sha256 default block size of 64 + key_ = key.encode("utf-8") + if len(key_) > 64: + key_ = base64.b64encode(hashlib.sha256(key_).digest()) + + hashed_salted = bcrypt.hashpw(key_, salt) return hashed_salted.decode("utf-8") @@ -52,9 +58,13 @@ def verify(self, key: str, hashed_salted: str) -> bool: validate_secret(key) # see note above on why we perform sha256 hashing first - key_hashed = base64.b64encode(hashlib.sha256(key.encode("utf-8")).digest()) + # to reproduce `passlib` behavior, we only perform sha256 hashing if + # key is longer than the sha256 default block size of 64 + key_ = key.encode("utf-8") + if len(key_) > 64: + key_ = base64.b64encode(hashlib.sha256(key_).digest()) - return bcrypt.checkpw(key_hashed, hashed_salted.encode("utf-8")) + return bcrypt.checkpw(key_, hashed_salted.encode("utf-8")) oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") From 28791da7f89b4767f20fde4ce9bf9e5a859d8866 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Mon, 18 Nov 2024 23:50:18 -0700 Subject: [PATCH 08/12] Realized on closer look that passlib *doesn't* do sha256 under our usage Instead, truncation is what will happen for passwords beyond 72 characters. This is current behavior. --- alchemiscale/security/auth.py | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/alchemiscale/security/auth.py b/alchemiscale/security/auth.py index 81f8b226..67a16e07 100644 --- a/alchemiscale/security/auth.py +++ b/alchemiscale/security/auth.py @@ -38,33 +38,14 @@ def hash(self, key: str) -> str: # generate a salt unique to this key salt = bcrypt.gensalt(rounds=self.rounds, prefix=self.ident.encode("ascii")) - - # bcrypt can handle up to 72 characters - # to go beyond this, we first perform sha256 hashing, - # then base64 encode to avoid NULL byte problems - # details: https://github.com/pyca/bcrypt/?tab=readme-ov-file#maximum-password-length - - # to reproduce `passlib` behavior, we only perform sha256 hashing if - # key is longer than the sha256 default block size of 64 - key_ = key.encode("utf-8") - if len(key_) > 64: - key_ = base64.b64encode(hashlib.sha256(key_).digest()) - - hashed_salted = bcrypt.hashpw(key_, salt) + hashed_salted = bcrypt.hashpw(key.encode("utf-8"), salt) return hashed_salted.decode("utf-8") def verify(self, key: str, hashed_salted: str) -> bool: validate_secret(key) - # see note above on why we perform sha256 hashing first - # to reproduce `passlib` behavior, we only perform sha256 hashing if - # key is longer than the sha256 default block size of 64 - key_ = key.encode("utf-8") - if len(key_) > 64: - key_ = base64.b64encode(hashlib.sha256(key_).digest()) - - return bcrypt.checkpw(key_, hashed_salted.encode("utf-8")) + return bcrypt.checkpw(key.encode("utf-8"), hashed_salted.encode("utf-8")) oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") From 0056efa8e730f933a73d6ba0bc0561063a67113b Mon Sep 17 00:00:00 2001 From: David Dotson Date: Mon, 18 Nov 2024 23:51:34 -0700 Subject: [PATCH 09/12] Set max secret size to reflect our use of bcrypt directly, 72 character limit --- alchemiscale/security/auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alchemiscale/security/auth.py b/alchemiscale/security/auth.py index 67a16e07..cad8db4f 100644 --- a/alchemiscale/security/auth.py +++ b/alchemiscale/security/auth.py @@ -24,7 +24,7 @@ # increasing amounts of time or memory to validate; # this is deliberately higher than any reasonable key length # this is the same max size that `passlib` defaults to -MAX_SECRET_SIZE = 4096 +MAX_SECRET_SIZE = 72 class BcryptPasswordHandler(object): From 718a44d4b81d2baf69051ddc0702d4f925a6a024 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Tue, 19 Nov 2024 00:01:14 -0700 Subject: [PATCH 10/12] Put max password size back to avoid surprises for users; added explicit test comparing to passlib behavior If it was truncating passwords before, leaving the same limit in place does the least harm. --- alchemiscale/security/auth.py | 2 +- alchemiscale/tests/unit/test_security.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/alchemiscale/security/auth.py b/alchemiscale/security/auth.py index cad8db4f..67a16e07 100644 --- a/alchemiscale/security/auth.py +++ b/alchemiscale/security/auth.py @@ -24,7 +24,7 @@ # increasing amounts of time or memory to validate; # this is deliberately higher than any reasonable key length # this is the same max size that `passlib` defaults to -MAX_SECRET_SIZE = 72 +MAX_SECRET_SIZE = 4096 class BcryptPasswordHandler(object): diff --git a/alchemiscale/tests/unit/test_security.py b/alchemiscale/tests/unit/test_security.py index 0f120414..11559ed8 100644 --- a/alchemiscale/tests/unit/test_security.py +++ b/alchemiscale/tests/unit/test_security.py @@ -37,3 +37,16 @@ def test_bcrypt_password_handler(): hash_ = handler.hash("test") assert handler.verify("test", hash_) assert not handler.verify("deadbeef", hash_) + + +def test_bcrypt_against_passlib(): + """Test the our bcrypt handler has the same behavior as passlib did""" + + # pre-generated hash from + # `passlib.context.CryptContext(schemes=["bcrypt"], deprecated="auto").hash()` + test_password = "the quick brown fox jumps over the lazy dog" + test_hash = "$2b$12$QZTnjdx/sJS7jnEnCqhM3uS8mZ4mhLV5dDfM7ZBUT6LwDiNZ2p7S." + + # test that we get the same thing back from our bcrypt handler + handler = auth.BcryptPasswordHandler() + assert handler.verify(test_password, test_hash) From 1c106acf1034bdc08e9ec68cbf11aa0a3b92ad03 Mon Sep 17 00:00:00 2001 From: LilDojd Date: Tue, 19 Nov 2024 15:40:16 +0400 Subject: [PATCH 11/12] Forbid NULL byte in secret --- alchemiscale/security/auth.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/alchemiscale/security/auth.py b/alchemiscale/security/auth.py index 67a16e07..78c4ac0e 100644 --- a/alchemiscale/security/auth.py +++ b/alchemiscale/security/auth.py @@ -25,6 +25,10 @@ # this is deliberately higher than any reasonable key length # this is the same max size that `passlib` defaults to MAX_SECRET_SIZE = 4096 +# Bcrypt truncates the secret at first NULL it encounters. For this reason, +# passlib forbids NULL bytes in the secret. This is not necessary for backwards +# compatibility, but we follow passlib's lead. +_BNULL = b"\x00" class BcryptPasswordHandler(object): @@ -52,14 +56,16 @@ def verify(self, key: str, hashed_salted: str) -> bool: pwd_context = BcryptPasswordHandler() -def validate_secret(secret): +def validate_secret(secret: str): """ensure secret has correct type & size""" - if not isinstance(secret, (str, bytes)): - raise TypeError("secret must be a string or bytes") + if not isinstance(secret, str): + raise TypeError("secret must be a string") if len(secret) > MAX_SECRET_SIZE: raise ValueError( f"secret is too long, maximum length is {MAX_SECRET_SIZE} characters" ) + if _BNULL in secret.encode("utf-8"): + raise ValueError("secret contains NULL byte") def generate_secret_key(): From 45c84181713c4b5d608559f990ba7aaa1b7664c5 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Tue, 19 Nov 2024 19:57:48 -0700 Subject: [PATCH 12/12] Added long test for bcrypt --- alchemiscale/tests/unit/test_security.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/alchemiscale/tests/unit/test_security.py b/alchemiscale/tests/unit/test_security.py index 11559ed8..8e7050ca 100644 --- a/alchemiscale/tests/unit/test_security.py +++ b/alchemiscale/tests/unit/test_security.py @@ -50,3 +50,16 @@ def test_bcrypt_against_passlib(): # test that we get the same thing back from our bcrypt handler handler = auth.BcryptPasswordHandler() assert handler.verify(test_password, test_hash) + + +def test_bcrypt_against_passlib_long(): + """Test the our bcrypt handler has the same behavior as passlib did for passwords longer than 72 characters, in which bcrypt truncates""" + + # pre-generated hash from + # `passlib.context.CryptContext(schemes=["bcrypt"], deprecated="auto").hash()` + test_password = "this password is so long, it's longer than 72 characters; this should get truncated by bcrypt, so we can ensure we get the same verification behavior as passlib gives" + test_hash = "$2b$12$DQd5IPjlc8z4FZjBIdaquOlVc9whAqnkpZRsnuUUWvfHvwWy.dZ16" + + # test that we get the same thing back from our bcrypt handler + handler = auth.BcryptPasswordHandler() + assert handler.verify(test_password, test_hash)