Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of a Primary/Archive setup for object storage. #13397

Merged
merged 14 commits into from
Apr 11, 2023
Merged
1 change: 1 addition & 0 deletions dev/environment
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ HIBP_API_KEY="something-not-real"
DOCS_URL="https://pythonhosted.org/{project}/"

FILES_BACKEND=warehouse.packaging.services.LocalFileStorage path=/var/opt/warehouse/packages/ url=http://localhost:9001/packages/{path}
ARCHIVE_FILES_BACKEND=warehouse.packaging.services.LocalArchiveFileStorage path=/var/opt/warehouse/packages-archive/ url=http://localhost:9001/packages-archive/{path}
SIMPLE_BACKEND=warehouse.packaging.services.LocalSimpleStorage path=/var/opt/warehouse/simple/ url=http://localhost:9001/simple/{path}
DOCS_BACKEND=warehouse.packaging.services.LocalDocsStorage path=/var/opt/warehouse/docs/
SPONSORLOGOS_BACKEND=warehouse.admin.services.LocalSponsorLogoStorage path=/var/opt/warehouse/sponsorlogos/
Expand Down
6 changes: 6 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ version: '3.9'
volumes:
simple:
packages:
packages-archive:
sponsorlogos:
policies:
vault:
Expand Down Expand Up @@ -98,6 +99,7 @@ services:
- ./pyproject.toml:/opt/warehouse/src/pyproject.toml:z
- .coveragerc:/opt/warehouse/src/.coveragerc:z
- packages:/var/opt/warehouse/packages
- packages-archive:/var/opt/warehouse/packages-archive
- sponsorlogos:/var/opt/warehouse/sponsorlogos
- policies:/var/opt/warehouse/policies
- simple:/var/opt/warehouse/simple
Expand Down Expand Up @@ -128,6 +130,7 @@ services:
command: python -m http.server 9001
volumes:
- packages:/var/opt/warehouse/packages
- packages-archive:/var/opt/warehouse/packages-archive
- sponsorlogos:/var/opt/warehouse/sponsorlogos
- simple:/var/opt/warehouse/simple
ports:
Expand All @@ -139,10 +142,13 @@ services:
command: hupper -m celery -A warehouse worker -B -S redbeat.RedBeatScheduler -l info
volumes:
- ./warehouse:/opt/warehouse/src/warehouse:z
- packages:/var/opt/warehouse/packages
- packages-archive:/var/opt/warehouse/packages-archive
env_file: dev/environment
environment:
C_FORCE_ROOT: "1"
FILES_BACKEND: "warehouse.packaging.services.LocalFileStorage path=/var/opt/warehouse/packages/ url=http://files:9001/packages/{path}"
ARCHIVE_FILES_BACKEND: "warehouse.packaging.services.LocalArchiveFileStorage path=/var/opt/warehouse/packages-archive/ url=http://files:9001/packages-archive/{path}"
miketheman marked this conversation as resolved.
Show resolved Hide resolved
SIMPLE_BACKEND: "warehouse.packaging.services.LocalSimpleStorage path=/var/opt/warehouse/simple/ url=http://files:9001/simple/{path}"

static:
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ exclude = ["warehouse/locale/.*", "warehouse/migrations/versions.*"]
module = [
"automat.*",
"bpython.*", # https://github.com/bpython/bpython/issues/892
"b2sdk.*",
ewdurbin marked this conversation as resolved.
Show resolved Hide resolved
"celery.app.backends.*",
"celery.backends.redis.*",
"citext.*",
Expand Down
1 change: 1 addition & 0 deletions requirements/main.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
alembic>=0.7.0
Automat
argon2-cffi
b2sdk
ewdurbin marked this conversation as resolved.
Show resolved Hide resolved
Babel
bcrypt
boto3
Expand Down
17 changes: 17 additions & 0 deletions requirements/main.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ argon2-cffi-bindings==21.2.0 \
--hash=sha256:f1152ac548bd5b8bcecfb0b0371f082037e47128653df2e8ba6e914d384f3c3e \
--hash=sha256:f9f8b450ed0547e3d473fdc8612083fd08dd2120d6ac8f73828df9b7d45bb351
# via argon2-cffi
arrow==1.2.3 \
--hash=sha256:3934b30ca1b9f292376d9db15b19446088d12ec58629bc3f0da28fd55fb633a1 \
--hash=sha256:5a49ab92e3b7b71d96cd6bfcc4df14efefc9dfa96ea19045815914a6ab6b1fe2
# via b2sdk
asn1crypto==1.5.1 \
--hash=sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c \
--hash=sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67
Expand All @@ -55,6 +59,9 @@ automat==22.10.0 \
--hash=sha256:c3164f8742b9dc440f3682482d32aaff7bb53f71740dd018533f9de286b64180 \
--hash=sha256:e56beb84edad19dcc11d30e8d9b895f75deeb5ef5e96b84a467066b3b84bb04e
# via -r requirements/main.in
b2sdk==1.20.0 \
--hash=sha256:b394d9fbdada1a4ffc0837cd6c930351f5fccc24cd0af23e41edd850d67fb687
# via -r requirements/main.in
babel==2.12.1 \
--hash=sha256:b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610 \
--hash=sha256:cc2d99999cd01d44420ae725a21c9e3711b3aadc7976d6147f622d8581963455
Expand Down Expand Up @@ -843,6 +850,10 @@ limits==3.3.1 \
--hash=sha256:df8685b1aff349b5199628ecdf41a9f339a35233d8e4fcd9c3e10002e4419b45 \
--hash=sha256:dfc59ed5b4847e33a33b88ec16033bed18ce444ce6a76287a4e054db9a683861
# via -r requirements/main.in
logfury==1.0.1 \
--hash=sha256:130a5daceab9ad534924252ddf70482aa2c96662b3a3825a7d30981d03b76a26 \
--hash=sha256:b4f04be1701a1df644afc3384d6167d64c899f8036b7eefc3b6c570c6a9b290b
# via b2sdk
lxml==4.9.2 \
--hash=sha256:01d36c05f4afb8f7c20fd9ed5badca32a2029b93b1750f571ccc0b142531caf7 \
--hash=sha256:04876580c050a8c5341d706dd464ff04fd597095cc8c023252566a8826505726 \
Expand Down Expand Up @@ -1327,6 +1338,7 @@ python-dateutil==2.8.2 \
--hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \
--hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9
# via
# arrow
# botocore
# celery-redbeat
# elasticsearch-dsl
Expand Down Expand Up @@ -1358,6 +1370,7 @@ requests==2.28.2 \
--hash=sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf
# via
# -r requirements/main.in
# b2sdk
# datadog
# forcediphttpsadapter
# google-api-core
Expand Down Expand Up @@ -1475,6 +1488,10 @@ text-unidecode==1.3 \
--hash=sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8 \
--hash=sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93
# via python-slugify
tqdm==4.65.0 \
--hash=sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5 \
--hash=sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671
# via b2sdk
di marked this conversation as resolved.
Show resolved Hide resolved
transaction==3.1.0 \
--hash=sha256:65d0b1ea92dbe7c4e3b237fb6bd8b41dea23d7459e7bdd8c3880bffdaf912fa4 \
--hash=sha256:8376a959aa71821df1bdd7d066858a3f9f34b7f5f1c0a0e1efbd11d626895449
Expand Down
10 changes: 10 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def metrics_timing(*args, **kwargs):
def metrics():
return pretend.stub(
event=pretend.call_recorder(lambda *args, **kwargs: None),
gauge=pretend.call_recorder(lambda *args, **kwargs: None),
increment=pretend.call_recorder(lambda *args, **kwargs: None),
histogram=pretend.call_recorder(lambda *args, **kwargs: None),
timing=pretend.call_recorder(lambda *args, **kwargs: None),
Expand Down Expand Up @@ -173,6 +174,13 @@ def pyramid_request(pyramid_services, jinja, remote_addr):

dummy_request.registry.registerUtility(jinja, IJinja2Environment, name=".jinja2")

dummy_request._task_stub = pretend.stub(
delay=pretend.call_recorder(lambda *a, **kw: None)
)
dummy_request.task = pretend.call_recorder(
lambda *a, **kw: dummy_request._task_stub
)

def localize(message, **kwargs):
    """Wrap ``message`` in a TranslationString and return its interpolation."""
    return TranslationString(message, **kwargs).interpolate()
Expand Down Expand Up @@ -264,6 +272,7 @@ def app_config(database):
"ratelimit.url": "memory://",
"elasticsearch.url": "https://localhost/warehouse",
"files.backend": "warehouse.packaging.services.LocalFileStorage",
"archive_files.backend": "warehouse.packaging.services.LocalArchiveFileStorage",
"simple.backend": "warehouse.packaging.services.LocalSimpleStorage",
"docs.backend": "warehouse.packaging.services.LocalDocsStorage",
"sponsorlogos.backend": "warehouse.admin.services.LocalSponsorLogoStorage",
Expand All @@ -273,6 +282,7 @@ def app_config(database):
"warehouse.malware.services.PrinterMalwareCheckService"
),
"files.url": "http://localhost:7000/",
"archive_files.url": "http://localhost:7000/archive",
"sessions.secret": "123456",
"sessions.url": "redis://localhost:0/",
"statuspage.url": "https://2p66nmmycsj3.statuspage.io",
Expand Down
62 changes: 52 additions & 10 deletions tests/unit/forklift/test_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@
Release,
Role,
)
from warehouse.packaging.tasks import update_bigquery_release_files
from warehouse.packaging.tasks import (
sync_file_to_archive,
update_bigquery_release_files,
)
from warehouse.utils.security_policy import AuthenticationMethod

from ...common.db.accounts import EmailFactory, UserFactory
Expand Down Expand Up @@ -1426,17 +1429,12 @@ def storage_service_store(path, file_path, *, meta):
"warehouse.release_files_table": "example.pypi.distributions"
}

update_bigquery = pretend.stub(
delay=pretend.call_recorder(lambda *a, **kw: None)
)
db_request.task = pretend.call_recorder(lambda *a, **kw: update_bigquery)

resp = legacy.file_upload(db_request)

assert resp.status_code == 200
assert db_request.find_service.calls == [
pretend.call(IMetricsService, context=None),
pretend.call(IFileStorage),
pretend.call(IFileStorage, name="primary"),
]
# Parenthesise the conditional: without it the statement parses as
# `assert ((len(...) == 2) if has_signature else 1)`, which is always true
# when has_signature is falsy and therefore never checks the call count.
assert len(storage_service.store.calls) == (2 if has_signature else 1)
assert storage_service.store.calls[0] == pretend.call(
Expand Down Expand Up @@ -1508,7 +1506,10 @@ def storage_service_store(path, file_path, *, meta):
)
]

assert db_request.task.calls == [pretend.call(update_bigquery_release_files)]
assert db_request.task.calls == [
pretend.call(update_bigquery_release_files),
pretend.call(sync_file_to_archive),
]

assert metrics.increment.calls == [
pretend.call("warehouse.upload.attempt"),
Expand Down Expand Up @@ -2567,6 +2568,47 @@ def test_upload_fails_with_unsafe_filename(
assert resp.status_code == 400
assert resp.status == "400 Cannot upload a file with '/' or '\\' in the name."

@pytest.mark.parametrize("character", [*(chr(x) for x in range(32)), chr(127)])
def test_upload_fails_with_disallowed_in_filename(
    self, pyramid_config, db_request, character
):
    """Uploading a file whose name contains a control character yields HTTP 400.

    Parametrized over ordinals 0-31 and 127 (DEL). Each character is placed
    in the filename between the project name and the version, and the upload
    is expected to be rejected with the "non-printable characters" message.
    """
    user = UserFactory.create()
    pyramid_config.testing_securitypolicy(identity=user)
    db_request.user = user
    EmailFactory.create(user=user)
    project = ProjectFactory.create()
    release = ReleaseFactory.create(project=project, version="1.0")
    RoleFactory.create(user=user, project=project)

    filename = f"{project.name}{character}-{release.version}.tar.wat"

    db_request.POST = MultiDict(
        {
            "metadata_version": "1.2",
            "name": project.name,
            "version": release.version,
            "filetype": "sdist",
            "md5_digest": "nope!",
            "content": pretend.stub(
                filename=filename,
                # A single byte is enough: the assertion below is about the
                # filename only, so there is no reason to allocate more than
                # MAX_FILESIZE bytes for every parametrized character.
                file=io.BytesIO(b"a"),
                type="application/tar",
            ),
        }
    )

    with pytest.raises(HTTPBadRequest) as excinfo:
        legacy.file_upload(db_request)

    resp = excinfo.value

    assert resp.status_code == 400
    assert resp.status == (
        "400 Cannot upload a file with non-printable characters (ordinals 0-31) "
        "or the DEL character (ordinal 127) in the name."
    )

def test_upload_fails_without_user_permission(self, pyramid_config, db_request):
user1 = UserFactory.create()
EmailFactory.create(user=user1)
Expand Down Expand Up @@ -2786,7 +2828,7 @@ def storage_service_store(path, file_path, *, meta):
assert resp.status_code == 200
assert db_request.find_service.calls == [
pretend.call(IMetricsService, context=None),
pretend.call(IFileStorage),
pretend.call(IFileStorage, name="primary"),
]
assert storage_service.store.calls == [
pretend.call(
Expand Down Expand Up @@ -2898,7 +2940,7 @@ def storage_service_store(path, file_path, *, meta):
assert resp.status_code == 200
assert db_request.find_service.calls == [
pretend.call(IMetricsService, context=None),
pretend.call(IFileStorage),
pretend.call(IFileStorage, name="primary"),
]
assert storage_service.store.calls == [
pretend.call(
Expand Down
9 changes: 8 additions & 1 deletion tests/unit/packaging/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from warehouse.packaging.models import File, Project, Release, Role
from warehouse.packaging.services import project_service_factory
from warehouse.packaging.tasks import ( # sync_bigquery_release_files,
check_file_archive_tasks_outstanding,
compute_2fa_mandate,
update_description_html,
)
Expand All @@ -49,6 +50,7 @@ def key_factory(keystring, iterate_on=None, if_attr_exists=None):
monkeypatch.setattr(packaging, "key_factory", key_factory)
settings = {
"files.backend": "foo.bar",
"archive_files.backend": "peas.carrots",
"simple.backend": "bread.butter",
"docs.backend": "wu.tang",
"warehouse.packaging.project_create_user_ratelimit_string": "20 per hour",
Expand All @@ -73,7 +75,8 @@ def key_factory(keystring, iterate_on=None, if_attr_exists=None):
packaging.includeme(config)

assert config.register_service_factory.calls == [
pretend.call(storage_class.create_service, IFileStorage),
pretend.call(storage_class.create_service, IFileStorage, name="primary"),
pretend.call(storage_class.create_service, IFileStorage, name="archive"),
pretend.call(storage_class.create_service, ISimpleStorage),
pretend.call(storage_class.create_service, IDocsStorage),
pretend.call(
Expand Down Expand Up @@ -169,6 +172,10 @@ def key_factory(keystring, iterate_on=None, if_attr_exists=None):
in config.add_periodic_task.calls
)

assert (
pretend.call(crontab(minute="*/1"), check_file_archive_tasks_outstanding)
in config.add_periodic_task.calls
)
assert (
pretend.call(crontab(minute="*/5"), update_description_html)
in config.add_periodic_task.calls
Expand Down
Loading