Skip to content

Commit

Permalink
✨🚨Autoscaling: Prepare Warmed EBS-backed volumes to use as buffer for…
Browse files Browse the repository at this point in the history
… machines (⚠️ Devops) 🚨 (#5923)
  • Loading branch information
sanderegg authored Jul 11, 2024
1 parent 9cee5db commit d080407
Show file tree
Hide file tree
Showing 36 changed files with 1,724 additions and 366 deletions.
1 change: 1 addition & 0 deletions .env-devel
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ AUTOSCALING_EC2_INSTANCES=null
AUTOSCALING_LOGLEVEL=WARNING
AUTOSCALING_NODES_MONITORING=null
AUTOSCALING_POLL_INTERVAL=10
AUTOSCALING_SSM_ACCESS=null

CATALOG_BACKGROUND_TASK_REST_TIME=60
CATALOG_DEV_FEATURES_ENABLED=0
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/ci-testing-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ on:
env:
DEFAULT_MAX_NANO_CPUS: 10000000
DEFAULT_MAX_MEMORY: 268435456
COLUMNS: 120

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
Expand Down
18 changes: 18 additions & 0 deletions docs/remote-work-aws-ssm.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# How to use VSCode on a remote private EC2 instance
Reference: [Transparently develop on an EC2 instance with VSCode Remote SSH through SSM](https://medium.com/@dbpprt/transparently-develop-on-an-ec2-instance-with-vscode-remote-ssh-through-ssm-6e5c5e599ee1)

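## To use from the terminal

Add the following to your SSH client configuration (typically `~/.ssh/config`); any host whose name looks like an instance ID (`i-*` or `mi-*`) is then reached through an SSM session, e.g. `ssh <instance-id>`: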

```bash
host i-* mi-*
User ec2-user
ProxyCommand sh -c "aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters 'portNumber=%p'"
```

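## To use from VSCode

Here the host name has the form `<instance-id>.<aws-profile>.<aws-region>`; the `ProxyCommand` splits it on `.` to fill in `--target`, `--profile` and `--region` respectively: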

```bash
host i-*.*.*
User ec2-user
ProxyCommand bash -c "aws ssm start-session --target $(echo %h|cut -d'.' -f1) --profile $(echo %h|/usr/bin/cut -d'.' -f2) --region $(echo %h|/usr/bin/cut -d'.' -f3) --document-name AWS-StartSSHSession --parameters 'portNumber=%p'"
```
15 changes: 15 additions & 0 deletions packages/aws-library/src/aws_library/ec2/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
ConstrainedStr,
Field,
NonNegativeFloat,
NonNegativeInt,
validator,
)
from types_aiobotocore_ec2.literals import InstanceStateNameType, InstanceTypeType
Expand Down Expand Up @@ -139,6 +140,9 @@ class EC2InstanceBootSpecific(BaseModel):
description="time interval between pulls of images (minimum is 1 minute) "
"(default to seconds, or see https://pydantic-docs.helpmanual.io/usage/types/#datetime-types for string formating)",
)
buffer_count: NonNegativeInt = Field(
default=0, description="number of buffer EC2s to keep (defaults to 0)"
)

class Config:
schema_extra: ClassVar[dict[str, Any]] = {
Expand Down Expand Up @@ -184,6 +188,17 @@ class Config:
],
"pre_pull_images_cron_interval": "01:00:00",
},
{
# AMI + pre-pull + buffer count
"ami_id": "ami-123456789abcdef",
"pre_pull_images": [
"nginx:latest",
"itisfoundation/my-very-nice-service:latest",
"simcore/services/dynamic/another-nice-one:2.4.5",
"asd",
],
"buffer_count": 10,
},
]
}

Expand Down
16 changes: 12 additions & 4 deletions packages/aws-library/src/aws_library/ssm/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ async def create(cls, settings: SSMSettings) -> "SimcoreSSMAPI":
session = aioboto3.Session()
session_client = session.client(
"ssm",
endpoint_url=f"{settings.SSM_ENDPOINT}",
endpoint_url=settings.SSM_ENDPOINT,
aws_access_key_id=settings.SSM_ACCESS_KEY_ID.get_secret_value(),
aws_secret_access_key=settings.SSM_SECRET_ACCESS_KEY.get_secret_value(),
region_name=settings.SSM_REGION_NAME,
Expand Down Expand Up @@ -77,6 +77,10 @@ async def send_command(
DocumentName="AWS-RunShellScript",
Comment=command_name,
Parameters={"commands": [command]},
CloudWatchOutputConfig={
"CloudWatchOutputEnabled": True,
"CloudWatchLogGroupName": "simcore-ssm-logs",
},
)
assert response["Command"] # nosec
assert "Comment" in response["Command"] # nosec
Expand Down Expand Up @@ -120,9 +124,13 @@ async def is_instance_connected_to_ssm_server(self, instance_id: str) -> bool:
],
)
assert response["InstanceInformationList"] # nosec
assert len(response["InstanceInformationList"]) == 1 # nosec
assert "PingStatus" in response["InstanceInformationList"][0] # nosec
return bool(response["InstanceInformationList"][0]["PingStatus"] == "Online")
if response["InstanceInformationList"]:
assert len(response["InstanceInformationList"]) == 1 # nosec
assert "PingStatus" in response["InstanceInformationList"][0] # nosec
return bool(
response["InstanceInformationList"][0]["PingStatus"] == "Online"
)
return False

@log_decorator(_logger, logging.DEBUG)
@ssm_exception_handler(_logger)
Expand Down
57 changes: 57 additions & 0 deletions packages/pytest-simcore/src/pytest_simcore/aws_iam_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# pylint: disable=redefined-outer-name
# pylint: disable=unused-argument
# pylint: disable=unused-import

import contextlib
import logging
from collections.abc import AsyncIterator
from typing import cast

import aioboto3
import pytest
from aiobotocore.session import ClientCreatorContext
from faker import Faker
from settings_library.ec2 import EC2Settings
from types_aiobotocore_iam.client import IAMClient

from .helpers.logging_tools import log_context


@pytest.fixture
async def iam_client(
    ec2_settings: EC2Settings,
) -> AsyncIterator[IAMClient]:
    """Yield an IAM client configured from the EC2 settings.

    The client is created against ``ec2_settings.EC2_ENDPOINT`` and reuses the
    EC2 access key id, secret access key and region name. It is closed when the
    fixture is torn down.
    """
    session = aioboto3.Session()
    session_client = session.client(
        "iam",
        endpoint_url=ec2_settings.EC2_ENDPOINT,
        aws_access_key_id=ec2_settings.EC2_ACCESS_KEY_ID,
        aws_secret_access_key=ec2_settings.EC2_SECRET_ACCESS_KEY,
        region_name=ec2_settings.EC2_REGION_NAME,
    )
    assert isinstance(session_client, ClientCreatorContext)
    # NOTE: entering the stack with ``async with`` guarantees the client is
    # closed even if the generator is finalized with an exception instead of
    # being resumed normally after the yield.
    async with contextlib.AsyncExitStack() as exit_stack:
        yield cast(IAMClient, await exit_stack.enter_async_context(session_client))


@pytest.fixture
async def aws_instance_profile(
    iam_client: IAMClient, faker: Faker
) -> AsyncIterator[str]:
    """Create an instance profile with a random name and yield its ARN.

    The profile is deleted again, by name, once the fixture is torn down.
    """
    creation_response = await iam_client.create_instance_profile(
        InstanceProfileName=faker.pystr(),
    )
    instance_profile = creation_response["InstanceProfile"]
    profile_arn = instance_profile["Arn"]

    with log_context(
        logging.INFO, msg=f"Created InstanceProfile in AWS with {profile_arn=}"
    ):
        yield profile_arn

    # teardown: remove the profile created above
    await iam_client.delete_instance_profile(
        InstanceProfileName=instance_profile["InstanceProfileName"]
    )
29 changes: 17 additions & 12 deletions packages/pytest-simcore/src/pytest_simcore/aws_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
import requests
from aiohttp.test_utils import unused_port
from faker import Faker
from models_library.utils.fastapi_encoders import jsonable_encoder
from moto.server import ThreadedMotoServer
from pydantic import AnyHttpUrl, parse_obj_as
from pydantic import AnyHttpUrl, SecretStr, parse_obj_as
from pytest_mock.plugin import MockerFixture
from settings_library.basic_types import IDStr
from settings_library.ec2 import EC2Settings
from settings_library.s3 import S3Settings
from settings_library.ssm import SSMSettings
Expand Down Expand Up @@ -74,7 +76,7 @@ def mocked_ec2_server_envs(
monkeypatch: pytest.MonkeyPatch,
) -> EnvVarsDict:
changed_envs: EnvVarsDict = mocked_ec2_server_settings.dict()
return setenvs_from_dict(monkeypatch, changed_envs)
return setenvs_from_dict(monkeypatch, {**changed_envs})


@pytest.fixture
Expand All @@ -98,9 +100,12 @@ def mocked_ssm_server_settings(
reset_aws_server_state: None,
) -> SSMSettings:
return SSMSettings(
SSM_ACCESS_KEY_ID="xxx",
SSM_ENDPOINT=f"http://{mocked_aws_server._ip_address}:{mocked_aws_server._port}", # pylint: disable=protected-access # noqa: SLF001
SSM_SECRET_ACCESS_KEY="xxx", # noqa: S106
SSM_ACCESS_KEY_ID=SecretStr("xxx"),
SSM_ENDPOINT=parse_obj_as(
AnyHttpUrl,
f"http://{mocked_aws_server._ip_address}:{mocked_aws_server._port}", # pylint: disable=protected-access # noqa: SLF001
),
SSM_SECRET_ACCESS_KEY=SecretStr("xxx"),
)


Expand All @@ -109,23 +114,23 @@ def mocked_ssm_server_envs(
mocked_ssm_server_settings: SSMSettings,
monkeypatch: pytest.MonkeyPatch,
) -> EnvVarsDict:
changed_envs: EnvVarsDict = mocked_ssm_server_settings.dict()
return setenvs_from_dict(monkeypatch, changed_envs)
changed_envs: EnvVarsDict = jsonable_encoder(mocked_ssm_server_settings)
return setenvs_from_dict(monkeypatch, {**changed_envs})


@pytest.fixture
def mocked_s3_server_settings(
mocked_aws_server: ThreadedMotoServer, reset_aws_server_state: None, faker: Faker
) -> S3Settings:
return S3Settings(
S3_ACCESS_KEY="xxx",
S3_ACCESS_KEY=IDStr("xxx"),
S3_ENDPOINT=parse_obj_as(
AnyHttpUrl,
f"http://{mocked_aws_server._ip_address}:{mocked_aws_server._port}", # pylint: disable=protected-access # noqa: SLF001
),
S3_SECRET_KEY="xxx", # noqa: S106
S3_BUCKET_NAME=f"pytest{faker.pystr().lower()}",
S3_REGION="us-east-1",
S3_SECRET_KEY=IDStr("xxx"),
S3_BUCKET_NAME=IDStr(f"pytest{faker.pystr().lower()}"),
S3_REGION=IDStr("us-east-1"),
)


Expand All @@ -135,4 +140,4 @@ def mocked_s3_server_envs(
monkeypatch: pytest.MonkeyPatch,
) -> EnvVarsDict:
changed_envs: EnvVarsDict = mocked_s3_server_settings.dict(exclude_unset=True)
return setenvs_from_dict(monkeypatch, changed_envs)
return setenvs_from_dict(monkeypatch, {**changed_envs})
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# pytest_simcore.docker_compose fixture module config variables
import pytest

FIXTURE_CONFIG_CORE_SERVICES_SELECTION = "pytest_simcore_core_services_selection"
FIXTURE_CONFIG_OPS_SERVICES_SELECTION = "pytest_simcore_ops_services_selection"

# NOTE: this ensures that assertion printouts are nicely formatted and complete, see https://lorepirri.com/pytest-register-assert-rewrite.html
pytest.register_assert_rewrite("pytest_simcore.helpers")
26 changes: 10 additions & 16 deletions packages/pytest-simcore/src/pytest_simcore/helpers/aws_ec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ async def assert_autoscaled_computational_ec2_instances(
expected_num_instances: int,
expected_instance_type: InstanceTypeType,
expected_instance_state: InstanceStateNameType,
expected_additional_tag_keys: list[str],
) -> list[InstanceTypeDef]:
return await assert_ec2_instances(
ec2_client,
Expand All @@ -24,7 +25,7 @@ async def assert_autoscaled_computational_ec2_instances(
"io.simcore.autoscaling.dask-scheduler_url",
"user_id",
"wallet_id",
"osparc-tag",
*expected_additional_tag_keys,
],
expected_user_data=["docker swarm join"],
)
Expand All @@ -37,6 +38,7 @@ async def assert_autoscaled_dynamic_ec2_instances(
expected_num_instances: int,
expected_instance_type: InstanceTypeType,
expected_instance_state: InstanceStateNameType,
expected_additional_tag_keys: list[str],
) -> list[InstanceTypeDef]:
return await assert_ec2_instances(
ec2_client,
Expand All @@ -47,9 +49,7 @@ async def assert_autoscaled_dynamic_ec2_instances(
expected_instance_tag_keys=[
"io.simcore.autoscaling.monitored_nodes_labels",
"io.simcore.autoscaling.monitored_services_labels",
"user_id",
"wallet_id",
"osparc-tag",
*expected_additional_tag_keys,
],
expected_user_data=["docker swarm join"],
)
Expand All @@ -74,7 +74,7 @@ async def assert_autoscaled_dynamic_warm_pools_ec2_instances(
expected_instance_tag_keys=[
"io.simcore.autoscaling.monitored_nodes_labels",
"io.simcore.autoscaling.monitored_services_labels",
"buffer-machine",
"io.simcore.autoscaling.buffer_machine",
*expected_additional_tag_keys,
],
expected_user_data=[],
Expand Down Expand Up @@ -106,20 +106,14 @@ async def assert_ec2_instances(
assert instance["InstanceType"] == expected_instance_type
assert "Tags" in instance
assert instance["Tags"]
expected_tag_keys = [
expected_tag_keys = {
*expected_instance_tag_keys,
"io.simcore.autoscaling.version",
"Name",
]
instance_tag_keys = [tag["Key"] for tag in instance["Tags"] if "Key" in tag]
for tag_key in instance_tag_keys:
assert (
tag_key in expected_tag_keys
), f"instance has additional unexpected {tag_key=} vs {expected_tag_keys=}"
for tag in expected_instance_tag_keys:
assert (
tag in instance_tag_keys
), f"instance missing {tag=} vs {instance_tag_keys=}"
}
instance_tag_keys = {tag["Key"] for tag in instance["Tags"] if "Key" in tag}

assert instance_tag_keys == expected_tag_keys

assert "PrivateDnsName" in instance
instance_private_dns_name = instance["PrivateDnsName"]
Expand Down
3 changes: 3 additions & 0 deletions services/autoscaling/requirements/_base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,8 @@ types-aiobotocore-ec2==2.13.0
# via types-aiobotocore
types-aiobotocore-s3==2.13.0
# via types-aiobotocore
types-aiobotocore-ssm==2.13.0
# via types-aiobotocore
types-awscrt==0.20.9
# via botocore-stubs
types-python-dateutil==2.9.0.20240316
Expand All @@ -465,6 +467,7 @@ typing-extensions==4.11.0
# types-aiobotocore
# types-aiobotocore-ec2
# types-aiobotocore-s3
# types-aiobotocore-ssm
# uvicorn
urllib3==2.2.1
# via
Expand Down
2 changes: 2 additions & 0 deletions services/autoscaling/requirements/_test.in
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
--constraint _base.txt


types-aiobotocore[ec2,s3,ssm,iam]
asgi-lifespan
coverage
deepdiff
Expand All @@ -27,4 +28,5 @@ pytest-mock
pytest-runner
python-dotenv
pytest-icdiff
pytest-sugar
respx
Loading

0 comments on commit d080407

Please sign in to comment.