diff --git a/ci/buildspec.yml b/ci/buildspec.yml index 591d0b9..2781283 100644 --- a/ci/buildspec.yml +++ b/ci/buildspec.yml @@ -2,7 +2,7 @@ version: 0.2 env: variables: - FRAMEWORK_VERSION: "0.23-1" + FRAMEWORK_VERSION: "0.24-1" phases: install: diff --git a/docker/0.24-1/base/Dockerfile.cpu b/docker/0.24-1/base/Dockerfile.cpu new file mode 100644 index 0000000..fb9b84e --- /dev/null +++ b/docker/0.24-1/base/Dockerfile.cpu @@ -0,0 +1,106 @@ +ARG UBUNTU_VERSION=18.04 +ARG UBUNTU_IMAGE_DIGEST=646942475da61b4ce9cc5b3fadb42642ea90e5d0de46111458e100ff2c7031e6 + +FROM ubuntu:${UBUNTU_VERSION}@sha256:${UBUNTU_IMAGE_DIGEST} + +ARG MINICONDA_VERSION=4.8.3 +ARG CONDA_PY_VERSION=37 +ARG CONDA_PKG_VERSION=4.9.0 +ARG PYTHON_VERSION=3.7.10 +ARG PYARROW_VERSION=0.16.0 +ARG MLIO_VERSION=0.6.0 + +# Install python and other scikit-learn runtime dependencies +# Dependency list from http://scikit-learn.org/stable/developers/advanced_installation.html#installing-build-dependencies +RUN apt-get update && \ + apt-get -y install --no-install-recommends \ + build-essential \ + curl \ + git \ + jq \ + libatlas-base-dev \ + nginx \ + openjdk-8-jdk-headless \ + unzip \ + wget \ + && \ + # MLIO build dependencies + # Official Ubuntu APT repositories do not contain an up-to-date version of CMake required to build MLIO. + # Kitware contains the latest version of CMake. 
+ apt-get -y install --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + gnupg \ + software-properties-common \ + && \ + wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | \ + gpg --dearmor - | \ + tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \ + apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + autoconf \ + automake \ + build-essential \ + cmake=3.18.4-0kitware1 \ + cmake-data=3.18.4-0kitware1 \ + doxygen \ + kitware-archive-keyring \ + libcurl4-openssl-dev \ + libssl-dev \ + libtool \ + ninja-build \ + python3-dev \ + python3-distutils \ + python3-pip \ + zlib1g-dev \ + && \ + rm /etc/apt/trusted.gpg.d/kitware.gpg && \ + rm -rf /var/lib/apt/lists/* + +RUN cd /tmp && \ + curl -L --output /tmp/Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-py${CONDA_PY_VERSION}_${MINICONDA_VERSION}-Linux-x86_64.sh && \ + bash /tmp/Miniconda3.sh -bfp /miniconda3 && \ + rm /tmp/Miniconda3.sh + +ENV PATH=/miniconda3/bin:${PATH} + +# Install MLIO with Apache Arrow integration +# We could install mlio-py from conda, but it comes with extra support such as image reader that increases image size +# which increases training time. We build from source to minimize the image size. 
+RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \ + # Conda configuration see https://conda.io/projects/conda/en/latest/configuration.html + conda config --system --set auto_update_conda false && \ + conda config --system --set show_channel_urls true && \ + echo "python ${PYTHON_VERSION}.*" >> /miniconda3/conda-meta/pinned && \ + conda install -c conda-forge python=${PYTHON_VERSION} && \ + conda install conda=${CONDA_PKG_VERSION} && \ + conda update -y conda && \ + conda install -c conda-forge pyarrow=${PYARROW_VERSION} && \ + cd /tmp && \ + git clone --branch v${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio && \ + cd mlio && \ + build-tools/build-dependency build/third-party all && \ + mkdir -p build/release && \ + cd build/release && \ + cmake -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH="$(pwd)/../third-party" ../.. && \ + cmake --build . && \ + cmake --build . --target install && \ + cmake -DMLIO_INCLUDE_PYTHON_EXTENSION=ON -DMLIO_INCLUDE_ARROW_INTEGRATION=ON ../.. && \ + cmake --build . --target mlio-py && \ + cmake --build . --target mlio-arrow && \ + cd ../../src/mlio-py && \ + python3 setup.py bdist_wheel && \ + python3 -m pip install dist/*.whl && \ + cp -r /tmp/mlio/build/third-party/lib/intel64/gcc4.7/* /usr/local/lib/ && \ + ldconfig && \ + rm -rf /tmp/mlio + +# Python won’t try to write .pyc or .pyo files on the import of source modules +# Force stdin, stdout and stderr to be totally unbuffered. Good for logging +ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 + +# Install Scikit-Learn +# Scikit-learn 0.20 was the last version to support Python 2.7 and Python 3.4. +# Scikit-learn now requires Python 3.6 or newer. 
+RUN python -m pip install --no-cache -I scikit-learn==0.24.1 diff --git a/docker/0.24-1/extension/Dockerfile.cpu b/docker/0.24-1/extension/Dockerfile.cpu new file mode 100644 index 0000000..75e94e9 --- /dev/null +++ b/docker/0.24-1/extension/Dockerfile.cpu @@ -0,0 +1,9 @@ +FROM preprod-sklearn:0.24-1-cpu-py3 + +RUN pip freeze | grep -q 'scikit-learn==0.24.1'; \ + if [ $? -eq 0 ]; \ + then echo 'scikit-learn version 0.24.1 requirement met'; \ + else echo 'ERROR: Expected scikit-learn version is 0.24.1, check base images for scikit-learn version' && \ + exit 1; fi + +RUN pip install --upgrade --no-cache --no-deps sagemaker-scikit-learn-extension==2.1.0 diff --git a/docker/0.24-1/extension/README.md b/docker/0.24-1/extension/README.md new file mode 100644 index 0000000..472e616 --- /dev/null +++ b/docker/0.24-1/extension/README.md @@ -0,0 +1,56 @@ +# SageMaker Scikit-learn Extension Container + +The SageMaker Scikit-learn Extension Container is used in SageMaker Autopilot. + +The SageMaker Scikit-learn Extension Container is built in 3 steps. The first 2 steps should be the same as building the [sagemaker-scikit-learn-container](https://github.com/aws/sagemaker-scikit-learn-container) image. + +### Step 1: Base Image + +The "base" Dockerfile encompasses the installation of the framework and all of the dependencies needed. + +Tagging scheme is based on `<framework_version>-cpu-py<python_version>`. (e.g. 0.24-1-cpu-py3) + +All "final" Dockerfiles build images using base images that use the tagging scheme above. + +``` +docker build -t sklearn-base:0.24-1-cpu-py3 -f docker/0.24-1/base/Dockerfile.cpu . +``` + +Notice that this Dockerfile has the updated version of sklearn (0.24.1) installed. + +### Step 2: Final Image + +The "final" Dockerfiles encompass the installation of the SageMaker specific support code. + +All "final" Dockerfiles use base images for building. + +These "base" images are specified with the naming convention of sklearn-base:`<framework_version>-cpu-py<python_version>`. 
+ +Before building "final" images: + +Build your "base" image. Make sure it is named and tagged in accordance with your "final" Dockerfile. + +``` +# Create the SageMaker Scikit-learn Container Python package. +python setup.py bdist_wheel +``` + +Then build the final image, like in the sagemaker-sklearn-container: + +``` +docker build -t preprod-sklearn:0.24-1-cpu-py3 -f docker/0.24-1/final/Dockerfile.cpu . +``` + +### Step 3: Build the extension image for SageMaker Scikit-learn Extension Container + +The "extension" Dockerfiles encompass the installation of the SageMaker Autopilot specific support code. + +The "extension" Dockerfiles use final images for building. + +Build the third additional Dockerfile needed for SageMaker Scikit-learn Extension Container. This Dockerfile specifies a hard dependency on a certain version of scikit-learn (i.e. v0.24.1). + +Tagging scheme is based on extension-`<extension_version>-cpu-py<python_version>`. (e.g. extension-0.2-1-cpu-py3). Make sure the "extension" image is tagged in accordance with the `extension` version (i.e. `extension-0.2-1-cpu-py3`). + +``` +docker build -t preprod-sklearn-extension:0.2-1-cpu-py3 -f docker/0.24-1/extension/Dockerfile.cpu . 
+``` \ No newline at end of file diff --git a/docker/0.24-1/final/Dockerfile.cpu b/docker/0.24-1/final/Dockerfile.cpu new file mode 100644 index 0000000..c43d433 --- /dev/null +++ b/docker/0.24-1/final/Dockerfile.cpu @@ -0,0 +1,57 @@ +FROM sklearn-base:0.24-1-cpu-py3 +ENV SAGEMAKER_SKLEARN_VERSION 0.24-1 + +LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true + +COPY requirements.txt /requirements.txt +RUN python -m pip install -r /requirements.txt && \ + rm /requirements.txt + +COPY dist/sagemaker_sklearn_container-2.0-py3-none-any.whl /sagemaker_sklearn_container-2.0-py3-none-any.whl +# https://github.com/googleapis/google-cloud-python/issues/6647 +RUN rm -rf /miniconda3/lib/python3.7/site-packages/numpy-1.19.4.dist-info && \ + pip install --no-cache /sagemaker_sklearn_container-2.0-py3-none-any.whl && \ + rm /sagemaker_sklearn_container-2.0-py3-none-any.whl + +ENV SAGEMAKER_TRAINING_MODULE sagemaker_sklearn_container.training:main +ENV SAGEMAKER_SERVING_MODULE sagemaker_sklearn_container.serving:main + +####### +# MMS # +####### +# Create MMS user directory +RUN useradd -m model-server +RUN mkdir -p /home/model-server/tmp +RUN chown -R model-server /home/model-server + +# Copy MMS configs +COPY docker/$SAGEMAKER_SKLEARN_VERSION/resources/mms/config.properties.tmp /home/model-server +ENV SKLEARN_MMS_CONFIG=/home/model-server/config.properties + +# Copy execution parameters endpoint plugin for MMS +RUN mkdir -p /tmp/plugins +COPY docker/$SAGEMAKER_SKLEARN_VERSION/resources/mms/endpoints-1.0.jar /tmp/plugins +RUN chmod +x /tmp/plugins/endpoints-1.0.jar + +# Create directory for models +RUN mkdir -p /opt/ml/models +RUN chmod +rwx /opt/ml/models + +##################### +# Required ENV vars # +##################### +# Set SageMaker training environment variables +ENV SM_INPUT /opt/ml/input +ENV SM_INPUT_TRAINING_CONFIG_FILE $SM_INPUT/config/hyperparameters.json +ENV SM_INPUT_DATA_CONFIG_FILE $SM_INPUT/config/inputdataconfig.json +ENV 
SM_CHECKPOINT_CONFIG_FILE $SM_INPUT/config/checkpointconfig.json + +# Set SageMaker serving environment variables +ENV SM_MODEL_DIR /opt/ml/model + +EXPOSE 8080 +ENV TEMP=/home/model-server/tmp + +# Required label for multi-model loading +LABEL com.amazonaws.sagemaker.capabilities.multi-models=true + diff --git a/docker/0.24-1/resources/mms/ExecutionParameters.java b/docker/0.24-1/resources/mms/ExecutionParameters.java new file mode 100644 index 0000000..65134a8 --- /dev/null +++ b/docker/0.24-1/resources/mms/ExecutionParameters.java @@ -0,0 +1,98 @@ +package software.amazon.ai.mms.plugins.endpoint; + +import com.google.gson.GsonBuilder; +import com.google.gson.annotations.SerializedName; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Properties; +import software.amazon.ai.mms.servingsdk.Context; +import software.amazon.ai.mms.servingsdk.ModelServerEndpoint; +import software.amazon.ai.mms.servingsdk.annotations.Endpoint; +import software.amazon.ai.mms.servingsdk.annotations.helpers.EndpointTypes; +import software.amazon.ai.mms.servingsdk.http.Request; +import software.amazon.ai.mms.servingsdk.http.Response; + +/** +The modified endpoint source code for the jar used in this container. 
+You can create this endpoint by moving it by cloning the MMS repo: +> git clone https://github.com/awslabs/mxnet-model-server.git + +Copy this file into plugins/endpoints/src/main/java/software/amazon/ai/mms/plugins/endpoints/ +and then from the plugins directory, run: + +> ./gradlew fJ + +Modify file in plugins/endpoint/resources/META-INF/services/* to specify this file location + +Then build the JAR: + +> ./gradlew build + +The jar should be available in plugins/endpoints/build/libs as endpoints-1.0.jar +**/ +@Endpoint( + urlPattern = "execution-parameters", + endpointType = EndpointTypes.INFERENCE, + description = "Execution parameters endpoint") +public class ExecutionParameters extends ModelServerEndpoint { + + @Override + public void doGet(Request req, Response rsp, Context ctx) throws IOException { + Properties prop = ctx.getConfig(); + // 6 * 1024 * 1024 + int maxRequestSize = Integer.parseInt(prop.getProperty("max_request_size", "6291456")); + SagemakerXgboostResponse response = new SagemakerXgboostResponse(); + response.setMaxConcurrentTransforms(Integer.parseInt(prop.getProperty("NUM_WORKERS", "1"))); + response.setBatchStrategy("MULTI_RECORD"); + response.setMaxPayloadInMB(maxRequestSize / (1024 * 1024)); + rsp.getOutputStream() + .write( + new GsonBuilder() + .setPrettyPrinting() + .create() + .toJson(response) + .getBytes(StandardCharsets.UTF_8)); + } + + /** Response for Model server endpoint */ + public static class SagemakerXgboostResponse { + @SerializedName("MaxConcurrentTransforms") + private int maxConcurrentTransforms; + + @SerializedName("BatchStrategy") + private String batchStrategy; + + @SerializedName("MaxPayloadInMB") + private int maxPayloadInMB; + + public SagemakerXgboostResponse() { + maxConcurrentTransforms = 4; + batchStrategy = "MULTI_RECORD"; + maxPayloadInMB = 6; + } + + public int getMaxConcurrentTransforms() { + return maxConcurrentTransforms; + } + + public String getBatchStrategy() { + return batchStrategy; + } + + public 
int getMaxPayloadInMB() { + return maxPayloadInMB; + } + + public void setMaxConcurrentTransforms(int newMaxConcurrentTransforms) { + maxConcurrentTransforms = newMaxConcurrentTransforms; + } + + public void setBatchStrategy(String newBatchStrategy) { + batchStrategy = newBatchStrategy; + } + + public void setMaxPayloadInMB(int newMaxPayloadInMB) { + maxPayloadInMB = newMaxPayloadInMB; + } + } +} diff --git a/docker/0.24-1/resources/mms/config.properties.tmp b/docker/0.24-1/resources/mms/config.properties.tmp new file mode 100644 index 0000000..6bb4569 --- /dev/null +++ b/docker/0.24-1/resources/mms/config.properties.tmp @@ -0,0 +1,12 @@ +model_store=$$SAGEMAKER_MMS_MODEL_STORE$$ +load_models=$$SAGEMAKER_MMS_LOAD_MODELS$$ +plugins_path=/tmp/plugins +inference_address=http://0.0.0.0:$$SAGEMAKER_BIND_TO_PORT$$ +management_address=http://0.0.0.0:$$SAGEMAKER_BIND_TO_PORT$$ +default_workers_per_model=$$SAGEMAKER_NUM_MODEL_WORKERS$$ +max_request_size=$$SAGEMAKER_MAX_REQUEST_SIZE$$ +decode_input_request=false +default_service_handler=$$SAGEMAKER_MMS_DEFAULT_HANDLER$$ +job_queue_size=$$SAGEMAKER_MODEL_JOB_QUEUE_SIZE$$ +preload_model=true + diff --git a/docker/0.24-1/resources/mms/endpoints-1.0.jar b/docker/0.24-1/resources/mms/endpoints-1.0.jar new file mode 100644 index 0000000..b5f4416 Binary files /dev/null and b/docker/0.24-1/resources/mms/endpoints-1.0.jar differ diff --git a/requirements.txt b/requirements.txt index ebc3ac2..29a6136 100755 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,6 @@ retrying==1.3.3 sagemaker-containers==2.8.6.post2 sagemaker-inference==1.2.0 sagemaker-training==3.6.2 -scikit-learn==0.23.2 +scikit-learn==0.24.1 scipy==1.5.3 six==1.15.0 diff --git a/tox.ini b/tox.ini index d49cc64..83f4e3b 100644 --- a/tox.ini +++ b/tox.ini @@ -6,7 +6,7 @@ max-line-length = 120 [testenv] deps = - sklearn0.23: scikit-learn==0.23.2 + sklearn0.24: scikit-learn==0.24.1 -r{toxinidir}/requirements.txt -r{toxinidir}/test-requirements.txt conda_deps=