diff --git a/README.md b/README.md index 4a711a40c..9dfa71e8b 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![codecov](https://codecov.io/github/eth-easl/modyn/graph/badge.svg?token=KFDCE03SQ4)](https://codecov.io/github/eth-easl/modyn) [![License](https://img.shields.io/github/license/eth-easl/modyn)](https://img.shields.io/github/license/eth-easl/modyn) -Modyn is an open-source platform for model training on growing datasets, i.e., datasets where points get added over time. +Modyn is a data-centric machine learning pipeline orchestrator, i.e., a platform for model training on growing datasets where points get added over time. Check out our [blog post](https://systems.ethz.ch/research/blog/modyn.html) for a brief introduction. @@ -55,9 +55,8 @@ For running all integration tests, run Checkout our [Example Pipeline](docs/EXAMPLE.md) guide for an example on how to run a Modyn pipeline. Checkout our [Technical Guidelines](docs/TECHNICAL.md) for some hints on developing Modyn and how to add new data selection and triggering policies. Checkout the [Architecture Documentation](docs/ARCHITECTURE.md) for an overview of Modyn's components. -Last, checkout our [vision paper on Modyn](https://anakli.inf.ethz.ch/papers/MLonDynamicData_EuroMLSys23.pdf) for an introduction to model training on dynamic datasets. +Last, check out our [full paper on Modyn](https://anakli.inf.ethz.ch/papers/modyn_sigmod25.pdf) for more technical background and experiments we ran using Modyn. -We are actively developing and designing Modyn, including more thorough documentation. Please reach out via Github, Twitter, E-Mail, or any other channel of communication if you are interested in collaborating, have any questions, or have any problems running Modyn. How to [contribute](docs/CONTRIBUTING.md). @@ -81,3 +80,16 @@ We welcome input from both research and practice. 
Modyn is being developed at the [Efficient Architectures and Systems Lab (EASL)](https://anakli.inf.ethz.ch/#Group) at the [ETH Zurich Systems Group](https://systems.ethz.ch/). Please reach out to `mboether [at] inf [­dot] ethz [dot] ch` or open an issue on Github if you have any questions or inquiry related to Modyn and its usage. + +### Paper / Citation + +If you use Modyn, please cite our SIGMOD'25 paper: + +```bibtex +@inproceedings{Bother2025Modyn, + author = {B\"{o}ther, Maximilian and Robroek, Ties and Gsteiger, Viktor and Ma, Xianzhe and T\"{o}z\"{u}n, P{\i}nar and Klimovic, Ana}, + title = {Modyn: Data-Centric Machine Learning Pipeline Orchestration}, + booktitle = {Proceedings of the Conference on Management of Data (SIGMOD)}, + year = {2025}, +} +``` diff --git a/_typos.toml b/_typos.toml index fdbcd645e..0882f9972 100644 --- a/_typos.toml +++ b/_typos.toml @@ -5,3 +5,4 @@ extend-ignore-re = ["(?Rm)^.*# spellchecker:disable-line$"] [default.extend-words] strat = "strat" fpr = "fpr" +ther = "ther" diff --git a/docker/Base/Dockerfile b/docker/Base/Dockerfile index abff4a016..f97972536 100644 --- a/docker/Base/Dockerfile +++ b/docker/Base/Dockerfile @@ -15,7 +15,8 @@ RUN chown -R appuser /src USER appuser ENV CONDA_DEFAULT_ENV modyn ENV MAMBA_DEFAULT_ENV modyn -RUN /bin/bash -c "mamba init" +ENV MAMBA_ROOT_PREFIX /opt/mamba +RUN /bin/bash -c "mamba shell init -s bash -y" RUN echo "mamba activate modyn" >> /home/appuser/.bashrc # set environment variable to tell modyn that it is running in a container diff --git a/docker/Dependencies/Dockerfile b/docker/Dependencies/Dockerfile index 555ecb14e..9964f07a4 100644 --- a/docker/Dependencies/Dockerfile +++ b/docker/Dependencies/Dockerfile @@ -55,12 +55,17 @@ RUN git clone --recurse-submodules -b v1.59.2 --depth 1 --shallow-submodules htt make -j8 && make install && cd ../../ # Install mamba -ENV CONDA_DIR /opt/mamba -ENV MAMBA_DIR /opt/mamba -RUN wget 
"https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" -O ~/mamba.sh && \ +ENV CONDA_DIR=/opt/mamba +ENV MAMBA_DIR=/opt/mamba +ENV MAMBA_ROOT_PREFIX /opt/mamba +RUN wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" -O ~/mamba.sh && \ /bin/bash ~/mamba.sh -b -p /opt/mamba ENV PATH=$CONDA_DIR/bin:$PATH -RUN mamba update -n base -c defaults mamba && mamba update --all && mamba init bash +RUN mamba update -n base -c defaults mamba +RUN /bin/bash -c "mamba shell init -s bash -y" +RUN mamba update --all + +# RUN /bin/bash mamba shell init # Install dependencies COPY ./environment.yml /tmp/environment.yml diff --git a/docker/Evaluator/Dockerfile b/docker/Evaluator/Dockerfile index adf669869..ba636669d 100644 --- a/docker/Evaluator/Dockerfile +++ b/docker/Evaluator/Dockerfile @@ -7,4 +7,4 @@ FROM modynbase:latest AS evaluatorimage RUN chmod a+x /src/modyn/evaluator/modyn-evaluator # During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug -CMD mamba run -n modyn --no-capture-output ./modyn/evaluator/modyn-evaluator ./modyn/config/examples/modyn_config.yaml +CMD mamba run -n modyn -a "" ./modyn/evaluator/modyn-evaluator ./modyn/config/examples/modyn_config.yaml diff --git a/docker/Model_Storage/Dockerfile b/docker/Model_Storage/Dockerfile index 2c698d332..89e7f5487 100644 --- a/docker/Model_Storage/Dockerfile +++ b/docker/Model_Storage/Dockerfile @@ -10,4 +10,4 @@ RUN chown appuser /tmp/models RUN chmod -R 777 /tmp/models # During debugging, this entry point will be overridden. 
For more information, please refer to https://aka.ms/vscode-docker-python-debug -CMD mamba run -n modyn --no-capture-output ./modyn/model_storage/modyn-model-storage ./modyn/config/examples/modyn_config.yaml +CMD mamba run -n modyn -a "" ./modyn/model_storage/modyn-model-storage ./modyn/config/examples/modyn_config.yaml diff --git a/docker/Selector/Dockerfile b/docker/Selector/Dockerfile index 631167fbd..0491307f6 100644 --- a/docker/Selector/Dockerfile +++ b/docker/Selector/Dockerfile @@ -6,4 +6,4 @@ RUN chown appuser /tmp/trigger_samples RUN chmod -R 777 /tmp/trigger_samples # During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug -CMD mamba run -n modyn --no-capture-output ./modyn/selector/modyn-selector ./modyn/config/examples/modyn_config.yaml +CMD mamba run -n modyn -a "" ./modyn/selector/modyn-selector ./modyn/config/examples/modyn_config.yaml diff --git a/docker/Supervisor/Dockerfile b/docker/Supervisor/Dockerfile index b43fef6bc..492d7992b 100644 --- a/docker/Supervisor/Dockerfile +++ b/docker/Supervisor/Dockerfile @@ -6,4 +6,4 @@ RUN chown appuser /tmp/evaluation_results RUN chmod -R 777 /tmp/evaluation_results # During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug -CMD mamba run -n modyn --no-capture-output ./modyn/supervisor/modyn-supervisor ./modyn/config/examples/modyn_config.yaml +CMD mamba run -n modyn -a "" ./modyn/supervisor/modyn-supervisor ./modyn/config/examples/modyn_config.yaml diff --git a/docker/Tests/Dockerfile b/docker/Tests/Dockerfile index 1ca987185..5b0c42af2 100644 --- a/docker/Tests/Dockerfile +++ b/docker/Tests/Dockerfile @@ -10,4 +10,4 @@ USER appuser RUN chmod a+x /src/integrationtests/run.sh # During debugging, this entry point will be overridden. 
For more information, please refer to https://aka.ms/vscode-docker-python-debug -CMD mamba run -n modyn --no-capture-output /src/integrationtests/run.sh +CMD mamba run -n modyn -a "" /src/integrationtests/run.sh diff --git a/docker/Trainer_Server/Dockerfile b/docker/Trainer_Server/Dockerfile index ec5a330cd..88f5d426c 100644 --- a/docker/Trainer_Server/Dockerfile +++ b/docker/Trainer_Server/Dockerfile @@ -8,4 +8,4 @@ RUN mkdir -p /tmp/offline_dataset RUN chown appuser /tmp/offline_dataset # During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug -CMD mamba run -n modyn --no-capture-output ./modyn/trainer_server/modyn-trainer-server ./modyn/config/examples/modyn_config.yaml +CMD mamba run -n modyn -a "" ./modyn/trainer_server/modyn-trainer-server ./modyn/config/examples/modyn_config.yaml diff --git a/modyn/config/schema/pipeline/config.py b/modyn/config/schema/pipeline/config.py index 20b9388e4..52910374e 100644 --- a/modyn/config/schema/pipeline/config.py +++ b/modyn/config/schema/pipeline/config.py @@ -56,3 +56,6 @@ def validate_bts_training_selection_works(self) -> Self: ) return self + + +ModynPipelineConfig.model_rebuild()