Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Model Management Policies #295

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions benchmark/criteo_1TB/pipelines/exp0_finetune.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ model:
cat_23: 12022
cat_24: 97
cat_25: 35
model_storage:
full_model_strategy:
name: "PyTorchFullModel"
training:
gpus: 1
device: "cuda:0"
Expand Down
3 changes: 3 additions & 0 deletions benchmark/criteo_1TB/pipelines/exp1_finetune_ablation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ model:
cat_23: 12022
cat_24: 97
cat_25: 35
model_storage:
full_model_strategy:
name: "PyTorchFullModel"
training:
gpus: 1
device: "cuda:0"
Expand Down
3 changes: 3 additions & 0 deletions benchmark/criteo_1TB/pipelines/exp2_retrain_keep_model.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ model:
cat_23: 12022
cat_24: 97
cat_25: 35
model_storage:
full_model_strategy:
name: "PyTorchFullModel"
training:
gpus: 1
device: "cuda:0"
Expand Down
3 changes: 3 additions & 0 deletions benchmark/criteo_1TB/pipelines/exp3_retrain_new_model.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ model:
cat_23: 12022
cat_24: 97
cat_25: 35
model_storage:
full_model_strategy:
name: "PyTorchFullModel"
training:
gpus: 1
device: "cuda:0"
Expand Down
3 changes: 3 additions & 0 deletions benchmark/criteo_1TB/pipelines/exp4_current_day_only.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ model:
cat_23: 12022
cat_24: 97
cat_25: 35
model_storage:
full_model_strategy:
name: "PyTorchFullModel"
training:
gpus: 1
device: "cuda:0"
Expand Down
4 changes: 3 additions & 1 deletion benchmark/mnist/mnist.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ model:
id: ResNet18
config:
num_classes: 10
model_storage:
MaxiBoether marked this conversation as resolved.
Show resolved Hide resolved
full_model_strategy:
name: "PyTorchFullModel"
training:
gpus: 1
device: "cuda:0"
Expand Down Expand Up @@ -43,7 +46,6 @@ data:
import io
def bytes_parser_function(data: bytes) -> Image:
return Image.open(io.BytesIO(data)).convert("RGB")

trigger:
id: DataAmountTrigger
trigger_config:
Expand Down
3 changes: 3 additions & 0 deletions benchmark/wildtime_benchmarks/example_pipelines/arxiv.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ model:
id: ArticleNet
config:
num_classes: 172
model_storage:
full_model_strategy:
name: "PyTorchFullModel"
training:
gpus: 1
device: "cuda:0"
Expand Down
3 changes: 3 additions & 0 deletions benchmark/wildtime_benchmarks/example_pipelines/fmow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ model:
id: FmowNet
config:
num_classes: 62
model_storage:
full_model_strategy:
name: "PyTorchFullModel"
training:
gpus: 1
device: "cuda:0"
Expand Down
3 changes: 3 additions & 0 deletions benchmark/wildtime_benchmarks/example_pipelines/huffpost.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ model:
id: ArticleNet
config:
num_classes: 55
model_storage:
full_model_strategy:
name: "PyTorchFullModel"
training:
gpus: 1
device: "cuda:0"
Expand Down
3 changes: 3 additions & 0 deletions benchmark/wildtime_benchmarks/example_pipelines/yearbook.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ model:
config:
num_input_channels: 1
num_classes: 2
model_storage:
full_model_strategy:
name: "PyTorchFullModel"
training:
gpus: 1
device: "cuda:0"
Expand Down
6 changes: 5 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ services:
build:
context: .
dockerfile: docker/Model_Storage/Dockerfile
volumes:
- model_storage-data:/tmp/models
evaluator:
restart: on-failure
depends_on:
Expand All @@ -85,6 +87,7 @@ services:
- storage
- selector
- model_storage
- metadata-db
build:
context: .
dockerfile: docker/Trainer_Server/Dockerfile
Expand Down Expand Up @@ -159,4 +162,5 @@ services:
volumes:
storage-data:
selector-data:
downsampling-data:
downsampling-data:
model_storage-data:
2 changes: 2 additions & 0 deletions docker/Model_Storage/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
FROM modynbase:latest

RUN chmod a+x /src/modyn/model_storage/modyn-model-storage
RUN mkdir -p /tmp/models
RUN chown appuser /tmp/models

# During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug
CMD mamba run -n modyn --no-capture-output ./modyn/model_storage/modyn-model-storage ./modyn/config/examples/modyn_config.yaml
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ dependencies:
- pyaml
- numpy
- pandas
- bitstring
- tensorboard
- scipy
- pyftpdlib
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import yaml
from modyn.metadata_database.metadata_database_connection import MetadataDatabaseConnection
from modyn.metadata_database.models import SampleTrainingMetadata, TriggerTrainingMetadata
from modyn.metadata_database.utils import ModelStorageStrategyConfig

# pylint: disable-next=no-name-in-module
from modyn.metadata_processor.internal.grpc.generated.metadata_processor_pb2 import ( # noqa: E402, E501
Expand Down Expand Up @@ -49,7 +50,9 @@ def get_grpc_channel(config: dict, component: str) -> grpc.Channel:

def send_metadata_and_check_database(processor_client: MetadataProcessorClient, config: dict) -> int:
with MetadataDatabaseConnection(config) as database:
pipeline_id = database.register_pipeline(2)
pipeline_id = database.register_pipeline(
2, "ResNet18", "{}", False, ModelStorageStrategyConfig("PyTorchFullModel")
)

req = TrainingMetadataRequest(
pipeline_id=pipeline_id,
Expand Down
Loading
Loading