diff --git a/.gitignore b/.gitignore index cf5d0c0c..0fc4746b 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,4 @@ deepchem_data_dir tests/lightning_logs pretrained/ catboost_info +unarchived/ \ No newline at end of file diff --git a/config/datasets/detection/detection_ar_tax_or.yaml b/config/datasets/detection/detection_ar_tax_or.yaml index f3aaf55b..cb946d55 100755 --- a/config/datasets/detection/detection_ar_tax_or.yaml +++ b/config/datasets/detection/detection_ar_tax_or.yaml @@ -11,22 +11,25 @@ _target_: innofw.core.integrations.ultralytics.datamodule.YOLOV5DataModuleAdapte train: - source: ./data/ArTaxOr/train + source: https://api.blackhole.ai.innopolis.university/public-datasets/artaxor/train.zip + target: ./data/ArTaxOr/train test: - source: ./data/ArTaxOr/test + source: https://api.blackhole.ai.innopolis.university/public-datasets/artaxor/test.zip + target: ./data/ArTaxOr/test infer: - source: ./data/ArTaxOr/test + source: https://api.blackhole.ai.innopolis.university/public-datasets/artaxor/test.zip + target: ./data/ArTaxOr/infer -num_workers: 8 -image_size: 600 +num_workers: 48 +image_size: 640 channels_num: 3 num_classes: 7 names: - 0: Araneae - 1: Coleoptera - 2: Diptera - 3: Hemiptera - 4: Hymenoptera - 5: Lepidoptera + 0: Hymenoptera + 1: Hemiptera + 2: Lepidoptera + 3: Coleoptera + 4: Diptera + 5: Araneae 6: Odonata diff --git a/config/experiments/detection/MK_ArTaxOr_yolov5.yaml b/config/experiments/detection/MK_ArTaxOr_yolov5.yaml index 8ec1003c..5c399e8e 100644 --- a/config/experiments/detection/MK_ArTaxOr_yolov5.yaml +++ b/config/experiments/detection/MK_ArTaxOr_yolov5.yaml @@ -2,12 +2,17 @@ defaults: - override /models: detection/yolov5 - override /datasets: detection/detection_ar_tax_or - - override /initializations: random + - override /optimizers: sgd +optimizers: + lr0: 0.01 + lrf: 0.01 project: "ar_tax_or" task: "image-detection" random_seed: 42 epochs: 40 batch_size: 16 -weights_freq: 1 \ No newline at end of file +weights_freq: 1 
+ +ckpt_path: https://api.blackhole.ai.innopolis.university/pretrained/testing/artaxor.pt \ No newline at end of file diff --git a/config/experiments/sweeps/MK_ArTaxOr_detection_yolov5.yaml b/config/experiments/sweeps/MK_ArTaxOr_detection_yolov5.yaml new file mode 100644 index 00000000..92a7403b --- /dev/null +++ b/config/experiments/sweeps/MK_ArTaxOr_detection_yolov5.yaml @@ -0,0 +1,35 @@ +# wandb sweep config +program: train.py +method: bayes +metric: + name: precision + goal: maximize + +name: param_tuning_yolov5 +project: ArTaxOR + +parameters: + + experiments: + value: "detection/MK_ArTaxOr_yolov5.yaml" + + batch_size: + values: [12, 24, 32] + + optimizers: + values: ['sgd.yaml', 'adam.yaml'] + + models.arch: + values: ['yolov5s', 'yolov5m', 'yolov5x'] + + optimizers.lr0: + values: [1e-2, 1e-3, 1e-4] + + optimizers.lrf: + values: [1e-1, 1e-2] + +command: + - ${env} + - python + - ${program} + - ${args_no_hyphens} diff --git a/config/models/detection/yolov5.yaml b/config/models/detection/yolov5.yaml index 98b09322..66f11071 100644 --- a/config/models/detection/yolov5.yaml +++ b/config/models/detection/yolov5.yaml @@ -2,8 +2,7 @@ name: yolov5 description: yolov5 by ultralytics _target_: innofw.core.models.torch.architectures.detection.YOLOv5 -arch: yolov5x # s x -num_classes: 4 +arch: yolov5m # n s m l x #--weights yolov5x.pt diff --git a/config/test.yaml b/config/test.yaml index 8877a91e..5dc6b702 100755 --- a/config/test.yaml +++ b/config/test.yaml @@ -25,6 +25,7 @@ batch_size: weights_path: weights_freq: ckpt_path: +experiment_name: accelerator: cpu #devices: 1 diff --git a/docs/Tutorials - start here/yolov5.md b/docs/Tutorials - start here/yolov5.md new file mode 100644 index 00000000..c3610f97 --- /dev/null +++ b/docs/Tutorials - start here/yolov5.md @@ -0,0 +1,65 @@ +# How to Use Innofw Framework for Object Detection using YoloV5 + +In this tutorial, we will show you how to use Innofw framework for object detection using YoloV5. 
We will show you how to create a configuration file for your dataset, how to create a configuration file for your training, how to train your model, how to evaluate your model and how to run inference with your model. + +There are several variants of the YOLOv5 model. For example, you can use yolov5s, yolov5m, yolov5l or yolov5x. Models for YOLOv5 follow a certain order: 's', 'm', 'l', 'x'. This sequence signifies a progression from the smallest and fastest model to the largest and slowest one. It's important to note that while larger models tend to offer higher accuracy, they also demand more computational resources and take longer to process. You can change the model by changing the `arch` parameter in the file `config/models/detection/yolov5.yaml`. + +## Dataset +* You need to create a configuration file tailored to your dataset. Please save this file in the following directory: `config/datasets/detection`. Within this directory, you can access existing configuration files that may serve as practical templates for your own. Take advantage of these resources to create an optimized configuration for your dataset. + +* The configuration file should contain the following: + 1- A link to your dataset (if it is hosted on S3) or the path to your dataset (if it is on your local machine). You will need to give the link to the training set, testing set and inference set separately. + 2- Some information about the dataset, such as the name, description, markup info and the task you are working on. + +* If your dataset needs specific preprocessing, you can create your own adapter. After creating the adapter, you will need to add it to the configuration file of your dataset using the `_target_` key. + +## Training +* You need to create a configuration file for your experiment and save it inside the following directory: `config/experiments/detection`. You can look at the configuration files that already exist in that directory. 
You can use them as a template for your training. You can also use the configuration file `template.yaml` as a template for your training. + +* In the configuration file you can specify several parameters such as the following: + 1- The dataset + 2- The model + 3- The optimizer + 4- The scheduler + 5- The number of epochs + 6- The batch size + 7- The loss function + 8- The augmentation + 9- The accelerator + 10- The image size + 11- The number of workers + + +* Selecting the image size is very important. Larger images generally make the model more accurate, but they also make training slower. You can change the image size by changing the `image_size` parameter in the dataset configuration file. +* You can watch the GPU usage by running the following command: `watch -n 1 nvidia-smi`. You can also watch the CPU usage by running the following command: `htop`. +* After creating the configuration file, you can run the following command to start the training: `python train.py experiments=<experiment_config>` (for example, `python train.py experiments=detection/MK_ArTaxOr_yolov5.yaml`). +* You can run more than one experiment at the same time. + +## Checkpoints + +* To use a checkpoint, you need to add the following parameter to your configuration file: `ckpt_path: <path_to_checkpoint>` + +Example: `ckpt_path: logs/experiments/detection/yolov5s/2021-09-27/14-44-32/checkpoints/epoch=1-step=249.ckpt` + +* You can also pass the checkpoint path on the command line. To do so, you need to add the following parameter to your command line: `ckpt_path=<path_to_checkpoint>` + +* If your checkpoint is uploaded to S3, you can use the link to the checkpoint as the value of `ckpt_path`. + +Example: `ckpt_path: https://api.blackhole.ai.innopolis.university/pretrained/testing/artaxor.pt` + + +## Evaluation + +* After training the model, you can use the checkpoint that was created by `train.py` to evaluate your model. 
You will find it inside the `logs` folder. + +* You can evaluate your model using the following command: `python test.py experiments=<experiment_config> ckpt_path=<path_to_checkpoint>` + +* You don't need to specify the checkpoint path in the command line if you already specified it in the configuration file. + +## Inference + +* After training the model, you can use the checkpoint that was created by `train.py` to run inference with your model. You will find it inside the `logs` folder. + +* You can run inference with your model using the following command: `python inference.py experiments=<experiment_config> ckpt_path=<path_to_checkpoint>` + +* You don't need to specify the checkpoint path in the command line if you already specified it in the configuration file. \ No newline at end of file diff --git a/innofw/core/integrations/ultralytics/datamodule.py b/innofw/core/integrations/ultralytics/datamodule.py index 1925bdf4..4d390c66 100644 --- a/innofw/core/integrations/ultralytics/datamodule.py +++ b/innofw/core/integrations/ultralytics/datamodule.py @@ -52,31 +52,9 @@ def setup_infer(self): return # root_dir self.infer_source = Path(self.infer_source) - root_path = self.infer_source.parent.parent - # new data folder - new_data_path = root_path / "unarchived" - new_data_path.mkdir(exist_ok=True, parents=True) - - new_img_path = new_data_path / "images" - - # === split train images and labels into train and val sets and move files === - - # split images and labels - infer_img_path = self.infer_source / "images" - - # get all files from train folder - img_files = list(infer_img_path.iterdir()) - - for files, folder_name in zip([img_files], ["infer"]): - # create a folder - new_path = new_img_path / folder_name - new_path.mkdir(exist_ok=True, parents=True) - - # copy files into new folder - for file in files: - shutil.copy(file, new_path / file.name) + root_path = self.infer_source.parent - self.data = str(new_data_path / "data.yaml") + self.data = str(root_path / "data.yaml") with open(self.data, "w+") as file: file.write(f"nc: {self.num_classes}\n") @@ 
-105,6 +83,7 @@ def __init__( augmentations=None, stage=False, channels_num: int = 3, + random_state: int = 42, *args, **kwargs, ): @@ -120,16 +99,26 @@ def __init__( val_size - fraction size of the validation set """ super().__init__(train, test, infer, stage=stage, *args, **kwargs) + if self.train: self.train_source = Path(self.train) + # In this datamodule, the train source should be the folder train itself not the folder "train/images" + if str(self.train_source).endswith("images"): + self.train_source = Path(str(self.train_source)[:-7]) if self.test: self.test_source = Path(self.test) + if str(self.test_source).endswith("images"): + self.test_source = Path(str(self.test_source)[:-7]) + + if self.infer: + self.infer_source = ( + Path(self.infer) + if not (type(self.infer) == str and self.infer.startswith("rts")) + else self.infer + ) + if str(self.infer_source).endswith("images"): + self.infer_source = Path(str(self.infer_source)[:-7]) - self.infer_source = ( - Path(self.infer) - if not (type(self.infer) == str and self.infer.startswith("rts")) - else self.infer - ) self.batch_size = batch_size # super().__init__(train, test, batch_size, num_workers) @@ -139,7 +128,7 @@ def __init__( self.val_size = val_size self.num_classes = num_classes self.names = names - self.random_state = 42 + self.random_state = random_state self.augmentations = augmentations # folder_name = self.train_dataset.stem @@ -148,13 +137,13 @@ def __init__( def setup_train_test_val(self, **kwargs): # root_dir - root_path = self.train_source.parent.parent + root_path = self.train_source.parent # new data folder - new_data_path = root_path / "unarchived" + new_data_path = root_path / "train_splitted" new_data_path.mkdir(exist_ok=True, parents=True) - - new_img_path = new_data_path / "images" - new_lbl_path = new_data_path / "labels" + + new_train_path = new_data_path / "train" + new_val_path = new_data_path / "val" # === split train images and labels into train and val sets and move files === @@ 
-168,6 +157,10 @@ def setup_train_test_val(self, **kwargs): assert ( len(label_files) == len(img_files) != 0 ), "number of images and labels should be the same" + + # sort the files so that the images and labels are in the same order + img_files.sort() + label_files.sort() # split into train and val ( @@ -180,46 +173,39 @@ def setup_train_test_val(self, **kwargs): img_files, test_size=self.val_size, random_state=self.random_state, - ) - - # get all files from test folder - test_img_path = self.test_source / "images" - test_lbl_path = self.test_source / "labels" - - test_img_files = list(test_img_path.iterdir()) - test_label_files = list(test_lbl_path.iterdir()) - - assert len(test_img_files) == len( - test_label_files - ), "number of test images and labels should be the same" + ) + + # Creating the training directory for files, folder_name in zip( - [train_label_files, val_label_files, test_label_files], - ["train", "val", "test"], + [train_img_files, train_label_files], + ["images", "labels"] ): # create a folder - new_path = new_lbl_path / folder_name + new_path = new_train_path / folder_name new_path.mkdir(exist_ok=True, parents=True) - # copy files into folder + + # Copy files into folder for file in files: shutil.copy(file, new_path / file.name) - + + # Creating the vallidation directory for files, folder_name in zip( - [train_img_files, val_img_files, test_img_files], - ["train", "val", "test"], + [val_img_files, val_label_files], + ["images", "labels"] ): # create a folder - new_path = new_img_path / folder_name + new_path = new_val_path /folder_name new_path.mkdir(exist_ok=True, parents=True) - - # copy files into new folder + + # Copy files into folder for file in files: - shutil.copy(file, new_path / file.name) + shutil.copy(file, new_path / file.name) - self.data = str(new_data_path / "data.yaml") + self.data = str(root_path / "data.yaml") + self.train_dataset = str(new_data_path / "train") + self.val_dataset = str(new_data_path / "val") + 
self.test_dataset = self.test_source - self.train_dataset = str(new_img_path / "train") - self.val_dataset = str(new_img_path / "val") - self.test_dataset = str(new_img_path / "test") # create a yaml file with open(self.data, "w+") as file: file.write(f"train: {self.train_dataset}\n") diff --git a/innofw/core/integrations/ultralytics/optimizers.py b/innofw/core/integrations/ultralytics/optimizers.py index 5a7b19eb..b0ccfbcd 100755 --- a/innofw/core/integrations/ultralytics/optimizers.py +++ b/innofw/core/integrations/ultralytics/optimizers.py @@ -13,11 +13,25 @@ class YOLOV5OptimizerBaseAdapter(BaseAdapter): def __init__(self): self.possible_values = {"optimizer": ["Adam", "AdamW", "SGD"]} - self.opt = {"optimizer": "Adam"} - self.hyp = {"momentum": 0.937, "weight decay": 5e-4} + self.opt = {"optimizer": "SGD"} # Setting SGD as the default optimizer + self.hyp = {"momentum": 0.937, "weight_decay": 5e-4} def adapt(self, optimizer) -> dict: + if optimizer is None: + return {"opt": self.opt, "hyp": self.hyp} + + if optimizer._target_.lower().endswith("adam"): + self.opt = {"optimizer": "Adam"} + elif optimizer._target_.lower().endswith("adamw"): + self.opt = {"optimizer": "AdamW"} + elif optimizer._target_.lower().endswith("sgd"): + self.opt = {"optimizer": "SGD"} + + self.hyp["lr0"] = optimizer.lr0 + self.hyp["lrf"] = optimizer.lrf + return {"opt": self.opt, "hyp": self.hyp} + def from_cfg(self, cfg): return {}, {} diff --git a/innofw/core/integrations/ultralytics/schedulers.py b/innofw/core/integrations/ultralytics/schedulers.py index bdefec5c..e4541507 100755 --- a/innofw/core/integrations/ultralytics/schedulers.py +++ b/innofw/core/integrations/ultralytics/schedulers.py @@ -12,10 +12,7 @@ class YOLOV5SchedulerBaseAdapter(BaseAdapter): def __init__(self): self.opt = {} - self.hyp = { - "lr0": 1e-2, - "lrf": 1e-1, - } + self.hyp = {} def adapt(self, scheduler) -> dict: return {"opt": self.opt, "hyp": self.hyp} diff --git 
a/innofw/core/integrations/ultralytics/yolov5_adapter.py b/innofw/core/integrations/ultralytics/yolov5_adapter.py index 831cbaf7..e057287b 100755 --- a/innofw/core/integrations/ultralytics/yolov5_adapter.py +++ b/innofw/core/integrations/ultralytics/yolov5_adapter.py @@ -116,7 +116,7 @@ def __init__( "sync_bn": False, "cos_lr": False, "image_weights": False, - "noplots": True, + "noplots": False, "noautoanchor": False, "noval": False, "nosave": False, @@ -151,7 +151,7 @@ def __init__( self.hyp = { "lr0": 0.01, - "lrf": 0.1, + "lrf": 0.01, "momentum": 0.937, "weight_decay": 0.0005, "warmup_epochs": 3.0, @@ -346,6 +346,7 @@ def test(self, data: YOLOV5DataModuleAdapter, ckpt_path=None): batch_size=data.batch_size, device=self.device, weights=ckpt_path, + task='test' ) self.update_checkpoints_path() diff --git a/innofw/pipeline.py b/innofw/pipeline.py index 2c3b6f03..cd28f2a3 100755 --- a/innofw/pipeline.py +++ b/innofw/pipeline.py @@ -103,6 +103,7 @@ def run_pipeline( stage=data_stage, augmentations=augmentations, batch_size=cfg.get("batch_size"), + random_state = cfg.get("random_seed") ) losses = get_losses(cfg, task, framework) callbacks = get_callbacks( @@ -138,6 +139,7 @@ def run_pipeline( "weights_path": cfg.get("weights_path"), "weights_freq": cfg.get("weights_freq"), "logger": logger, + "random_state": cfg.get("random_seed") } inno_model = InnoModel(**model_params) result = None diff --git a/test.py b/test.py index bc345a8b..bd451a2e 100755 --- a/test.py +++ b/test.py @@ -3,6 +3,9 @@ import dotenv import hydra +from hydra.core.hydra_config import HydraConfig +from omegaconf import OmegaConf + from pckg_util import check_gpu_and_torch_compatibility check_gpu_and_torch_compatibility() @@ -23,6 +26,13 @@ def main(config): # utils.extras(config) from innofw.pipeline import run_pipeline + + if not config.get("experiment_name"): + hydra_cfg = HydraConfig.get() + experiment_name = OmegaConf.to_container(hydra_cfg.runtime.choices)[ + "experiments" + ] + 
config.experiment_name = experiment_name # Test model return run_pipeline(config, test=True, train=False, predict=False)