diff --git a/.github/workflows/docker_publish_image.yml b/.github/workflows/docker_publish_image.yml index 29fdc7f5..ffcbba92 100644 --- a/.github/workflows/docker_publish_image.yml +++ b/.github/workflows/docker_publish_image.yml @@ -19,7 +19,7 @@ env: jobs: build-and-push-api-image: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 permissions: contents: read packages: write @@ -50,7 +50,7 @@ jobs: build-and-push-worker-image: needs: build-and-push-api-image - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 permissions: contents: read packages: write diff --git a/backend/Dockerfile b/backend/Dockerfile index 783c3b06..6f9e36fa 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -27,7 +27,7 @@ RUN pip install setuptools==68.2.2 RUN pip install wheel==0.41.3 RUN pip install build==1.0.0 -RUN pip install -r requirements.txt +RUN pip install -r requirements.txt # RUN pip install --use-deprecated=legacy-resolver -r requirements.txt COPY docker/ramp/solaris /tmp/solaris diff --git a/backend/aiproject/settings.py b/backend/aiproject/settings.py index 106104eb..c479b47f 100644 --- a/backend/aiproject/settings.py +++ b/backend/aiproject/settings.py @@ -215,6 +215,8 @@ RAMP_HOME = env("RAMP_HOME", default=None) if RAMP_HOME: os.environ["RAMP_HOME"] = RAMP_HOME +YOLO_HOME = env("YOLO_HOME") + # training workspace TRAINING_WORKSPACE = env( @@ -224,4 +226,7 @@ ENABLE_PREDICTION_API = env("ENABLE_PREDICTION_API", default=False) +LOG_LINE_STREAM_TRUNCATE_VALUE = env("LOG_LINE_STREAM_TRUNCATE_VALUE", default=10) + + TEST_RUNNER = "tests.test_runners.NoDestroyTestRunner" diff --git a/backend/api-requirements.txt b/backend/api-requirements.txt index e74dc0a7..dabcdbdb 100644 --- a/backend/api-requirements.txt +++ b/backend/api-requirements.txt @@ -18,8 +18,8 @@ validators==0.20.0 gpxpy==1.5.0 geojson2osm==0.0.1 osmconflator==0.0.11 -orthogonalizer==0.0.4 -fairpredictor==0.0.26 +# orthogonalizer==0.0.4 +fairpredictor==0.0.37 rasterio==1.3.8 numpy<2.0.0 diff --git a/backend/core/models.py b/backend/core/models.py index 3964587d..3244ca83 100644 --- a/backend/core/models.py +++ b/backend/core/models.py @@ -14,7 +14,7 @@ class DatasetStatus(models.IntegerChoices): ACTIVE = 0 DRAFT = -1 - name = models.CharField(max_length=255) + name = models.CharField(max_length=50) user = models.ForeignKey(OsmUser, to_field="osm_id", on_delete=models.CASCADE) last_modified = models.DateTimeField(auto_now=True) created_at = models.DateTimeField(auto_now_add=True) @@ -36,6 +36,7 @@ class DownloadStatus(models.IntegerChoices): label_fetched = models.DateTimeField(null=True, blank=True) created_at = models.DateTimeField(auto_now_add=True) last_modified = models.DateTimeField(auto_now=True) + user = models.ForeignKey(OsmUser, to_field="osm_id", on_delete=models.CASCADE) class Label(models.Model): @@ -49,7 +50,8 @@ class Label(models.Model): class Model(models.Model): BASE_MODEL_CHOICES = ( ("RAMP", "RAMP"), - ("YOLO", "YOLO"), + ("YOLO_V8_V1", "YOLO_V8_V1"), + ("YOLO_V8_V2", "YOLO_V8_V2"), ) class ModelStatus(models.IntegerChoices): @@ -57,8 +59,8 @@ class ModelStatus(models.IntegerChoices): PUBLISHED = 0 DRAFT = -1 - dataset = models.ForeignKey(Dataset, to_field="id", on_delete=models.CASCADE) - name = models.CharField(max_length=255) + dataset = models.ForeignKey(Dataset, to_field="id", on_delete=models.DO_NOTHING) + name = models.CharField(max_length=50) created_at = models.DateTimeField(auto_now_add=True) last_modified = models.DateTimeField(auto_now=True) description = models.TextField(max_length=500, null=True, 
blank=True) @@ -66,7 +68,7 @@ class ModelStatus(models.IntegerChoices): published_training = models.PositiveIntegerField(null=True, blank=True) status = models.IntegerField(default=-1, choices=ModelStatus.choices) base_model = models.CharField( - choices=BASE_MODEL_CHOICES, default="RAMP", max_length=10 + choices=BASE_MODEL_CHOICES, default="RAMP", max_length=50 ) diff --git a/backend/core/serializers.py b/backend/core/serializers.py index 22507f2b..cb932485 100644 --- a/backend/core/serializers.py +++ b/backend/core/serializers.py @@ -140,8 +140,21 @@ class Meta: "last_modified", "label_fetched", "label_status", + "user", ) + def create(self, validated_data): + request = self.context.get("request") + if request and hasattr(request, "user"): + validated_data["user"] = request.user + return super().create(validated_data) + + def update(self, instance, validated_data): + request = self.context.get("request") + if request and hasattr(request, "user"): + validated_data["user"] = request.user + return super().update(instance, validated_data) + class FeedbackAOISerializer(GeoFeatureModelSerializer): class Meta: diff --git a/backend/core/tasks.py b/backend/core/tasks.py index fc9bc303..d20fbaf7 100644 --- a/backend/core/tasks.py +++ b/backend/core/tasks.py @@ -1,15 +1,25 @@ import json import logging import os +import pathlib import shutil import subprocess import sys import tarfile +import time import traceback from shutil import rmtree from celery import shared_task -from core.models import AOI, Feedback, FeedbackAOI, FeedbackLabel, Label, Training +from core.models import ( + AOI, + Feedback, + FeedbackAOI, + FeedbackLabel, + Label, + Model, + Training, +) from core.serializers import ( AOISerializer, FeedbackAOISerializer, @@ -23,6 +33,7 @@ from django.contrib.gis.geos import GEOSGeometry from django.shortcuts import get_object_or_404 from django.utils import timezone +from predictor import download_imagery, get_start_end_download_coords logger = logging.getLogger(__name__) @@ -32,6 +43,18 @@ DEFAULT_TILE_SIZE = 256 +class print_time: + def __init__(self, name): + self.name = name + + def __enter__(self): + self.start = time.perf_counter() + return self + + def __exit__(self, type, value, traceback): + print(f"{self.name} took {round(time.perf_counter() - self.start, 2)} seconds") + + def xz_folder(folder_path, output_filename, remove_original=False): """ Compresses a folder and its contents into a .tar.xz file and optionally removes the original folder. 
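For context on the timing helper added to core/tasks.py just above, here is a minimal, self-contained sketch of how it is meant to be wrapped around a slow step; the class body mirrors the one in the diff, and the time.sleep call is only a stand-in for real work such as the YOLO format conversion.

import time


class print_time:
    # Mirrors the context manager added in core/tasks.py: prints the wall-clock
    # time the wrapped block took, measured with time.perf_counter().
    def __init__(self, name):
        self.name = name

    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_value, tb):
        print(f"{self.name} took {round(time.perf_counter() - self.start, 2)} seconds")


# Usage, as in yolo_model_training:
with print_time("yolo conversion"):
    time.sleep(0.1)  # stand-in for the actual conversion step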
@@ -66,6 +89,346 @@ def get_file_count(path): return 0 +def prepare_data(training_instance, dataset_id, feedback, zoom_level, source_imagery): + training_input_base_path = os.path.join( + settings.TRAINING_WORKSPACE, f"dataset_{dataset_id}" + ) + training_input_image_source = os.path.join(training_input_base_path, "input") + if os.path.exists(training_input_image_source): + shutil.rmtree(training_input_image_source) + os.makedirs(training_input_image_source) + + if feedback: + aois = FeedbackAOI.objects.filter(training=feedback) + aoi_serializer = FeedbackAOISerializer(aois, many=True) + else: + aois = AOI.objects.filter(dataset=dataset_id) + aoi_serializer = AOISerializer(aois, many=True) + + first_aoi_centroid = aois[0].geom.centroid + training_instance.centroid = first_aoi_centroid + training_instance.save() + + for obj in aois: + bbox_coords = bbox(obj.geom.coords[0]) + for z in zoom_level: + zm_level = z + try: + tile_size = DEFAULT_TILE_SIZE + start, end = get_start_end_download_coords( + bbox_coords, zm_level, tile_size + ) + download_imagery( + start, + end, + zm_level, + base_path=training_input_image_source, + source=source_imagery, + ) + except Exception as ex: + raise ex + if is_dir_empty(training_input_image_source): + raise ValueError("No images found in the area") + + if feedback: + label = FeedbackLabel.objects.filter(feedback_aoi__in=[r.id for r in aois]) + serialized_field = FeedbackLabelFileSerializer(label, many=True) + else: + label = Label.objects.filter(aoi__in=[r.id for r in aois]) + serialized_field = LabelFileSerializer(label, many=True) + + with open( + os.path.join(training_input_image_source, "labels.geojson"), + "w", + encoding="utf-8", + ) as f: + f.write(json.dumps(serialized_field.data)) + + return training_input_image_source, aoi_serializer, serialized_field + + +def ramp_model_training( + training_instance, + dataset_id, + training_input_image_source, + serialized_field, + aoi_serializer, + epochs, + batch_size, + freeze_layers, + multimasks, + input_contact_spacing, + input_boundary_width, +): + import hot_fair_utilities + import ramp.utils + import tensorflow as tf + from hot_fair_utilities import preprocess + from hot_fair_utilities.training.ramp import run_feedback, train + + base_path = os.path.join(settings.RAMP_HOME, "ramp-data", str(dataset_id)) + if os.path.exists(base_path): + rmtree(base_path) + destination_image_input = os.path.join(base_path, "input") + + if not os.path.exists(training_input_image_source): + raise ValueError( + "Training folder has not been created, Build the dataset first /dataset/build/" + ) + if os.path.exists(destination_image_input): + shutil.rmtree(destination_image_input) + shutil.copytree(training_input_image_source, destination_image_input) + + model_input_image_path = f"{base_path}/input" + preprocess_output = f"/{base_path}/preprocessed" + + preprocess( + input_path=model_input_image_path, + output_path=preprocess_output, + rasterize=True, + rasterize_options=["binary"], + georeference_images=True, + multimasks=multimasks, + input_contact_spacing=input_contact_spacing, + input_boundary_width=input_boundary_width, + ) + training_instance.chips_length = get_file_count( + os.path.join(preprocess_output, "chips") + ) + training_instance.save() + + train_output = f"{base_path}/train" + final_accuracy, final_model_path = train( + input_path=preprocess_output, + output_path=train_output, + epoch_size=epochs, + batch_size=batch_size, + model="ramp", + model_home=os.environ["RAMP_HOME"], + freeze_layers=freeze_layers, + 
multimasks=multimasks, + ) + + output_path = os.path.join( + pathlib.Path(training_input_image_source).parent, + "output", + f"training_{training_instance.id}", + ) + if os.path.exists(output_path): + shutil.rmtree(output_path) + shutil.copytree(final_model_path, os.path.join(output_path, "checkpoint.tf")) + shutil.copytree(preprocess_output, os.path.join(output_path, "preprocessed")) + + graph_output_path = f"{base_path}/train/graphs" + shutil.copytree(graph_output_path, os.path.join(output_path, "graphs")) + + with open(os.path.join(output_path, "labels.geojson"), "w", encoding="utf-8") as f: + f.write(json.dumps(serialized_field.data)) + + with open(os.path.join(output_path, "aois.geojson"), "w", encoding="utf-8") as f: + f.write(json.dumps(aoi_serializer.data)) + + tippecanoe_command = f"""tippecanoe -o {os.path.join(output_path,"meta.pmtiles")} -Z7 -z18 -L aois:{ os.path.join(output_path, "aois.geojson")} -L labels:{os.path.join(output_path, "labels.geojson")} --force --read-parallel -rg --drop-densest-as-needed""" + try: + result = subprocess.run( + tippecanoe_command, shell=True, check=True, capture_output=True + ) + logging.info(result.stdout.decode("utf-8")) + except subprocess.CalledProcessError as ex: + logger.error(ex.output) + raise ex + + shutil.copyfile( + os.path.join(output_path, "aois.geojson"), + os.path.join(preprocess_output, "aois.geojson"), + ) + shutil.copyfile( + os.path.join(output_path, "labels.geojson"), + os.path.join(preprocess_output, "labels.geojson"), + ) + xz_folder( + preprocess_output, + os.path.join(output_path, "preprocessed.tar.xz"), + remove_original=True, + ) + shutil.rmtree(base_path) + training_instance.accuracy = float(final_accuracy) + training_instance.finished_at = timezone.now() + training_instance.status = "FINISHED" + training_instance.save() + response = { + "accuracy": float(final_accuracy), + "tiles_path": os.path.join(output_path, "meta.pmtiles"), + "model_path": os.path.join(output_path, "checkpoint.h5"), + "graph_path": os.path.join(output_path, "graphs"), + } + return response + + +def yolo_model_training( + training_instance, + dataset_id, + training_input_image_source, + serialized_field, + aoi_serializer, + epochs, + batch_size, + multimasks, + model="YOLO_V8_V1", +): + from hot_fair_utilities import preprocess + from hot_fair_utilities.preprocessing.yolo_v8_v1.yolo_format import ( + yolo_format as yolo_format_v1, + ) + from hot_fair_utilities.preprocessing.yolo_v8_v2.yolo_format import ( + yolo_format as yolo_format_v2, + ) + from hot_fair_utilities.training.yolo_v8_v1.train import train as train_yolo_v1 + from hot_fair_utilities.training.yolo_v8_v2.train import train as train_yolo_v2 + + base_path = os.path.join(settings.YOLO_HOME, "yolo-data", str(dataset_id)) + if os.path.exists(base_path): + rmtree(base_path) + destination_image_input = os.path.join(base_path, "input") + + if not os.path.exists(training_input_image_source): + raise ValueError( + "Training folder has not been created, Build the dataset first /dataset/build/" + ) + if os.path.exists(destination_image_input): + shutil.rmtree(destination_image_input) + shutil.copytree(training_input_image_source, destination_image_input) + + model_input_image_path = f"{base_path}/input" + preprocess_output = f"/{base_path}/preprocessed" + if model == "YOLO_V8_V1": + multimasks = True + preprocess( + input_path=model_input_image_path, + output_path=preprocess_output, + rasterize=True, + rasterize_options=["binary"], + georeference_images=True, + multimasks=multimasks, + epsg=4326 
if model == "YOLO_V8_V2" else 3857, + ) + training_instance.chips_length = get_file_count( + os.path.join(preprocess_output, "chips") + ) + training_instance.save() + + yolo_data_dir = os.path.join(base_path, model) + with print_time("yolo conversion"): + if model == "YOLO_V8_V1": + yolo_format_v1( + preprocessed_dirs=preprocess_output, + yolo_dir=yolo_data_dir, + multimask=True, + p_val=0.05, + ) + else: + yolo_format_v2( + input_path=preprocess_output, + output_path=yolo_data_dir, + ) + if model == "YOLO_V8_V1": + output_model_path, final_accuracy = train_yolo_v1( + data=f"{base_path}", + weights=os.path.join(settings.YOLO_HOME, "yolov8s_v1-seg-best.pt"), + epochs=epochs, + batch_size=batch_size, + pc=2.0, + output_path=yolo_data_dir, + dataset_yaml_path=os.path.join(yolo_data_dir, "yolo_dataset.yaml"), + ) + else: + output_model_path, final_accuracy = train_yolo_v2( + data=f"{base_path}", + weights=os.path.join(settings.YOLO_HOME, "yolov8s_v2-seg.pt"), + epochs=epochs, + batch_size=batch_size, + pc=2.0, + output_path=yolo_data_dir, + dataset_yaml_path=os.path.join(yolo_data_dir, "yolo_dataset.yaml"), + ) + + output_path = os.path.join( + pathlib.Path(training_input_image_source).parent, + "output", + f"training_{training_instance.id}", + ) + if os.path.exists(output_path): + shutil.rmtree(output_path) + # print(output_path) + os.makedirs(output_path) + + shutil.copyfile(output_model_path, os.path.join(output_path, "checkpoint.pt")) + shutil.copyfile(os.path.join(os.path.dirname(output_model_path),'best.onnx'), os.path.join(output_path, "checkpoint.onnx")) + # shutil.copyfile(os.path.dirname(output_model_path,'checkpoint.tflite'), os.path.join(output_path, "checkpoint.tflite")) + + shutil.copytree(preprocess_output, os.path.join(output_path, "preprocessed")) + os.makedirs(os.path.join(output_path,model),exist_ok=True) + + shutil.copytree(os.path.join(yolo_data_dir,'images'), os.path.join(output_path,model, "images")) + shutil.copytree(os.path.join(yolo_data_dir,'labels'), os.path.join(output_path,model, "labels")) + shutil.copyfile(os.path.join(yolo_data_dir,'yolo_dataset.yaml'), os.path.join(output_path,model, "yolo_dataset.yaml")) + + graph_output_path = os.path.join( + pathlib.Path(os.path.dirname(output_model_path)).parent, "iou_chart.png" + ) + os.makedirs(os.path.join(output_path, "graphs"), exist_ok=True) + shutil.copyfile( + graph_output_path, + os.path.join( + output_path, + "graphs", + "training_accuracy.png", ### TODO : replace this with actual graph that will be decided + ), + ) + + with open(os.path.join(output_path, "labels.geojson"), "w", encoding="utf-8") as f: + f.write(json.dumps(serialized_field.data)) + + with open(os.path.join(output_path, "aois.geojson"), "w", encoding="utf-8") as f: + f.write(json.dumps(aoi_serializer.data)) + + tippecanoe_command = f"""tippecanoe -o {os.path.join(output_path,"meta.pmtiles")} -Z7 -z18 -L aois:{ os.path.join(output_path, "aois.geojson")} -L labels:{os.path.join(output_path, "labels.geojson")} --force --read-parallel -rg --drop-densest-as-needed""" + try: + result = subprocess.run( + tippecanoe_command, shell=True, check=True, capture_output=True + ) + logging.info(result.stdout.decode("utf-8")) + except subprocess.CalledProcessError as ex: + logger.error(ex.output) + raise ex + + shutil.copyfile( + os.path.join(output_path, "aois.geojson"), + os.path.join(preprocess_output, "aois.geojson"), + ) + shutil.copyfile( + os.path.join(output_path, "labels.geojson"), + os.path.join(preprocess_output, "labels.geojson"), + ) + xz_folder( + 
preprocess_output, + os.path.join(output_path, "preprocessed.tar.xz"), + remove_original=True, + ) + shutil.rmtree(base_path) + training_instance.accuracy = float(final_accuracy) + training_instance.finished_at = timezone.now() + training_instance.status = "FINISHED" + training_instance.save() + response = { + "accuracy": float(final_accuracy), + "tiles_path": os.path.join(output_path, "meta.pmtiles"), + "model_path": os.path.join(output_path, "checkpoint.pt"), + "graph_path": os.path.join(output_path, "graphs"), + } + return response + + @shared_task def train_model( dataset_id, @@ -80,280 +443,64 @@ def train_model( input_contact_spacing=8, input_boundary_width=3, ): - # importing them here so that it won't be necessary when sending tasks ( api only) - import hot_fair_utilities - import ramp.utils - import tensorflow as tf - from hot_fair_utilities import preprocess, train - from hot_fair_utilities.training import run_feedback - from predictor import download_imagery, get_start_end_download_coords training_instance = get_object_or_404(Training, id=training_id) + model_instance = get_object_or_404(Model, id=training_instance.model.id) + training_instance.status = "RUNNING" training_instance.started_at = timezone.now() training_instance.save() - if settings.RAMP_HOME is None: + os.makedirs(settings.LOG_PATH, exist_ok=True) + if training_instance.task_id is None or training_instance.task_id.strip() == "": + training_instance.task_id = train_model.request.id + training_instance.save() + log_file = os.path.join( + settings.LOG_PATH, f"run_{train_model.request.id}_log.txt" + ) + + + if model_instance.base_model == "YOLO_V8_V1" and settings.YOLO_HOME is None: + raise ValueError("YOLO Home is not configured") + elif model_instance.base_model != "YOLO_V8_V1" and settings.RAMP_HOME is None: raise ValueError("Ramp Home is not configured") try: - ## -----------IMAGE DOWNLOADER--------- - os.makedirs(settings.LOG_PATH, exist_ok=True) - if training_instance.task_id is None or training_instance.task_id.strip() == "": - training_instance.task_id = train_model.request.id - training_instance.save() - log_file = os.path.join( - settings.LOG_PATH, f"run_{train_model.request.id}_log.txt" - ) with open(log_file, "w") as f: - # redirect stdout to the log file + # redirect stdout to the log file sys.stdout = f - training_input_base_path = os.path.join( - settings.TRAINING_WORKSPACE, f"dataset_{dataset_id}" - ) - training_input_image_source = os.path.join( - training_input_base_path, "input" + training_input_image_source, aoi_serializer, serialized_field = prepare_data( + training_instance, dataset_id, feedback, zoom_level, source_imagery ) - if os.path.exists(training_input_image_source): # always build dataset - shutil.rmtree(training_input_image_source) - os.makedirs(training_input_image_source) - if feedback: - try: - aois = FeedbackAOI.objects.filter(training=feedback) - aoi_serializer = FeedbackAOISerializer(aois, many=True) - - except FeedbackAOI.DoesNotExist: - raise ValueError( - f"No Feedback AOI is attached with supplied training id:{dataset_id}, Create AOI first", - ) - else: - try: - aois = AOI.objects.filter(dataset=dataset_id) - aoi_serializer = AOISerializer(aois, many=True) - - except AOI.DoesNotExist: - raise ValueError( - f"No AOI is attached with supplied dataset id:{dataset_id}, Create AOI first", - ) - first_aoi_centroid = aois[0].geom.centroid - training_instance.centroid = first_aoi_centroid - training_instance.save() - - for obj in aois: - bbox_coords = bbox(obj.geom.coords[0]) - for z 
in zoom_level: - zm_level = z - print( - f"""Running Download process for - aoi : {obj.id} - dataset : {dataset_id} , zoom : {zm_level}""" - ) - try: - tile_size = DEFAULT_TILE_SIZE # by default - - start, end = get_start_end_download_coords( - bbox_coords, zm_level, tile_size - ) - # start downloading - download_imagery( - start, - end, - zm_level, - base_path=training_input_image_source, - source=source_imagery, - ) - - except Exception as ex: - raise ex - if is_dir_empty(training_input_image_source): - raise ValueError("No images found in the area") - - ## -----------LABEL GENERATOR--------- - logging.info("Label Generator started") - aoi_list = [r.id for r in aois] - logging.info(aoi_list) - - if feedback: - label = FeedbackLabel.objects.filter(feedback_aoi__in=aoi_list) - logging.info(label) - - serialized_field = FeedbackLabelFileSerializer(label, many=True) - else: - label = Label.objects.filter(aoi__in=aoi_list) - serialized_field = LabelFileSerializer(label, many=True) - - with open( - os.path.join(training_input_image_source, "labels.geojson"), - "w", - encoding="utf-8", - ) as f: - f.write(json.dumps(serialized_field.data)) - - ## --------- Data Preparation ---------- - base_path = os.path.join(settings.RAMP_HOME, "ramp-data", str(dataset_id)) - # Check if the path exists - if os.path.exists(base_path): - # Delete the directory and its contents - rmtree(base_path) - destination_image_input = os.path.join(base_path, "input") - - logging.info(training_input_image_source) - if not os.path.exists(training_input_image_source): - raise ValueError( - "Training folder has not been created at , Build the dataset first /dataset/build/" - ) - if os.path.exists(destination_image_input): - shutil.rmtree(destination_image_input) - shutil.copytree(training_input_image_source, destination_image_input) - - # preprocess - model_input_image_path = f"{base_path}/input" - preprocess_output = f"/{base_path}/preprocessed" - - if multimasks: - logger.info( - "Using multiple masks for training : background, footprint, boundary, contact" + if model_instance.base_model in ("YOLO_V8_V1", "YOLO_V8_V2"): + response = yolo_model_training( + training_instance, + dataset_id, + training_input_image_source, + serialized_field, + aoi_serializer, + epochs, + batch_size, + multimasks, + model=model_instance.base_model, ) else: - logger.info("Using binary masks for training : background, footprint") - preprocess( - input_path=model_input_image_path, - output_path=preprocess_output, - rasterize=True, - rasterize_options=["binary"], - georeference_images=True, - multimasks=multimasks, - input_contact_spacing=input_contact_spacing, - input_boundary_width=input_boundary_width, - ) - training_instance.chips_length = get_file_count( - os.path.join(preprocess_output, "chips") - ) - training_instance.save() - - # train - - train_output = f"{base_path}/train" - if feedback: - final_accuracy, final_model_path = run_feedback( - input_path=preprocess_output, - output_path=train_output, - feedback_base_model=os.path.join( - settings.TRAINING_WORKSPACE, - f"dataset_{dataset_id}", - "output", - f"training_{feedback}", - "checkpoint.tf", - ), - model_home=os.environ["RAMP_HOME"], - epoch_size=epochs, - multimasks=multimasks, - batch_size=batch_size, - freeze_layers=freeze_layers, + response = ramp_model_training( + training_instance, + dataset_id, + training_input_image_source, + serialized_field, + aoi_serializer, + epochs, + batch_size, + freeze_layers, + multimasks, + input_contact_spacing, + input_boundary_width, ) - else: - 
final_accuracy, final_model_path = train( - input_path=preprocess_output, - output_path=train_output, - epoch_size=epochs, - batch_size=batch_size, - model="ramp", - model_home=os.environ["RAMP_HOME"], - freeze_layers=freeze_layers, - multimasks=multimasks, - ) - - # copy final model to output - output_path = os.path.join( - training_input_base_path, "output", f"training_{training_id}" - ) - if os.path.exists(output_path): - shutil.rmtree(output_path) - shutil.copytree( - final_model_path, os.path.join(output_path, "checkpoint.tf") - ) - - # shutil.copytree( - # preprocess_output, os.path.join(output_path, "preprocessed") - # ) - - graph_output_path = f"{base_path}/train/graphs" - shutil.copytree(graph_output_path, os.path.join(output_path, "graphs")) - - # convert model to hdf5 for faster inference - model = tf.keras.models.load_model( - os.path.join(output_path, "checkpoint.tf") - ) - # Save the model in HDF5 format - model.save(os.path.join(output_path, "checkpoint.h5")) - - logger.info(model.inputs) - logger.info(model.outputs) - - # Convert the model to tflite for android/ios. - converter = tf.lite.TFLiteConverter.from_keras_model(model) - tflite_model = converter.convert() - - # Save the model. - with open(os.path.join(output_path, "checkpoint.tflite"), "wb") as f: - f.write(tflite_model) - - # dump labels to output folder as well - with open( - os.path.join(output_path, "labels.geojson"), - "w", - encoding="utf-8", - ) as f: - f.write(json.dumps(serialized_field.data)) - - # dump used aois as featurecollection in output - with open( - os.path.join(output_path, "aois.geojson"), - "w", - encoding="utf-8", - ) as f: - f.write(json.dumps(aoi_serializer.data)) - - tippecanoe_command = f"""tippecanoe -o {os.path.join(output_path,"meta.pmtiles")} -Z7 -z18 -L aois:{ os.path.join(output_path, "aois.geojson")} -L labels:{os.path.join(output_path, "labels.geojson")} --force --read-parallel -rg --drop-densest-as-needed""" - logging.info("Starting to generate vector tiles for aois and labels") - try: - result = subprocess.run( - tippecanoe_command, shell=True, check=True, capture_output=True - ) - logging.info(result.stdout.decode("utf-8")) - except subprocess.CalledProcessError as ex: - logger.error(ex.output) - raise ex - logging.info("Vector tile generation done !") - - # copy aois and labels to preprocess output before compressing it to tar - shutil.copyfile( - os.path.join(output_path, "aois.geojson"), - os.path.join(preprocess_output, "aois.geojson"), - ) - shutil.copyfile( - os.path.join(output_path, "labels.geojson"), - os.path.join(preprocess_output, "labels.geojson"), - ) - xz_folder( - preprocess_output, - os.path.join(output_path, "preprocessed.tar.xz"), - remove_original=True, - ) - # now remove the ramp-data all our outputs are copied to our training workspace - shutil.rmtree(base_path) - training_instance.accuracy = float(final_accuracy) - training_instance.finished_at = timezone.now() - training_instance.status = "FINISHED" - training_instance.save() - response = {} - response["accuracy"] = float(final_accuracy) - response["tiles_path"] = os.path.join(output_path, "meta.pmtiles") - response["model_path"] = os.path.join(output_path, "checkpoint.h5") - response["graph_path"] = os.path.join(output_path, "graphs") - sys.stdout = sys.__stdout__ - logger.info(f"Training task {training_id} completed successfully") - return response + logger.info(f"Training task {training_id} completed successfully") + return response except Exception as ex: training_instance.status = "FAILED" diff --git 
a/backend/core/views.py b/backend/core/views.py index 8bbd2d35..fa01b834 100644 --- a/backend/core/views.py +++ b/backend/core/views.py @@ -32,7 +32,6 @@ from geojson2osm import geojson2osm from login.authentication import OsmAuthentication from login.permissions import IsAdminUser, IsOsmAuthenticated, IsStaffUser -from orthogonalizer import othogonalize_poly from osmconflator import conflate_geojson from rest_framework import decorators, filters, serializers, status, viewsets from rest_framework.decorators import api_view @@ -89,7 +88,22 @@ class DatasetViewSet( permission_classes = [IsOsmAuthenticated] public_methods = ["GET"] queryset = Dataset.objects.all() + filter_backends = ( + DjangoFilterBackend, + filters.SearchFilter, + filters.OrderingFilter, + ) serializer_class = DatasetSerializer # connecting serializer + filterset_fields = { + "status": ["exact"], + "created_at": ["exact", "gt", "gte", "lt", "lte"], + "last_modified": ["exact", "gt", "gte", "lt", "lte"], + "user": ["exact"], + "id": ["exact"], + "source_imagery": ["exact"], + } + ordering_fields = ["created_at", "last_modified", "id", "status"] + search_fields = ["name", "id"] class TrainingSerializer( @@ -195,8 +209,15 @@ class TrainingViewSet( public_methods = ["GET"] queryset = Training.objects.all() http_method_names = ["get", "post", "delete"] + filter_backends = ( + DjangoFilterBackend, + filters.SearchFilter, + filters.OrderingFilter, + ) serializer_class = TrainingSerializer # connecting serializer filterset_fields = ["model", "status"] + ordering_fields = ["finished_at", "accuracy", "id", "model", "status"] + search_fields = ["description", "id"] def retrieve(self, request, *args, **kwargs): instance = self.get_object() @@ -304,6 +325,7 @@ class AOIViewSet(viewsets.ModelViewSet): authentication_classes = [OsmAuthentication] permission_classes = [IsOsmAuthenticated] public_methods = ["GET"] + authenticated_user_allowed_methods = ["POST", "DELETE"] queryset = AOI.objects.all() serializer_class = AOISerializer # connecting serializer filter_backends = [DjangoFilterBackend] @@ -317,9 +339,10 @@ class LabelViewSet(viewsets.ModelViewSet): queryset = Label.objects.all() serializer_class = LabelSerializer # connecting serializer bbox_filter_field = "geom" + pagination_class = None filter_backends = ( InBBoxFilter, # it will take bbox like this api/v1/label/?in_bbox=-90,29,-89,35 , - # TMSTileFilter, # will serve as tms tiles https://wiki.openstreetmap.org/wiki/TMS , use like this ?tile=8/100/200 z/x/y which is equivalent to filtering on the bbox (-39.37500,-71.07406,-37.96875,-70.61261) # Note that the tile address start in the upper left, not the lower left origin used by some implementations. + TMSTileFilter, # will serve as tms tiles https://wiki.openstreetmap.org/wiki/TMS , use like this ?tile=8/100/200 z/x/y which is equivalent to filtering on the bbox (-39.37500,-71.07406,-37.96875,-70.61261) # Note that the tile address start in the upper left, not the lower left origin used by some implementations. 
DjangoFilterBackend, ) bbox_filter_include_overlapping = ( @@ -533,9 +556,9 @@ def run_task_status(request, run_id: str): log_file = os.path.join(settings.LOG_PATH, f"run_{run_id}_log.txt") try: # read the last 10 lines of the log file - output = subprocess.check_output(["tail", "-n", "10", log_file]).decode( - "utf-8" - ) + output = subprocess.check_output( + ["tail", "-n", settings.LOG_LINE_STREAM_TRUNCATE_VALUE, log_file] + ).decode("utf-8") except Exception as e: output = str(e) result = { @@ -621,6 +644,7 @@ def post(self, request, *args, **kwargs): DEFAULT_TILE_SIZE = 256 if settings.ENABLE_PREDICTION_API: +from orthogonalizer import othogonalize_poly class PredictionView(APIView): authentication_classes = [OsmAuthentication] @@ -727,15 +751,20 @@ def post(self, request, *args, **kwargs): def publish_training(request, training_id: int): """Publishes training for model""" training_instance = get_object_or_404(Training, id=training_id) + model_instance = get_object_or_404(Model, id=training_instance.model.id) if training_instance.status != "FINISHED": return Response("Training is not FINISHED", status=409) - if training_instance.accuracy < 70: - return Response( - "Can't publish the training since its accuracy is below 70%", status=403 - ) - - model_instance = get_object_or_404(Model, id=training_instance.model.id) + if model_instance.base_model == "RAMP": + if training_instance.accuracy < 70: + return Response( + "Can't publish the training since its accuracy is below 70%", status=403 + ) + else: ## Training publish limit for other model than ramp , TODO : Change this limit after testing for yolo + if training_instance.accuracy < 5: + return Response( + "Can't publish the training since its accuracy is below 5%", status=403 + ) # Check if the current user is the owner of the model if model_instance.user != request.user: @@ -829,7 +858,7 @@ class TrainingWorkspaceDownloadView(APIView): def dispatch(self, request, *args, **kwargs): lookup_dir = kwargs.get("lookup_dir") - if lookup_dir.endswith("training_validation_sparse_categorical_accuracy.png"): + if lookup_dir.endswith("training_accuracy.png"): # bypass self.authentication_classes = [] self.permission_classes = [] diff --git a/backend/login/permissions.py b/backend/login/permissions.py index b40c1390..d33a5549 100644 --- a/backend/login/permissions.py +++ b/backend/login/permissions.py @@ -12,10 +12,8 @@ def has_permission(self, request, view): return True if request.user and request.user.is_authenticated: - # Global access - if request.user.is_staff or request.user.is_superuser: - return True - + # if request.user.is_staff or request.user.is_superuser: + # return True return True return False @@ -29,14 +27,18 @@ def has_object_permission(self, request, view, obj): if request.user.is_staff or request.user.is_superuser: return True ## if the object it is trying to access has user info + if request.user and request.user.is_authenticated: + authenticated_user_allowed_methods = getattr( + view, "authenticated_user_allowed_methods", [] + ) + if request.method in authenticated_user_allowed_methods: + return True + if hasattr(obj, "user"): # in order to change it it needs to be in his/her name if obj.user == request.user: return True - else: - if request.method == "POST": - # if object doesn't have user in it then he has permission to access the object , considered as common object - return True + return False diff --git a/backend/pyproject.toml b/backend/pyproject.toml index e1eb0f5b..096cd654 100644 --- a/backend/pyproject.toml +++ 
b/backend/pyproject.toml @@ -25,8 +25,7 @@ dependencies = [ "gpxpy==1.5.0", "geojson2osm==0.0.1", "osmconflator==0.0.9", - "orthogonalizer==0.0.4", - "fairpredictor==0.0.26", + "fairpredictor==0.0.36", "tflite-runtime==2.14.0", "hot-fair-utilities==1.2.3", ] diff --git a/backend/requirements.txt b/backend/requirements.txt index d7b4e8cc..31e3b2b4 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,4 +1,4 @@ -r api-requirements.txt -hot-fair-utilities==1.3.0 +hot-fair-utilities==2.0.6 tflite-runtime==2.14.0 tippecanoe==2.45.0 \ No newline at end of file
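A caveat on the log-streaming change in core/views.py: django-environ returns the raw environment string when LOG_LINE_STREAM_TRUNCATE_VALUE is set but the integer default 10 when it is not, and subprocess argv entries must all be strings, so with the default the tail call raises a TypeError that the surrounding try/except then returns as the "log" text. A minimal sketch of the safer call, assuming a hypothetical tail_log helper:

import subprocess


def tail_log(log_file, n):
    # n may be an int (the settings default) or a str (read from the environment);
    # casting keeps subprocess happy in both cases.
    return subprocess.check_output(["tail", "-n", str(n), log_file]).decode("utf-8")


# e.g. output = tail_log(log_file, settings.LOG_LINE_STREAM_TRUNCATE_VALUE)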
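The filter backends added to DatasetViewSet are driven entirely by query parameters. A sketch of typical requests, assuming the dataset endpoint lives under /api/v1/dataset/ and using a hypothetical host; only the parameter names come from the filterset_fields, search_fields and ordering_fields declared in the diff, the values are made up.

import requests

BASE = "https://example.org/api/v1"  # hypothetical host and URL prefix

# exact-match filters from filterset_fields
requests.get(f"{BASE}/dataset/", params={"status": 0, "user": 12345})
# range lookups enabled for created_at / last_modified
requests.get(f"{BASE}/dataset/", params={"created_at__gte": "2024-01-01"})
# SearchFilter over the declared search_fields, OrderingFilter over ordering_fields
requests.get(f"{BASE}/dataset/", params={"search": "kathmandu", "ordering": "-last_modified"})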
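The create()/update() overrides added to the AOI serializer depend on the request being available in the serializer context. DRF's ModelViewSet supplies it automatically through get_serializer_context(), so the AOI endpoints get this for free; any manual use has to pass it explicitly. A sketch under that assumption, with create_aoi, payload and request as illustrative placeholders:

from core.serializers import AOISerializer


def create_aoi(request, payload):
    # `user` is filled in server-side from the authenticated request,
    # so clients never submit it themselves.
    serializer = AOISerializer(data=payload, context={"request": request})
    serializer.is_valid(raise_exception=True)
    return serializer.save()  # create() sets validated_data["user"] = request.user

Since AOI.user is added as a non-nullable foreign key, generating the migration will also require a one-off default (or a temporary null=True) for existing AOI rows.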
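On the settings change: unlike RAMP_HOME a few lines above, env("YOLO_HOME") is read without a default, so django-environ raises ImproperlyConfigured at startup whenever the variable is unset; as a result the settings.YOLO_HOME is None guard in train_model (which, as written, also only covers YOLO_V8_V1) can never fire. If RAMP-only deployments are meant to keep booting without YOLO configured, a sketch following the existing convention:

# Assumes the RAMP_HOME convention just above; train_model still fails with a
# clear error when a YOLO training is requested and the path is missing.
YOLO_HOME = env("YOLO_HOME", default=None)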
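One last observation plus a small sketch of the expected output layout. In the refactored ramp_model_training the old HDF5/TFLite conversion is gone (only checkpoint.tf is copied), yet the returned model_path still points at checkpoint.h5; the YOLO path returns checkpoint.pt and additionally copies checkpoint.onnx. The helper below is hypothetical and simply reports which of the artifacts written by yolo_model_training are missing from a finished training's output directory under TRAINING_WORKSPACE/dataset_<id>/output/training_<id>/:

import os

# Artifacts written by yolo_model_training, per the copy/write calls in the diff.
YOLO_ARTIFACTS = [
    "checkpoint.pt",
    "checkpoint.onnx",
    "preprocessed.tar.xz",
    "labels.geojson",
    "aois.geojson",
    "meta.pmtiles",
    os.path.join("graphs", "training_accuracy.png"),
]


def missing_artifacts(output_path, expected=YOLO_ARTIFACTS):
    return [p for p in expected if not os.path.exists(os.path.join(output_path, p))]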