Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug Fix : Dataset Size #251

Merged
merged 7 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 7 additions & 17 deletions backend/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,27 @@ This project was bootstrapped with [Geodjango Template](https://github.com/itsk
source ./env/bin/activate

##### Setup Basemodels (Ramp Supported Currently)
- Install git lfs
```bash
sudo apt-get install git-lfs
```

- Clone Ramp Basemodel
```
git clone https://github.com/radiantearth/model_ramp_baseline.git
```
OR Download from google drive
```
pip install gdown
gdown --fuzzy https://drive.google.com/file/d/1wvJhkiOrSlHmmvJ0avkAdu9sslFf5_I0/view?usp=sharing
```

- Clone Ramp - Code
Note: This clone location will be your RAMP_HOME
```
git clone https://github.com/kshitijrajsharma/ramp-code-fAIr.git ramp-code
```

- Copy Basemodel checkpoint to ramp-code
```
cp -r model_ramp_baseline/data/input/checkpoint.tf ramp-code/ramp/checkpoint.tf
```

Our Basemodel is available for public download [here](https://drive.google.com/file/d/1wvJhkiOrSlHmmvJ0avkAdu9sslFf5_I0/view?usp=sharing)

You can unzip and move the downloaded basemodel
```
unzip checkpoint.tf.zip -d ramp-code/ramp
```


- Remove the basemodel repo; we don't need it anymore
```
Expand Down Expand Up @@ -136,11 +130,7 @@ pip install -r requirements.txt
You will need more env variables (such as Ramp Home and Training Home), which can be found in ```.sample_env```

#### Now change your username, password and db name in settings.py according to your database
python manage.py makemigrations login
python manage.py migrate login
python manage.py makemigrations core
python manage.py migrate core
python manage.py makemigrations
python manage.py makemigrations login core
python manage.py migrate
python manage.py runserver
### Now the server will be available on port 8000; you can check out localhost:8000/admin for the admin panel
Expand Down
23 changes: 23 additions & 0 deletions backend/api-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
django==4.1.4
# gdal==3.6.2
psycopg2
djangorestframework==3.14.0
djangorestframework-gis==1.0
dj-database-url==1.2.0
django-leaflet==0.28.3
drf-yasg==1.21.4
django-environ==0.9.0 # used for environment
django-filter==22.1
django-cors-headers==3.13.0 # used for enabling cors when frontend is hosted on different server / origin
osm-login-python==0.0.2
celery==5.2.7
redis==4.4.0
django_celery_results==2.4.0
flower==1.2.0
validators==0.20.0
gpxpy==1.5.0
geojson2osm==0.0.1
osmconflator==0.0.9
orthogonalizer==0.0.4
fairpredictor==0.0.26
tflite-runtime==2.14.0
1 change: 1 addition & 0 deletions backend/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ class Training(models.Model):
finished_at = models.DateTimeField(null=True, blank=True)
accuracy = models.FloatField(null=True, blank=True)
epochs = models.PositiveIntegerField()
chips_length = models.PositiveIntegerField(default=0)
batch_size = models.PositiveIntegerField()
freeze_layers = models.BooleanField(default=False)

Expand Down
58 changes: 43 additions & 15 deletions backend/core/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,14 @@
import os
import shutil
import sys
import tarfile
import traceback
from shutil import rmtree
import tarfile

import hot_fair_utilities
import ramp.utils
import tensorflow as tf
from celery import shared_task
from django.conf import settings
from django.contrib.gis.db.models.aggregates import Extent
from django.contrib.gis.geos import GEOSGeometry
from django.shortcuts import get_object_or_404
from django.utils import timezone
from hot_fair_utilities import preprocess, train
from hot_fair_utilities.training import run_feedback
from predictor import download_imagery, get_start_end_download_coords

from core.models import AOI, Feedback, FeedbackAOI, FeedbackLabel, Label, Training
from core.serializers import (
AOISerializer,
Expand All @@ -29,6 +20,14 @@
LabelFileSerializer,
)
from core.utils import bbox, is_dir_empty
from django.conf import settings
from django.contrib.gis.db.models.aggregates import Extent
from django.contrib.gis.geos import GEOSGeometry
from django.shortcuts import get_object_or_404
from django.utils import timezone
from hot_fair_utilities import preprocess, train
from hot_fair_utilities.training import run_feedback
from predictor import download_imagery, get_start_end_download_coords

logger = logging.getLogger(__name__)

Expand All @@ -37,6 +36,7 @@

DEFAULT_TILE_SIZE = 256


def xz_folder(folder_path, output_filename, remove_original=False):
"""
Compresses a folder and its contents into a .tar.xz file and optionally removes the original folder.
Expand All @@ -47,8 +47,8 @@ def xz_folder(folder_path, output_filename, remove_original=False):
- remove_original: If True, the original folder is removed after compression.
"""

if not output_filename.endswith('.tar.xz'):
output_filename += '.tar.xz'
if not output_filename.endswith(".tar.xz"):
output_filename += ".tar.xz"

with tarfile.open(output_filename, "w:xz") as tar:
tar.add(folder_path, arcname=os.path.basename(folder_path))
Expand All @@ -57,6 +57,20 @@ def xz_folder(folder_path, output_filename, remove_original=False):
shutil.rmtree(folder_path)


def get_file_count(path):
    """
    Counts the regular files located directly inside a directory.

    Parameters:
    - path: Directory to inspect. Sub-directories are neither counted nor descended into.

    Returns:
    - The number of entries in `path` for which os.path.isfile is true,
      or 0 when the directory cannot be listed (missing, not a directory,
      permission denied, invalid path value).
    """
    try:
        # Count lazily instead of materialising a list only to take its length.
        return sum(
            1
            for entry in os.listdir(path)
            if os.path.isfile(os.path.join(path, entry))
        )
    except (OSError, TypeError, ValueError):
        # Best effort: the count is informational, so a failure here must not
        # abort the caller. Log with traceback instead of printing to stdout.
        logging.getLogger(__name__).exception(
            "An error occurred while counting files in %r", path
        )
        return 0


@shared_task
def train_model(
dataset_id,
Expand Down Expand Up @@ -189,6 +203,10 @@ def train_model(
rasterize_options=["binary"],
georeference_images=True,
)
training_instance.chips_length = get_file_count(
os.path.join(preprocess_output, "chips")
)
training_instance.save()

# train

Expand Down Expand Up @@ -272,9 +290,19 @@ def train_model(
f.write(json.dumps(aoi_serializer.data))

# copy aois and labels to preprocess output before compressing it to tar
shutil.copyfile(os.path.join(output_path, "aois.geojson"), os.path.join(preprocess_output,'aois.geojson'))
shutil.copyfile(os.path.join(output_path, "labels.geojson"), os.path.join(preprocess_output,'labels.geojson'))
xz_folder(preprocess_output, os.path.join(output_path, "preprocessed.tar.xz"), remove_original=True)
shutil.copyfile(
os.path.join(output_path, "aois.geojson"),
os.path.join(preprocess_output, "aois.geojson"),
)
shutil.copyfile(
os.path.join(output_path, "labels.geojson"),
os.path.join(preprocess_output, "labels.geojson"),
)
xz_folder(
preprocess_output,
os.path.join(output_path, "preprocessed.tar.xz"),
remove_original=True,
)

# now remove the ramp-data all our outputs are copied to our training workspace
shutil.rmtree(base_path)
Expand Down
4 changes: 2 additions & 2 deletions backend/core/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# now import the views.py file into this code
from .views import (
AOIViewSet,
APIStatus,
# APIStatus,
ConflateGeojson,
DatasetViewSet,
FeedbackAOIViewset,
Expand Down Expand Up @@ -52,7 +52,7 @@
path("training/publish/<int:training_id>/", publish_training),
path("prediction/", PredictionView.as_view()),
path("feedback/training/submit/", FeedbackView.as_view()),
path("status/", APIStatus.as_view()),
# path("status/", APIStatus.as_view()),
path("geojson2osm/", geojson2osmconverter, name="geojson2osmconverter"),
path("conflate/", ConflateGeojson, name="Conflate Geojson"),
path("aoi/gpx/<int:aoi_id>/", GenerateGpxView.as_view()),
Expand Down
25 changes: 12 additions & 13 deletions backend/core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from datetime import datetime
from tempfile import NamedTemporaryFile

import tensorflow as tf
# import tensorflow as tf
from celery import current_app
from celery.result import AsyncResult
from django.conf import settings
Expand Down Expand Up @@ -127,7 +127,7 @@ def create(self, validated_data):
validated_data["created_by"] = user
# create the model instance
instance = Training.objects.create(**validated_data)
logging.info("Sending record to redis queue")

# run your function here
task = train_model.delay(
dataset_id=instance.model.dataset.id,
Expand Down Expand Up @@ -471,7 +471,6 @@ def post(self, request, *args, **kwargs):
batch_size=batch_size,
source_imagery=training_instance.source_imagery,
)

task = train_model.delay(
dataset_id=instance.model.dataset.id,
training_id=instance.id,
Expand Down Expand Up @@ -614,16 +613,16 @@ def publish_training(request, training_id: int):
return Response("Training Published", status=status.HTTP_201_CREATED)


class APIStatus(APIView):
def get(self, request):
res = {
"tensorflow_version": tf.__version__,
"No of GPU Available": len(
tf.config.experimental.list_physical_devices("GPU")
),
"API Status": "Healthy", # static for now should be dynamic TODO
}
return Response(res, status=status.HTTP_200_OK)
# class APIStatus(APIView):
# def get(self, request):
# res = {
# "tensorflow_version": tf.__version__,
# "No of GPU Available": len(
# tf.config.experimental.list_physical_devices("GPU")
# ),
# "API Status": "Healthy", # static for now should be dynamic TODO
# }
# return Response(res, status=status.HTTP_200_OK)


class GenerateGpxView(APIView):
Expand Down
25 changes: 1 addition & 24 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1,2 @@
django==4.1.4
# gdal
psycopg2
djangorestframework==3.14.0
djangorestframework-gis==1.0
dj-database-url==1.2.0
django-leaflet==0.28.3
drf-yasg==1.21.4
Pillow
django-environ==0.9.0 # used for environment
django-filter==22.1
django-cors-headers==3.13.0 # used for enabling cors when frontend is hosted on different server / origin
osm-login-python==0.0.2
celery==5.2.7
redis==4.4.0
django_celery_results==2.4.0
flower==1.2.0
validators==0.20.0
gpxpy==1.5.0
-r api-requirements.txt
hot-fair-utilities==1.2.3
geojson2osm==0.0.1
osmconflator
orthogonalizer
fairpredictor==0.0.26
tflite-runtime==2.14.0
1 change: 1 addition & 0 deletions backend/sample_env
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
DEBUG=True
SECRET_KEY=yl2w)c0boi_ma-1v5)935^2#&m*r!1s9z9^*9e5co^08_ixzo6
DATABASE_URL=postgis://admin:password@localhost:5432/ai
EXPORT_TOOL_API_URL=MY_RAW_DATA_URL
Expand Down