pyronear · MateoLostanlen · Aug 16, 2023 · Apr 7, 2023 · Apr 7, 2023 · Apr 19, 2023
diff --git a/Dockerfile b/Dockerfile
@@ -15,6 +15,9 @@ COPY ./pyproject.toml /tmp/pyproject.toml
 COPY ./README.md /tmp/README.md
 COPY ./setup.py /tmp/setup.py
 
+# install git
+RUN apt-get update && apt-get install git -y
+
 COPY ./src/requirements.txt /tmp/requirements.txt
 RUN apt-get update && apt-get install ffmpeg libsm6 libxext6  -y\
     && pip install --upgrade pip setuptools wheel \

diff --git a/pyproject.toml b/pyproject.toml
@@ -33,7 +33,7 @@ dependencies = [
     "Pillow>=8.4.0",
     "onnxruntime>=1.10.0,<2.0.0",
     "numpy>=1.19.5,<2.0.0",
-    "pyroclient>=0.1.2",
+    "pyroclient @ git+https://github.com/pyronear/pyro-api.git@main#egg=pkg&subdirectory=client",
     "requests>=2.20.0,<3.0.0",
     "opencv-python==4.5.5.64",
 ]

diff --git a/pyroengine/engine.py b/pyroengine/engine.py
@@ -15,12 +15,15 @@
 from pathlib import Path
 from typing import Any, Dict, Optional, Tuple
 
+import cv2
 import numpy as np
 from PIL import Image
 from pyroclient import client
 from requests.exceptions import ConnectionError
 from requests.models import Response
 
+from pyroengine.utils import box_iou, nms
+
 from .vision import Classifier
 
 __all__ = ["Engine"]
@@ -97,7 +100,7 @@ def __init__(
         cam_creds: Optional[Dict[str, Dict[str, str]]] = None,
         latitude: Optional[float] = None,
         longitude: Optional[float] = None,
-        alert_relaxation: int = 3,
+        nb_consecutive_frames: int = 4,
         frame_size: Optional[Tuple[int, int]] = None,
         cache_backup_period: int = 60,
         frame_saving_period: Optional[int] = None,
@@ -127,7 +130,7 @@ def __init__(
 
         # Cache & relaxation
         self.frame_saving_period = frame_saving_period
-        self.alert_relaxation = alert_relaxation
+        self.nb_consecutive_frames = nb_consecutive_frames
         self.frame_size = frame_size
         self.jpeg_quality = jpeg_quality
         self.cache_backup_period = cache_backup_period
@@ -138,11 +141,24 @@ def __init__(
 
         # Var initialization
         self._states: Dict[str, Dict[str, Any]] = {
-            "-1": {"consec": 0, "frame_count": 0, "ongoing": False},
+            "-1": {"last_predictions": deque([], self.nb_consecutive_frames), "frame_count": 0, "ongoing": False},
         }
         if isinstance(cam_creds, dict):
             for cam_id in cam_creds:
-                self._states[cam_id] = {"consec": 0, "frame_count": 0, "ongoing": False}
+                self._states[cam_id] = {
+                    "last_predictions": deque([], self.nb_consecutive_frames),
+                    "frame_count": 0,
+                    "ongoing": False,
+                }
+
+        self.occlusion_masks = {"-1": None}
+        if isinstance(cam_creds, dict):
+            for cam_id in cam_creds:
+                mask_file = cache_folder + "/occlusion_masks/" + cam_id + ".jpg"
+                if os.path.isfile(mask_file):
+                    self.occlusion_masks[cam_id] = cv2.imread(mask_file, 0)
+                else:
+                    self.occlusion_masks[cam_id] = None
 
         # Restore pending alerts cache
         self._alerts: deque = deque([], cache_size)
@@ -153,7 +169,7 @@ def __init__(
 
     def clear_cache(self) -> None:
         """Clear local cache"""
-        for file in self._cache.rglob("*"):
+        for file in self._cache.rglob("pending*"):
             file.unlink()
 
     def _dump_cache(self) -> None:
@@ -178,6 +194,7 @@ def _dump_cache(self) -> None:
                     "frame_path": str(self._cache.joinpath(f"pending_frame{idx}.jpg")),
                     "cam_id": info["cam_id"],
                     "ts": info["ts"],
+                    "localization": info["localization"],
                 }
             )
 
@@ -202,27 +219,53 @@ def heartbeat(self, cam_id: str) -> Response:
         """Updates last ping of device"""
         return self.api_client[cam_id].heartbeat()
 
-    def _update_states(self, conf: float, cam_key: str) -> bool:
+    def _update_states(self, frame: Image.Image, preds: np.array, cam_key: str) -> bool:
         """Updates the detection states"""
-        # Detection
-        if conf >= self.conf_thresh:
-            # Don't increment beyond relaxation
-            if not self._states[cam_key]["ongoing"] and self._states[cam_key]["consec"] < self.alert_relaxation:
-                self._states[cam_key]["consec"] += 1
 
-            if self._states[cam_key]["consec"] == self.alert_relaxation:
-                self._states[cam_key]["ongoing"] = True
+        conf_th = self.conf_thresh * self.nb_consecutive_frames
+        # Reduce threshold once we are in alert mode to collect more data
+        if self._states[cam_key]["ongoing"]:
+            conf_th *= 0.8
+
+        # Get last predictions
+        boxes = np.zeros((0, 5))
+        boxes = np.concatenate([boxes, preds])
+        for _, box, _, _, _ in self._states[cam_key]["last_predictions"]:
+            if box.shape[0] > 0:
+                boxes = np.concatenate([boxes, box])
+
+        conf = 0
+        output_predictions = np.zeros((0, 5))
+        # Get the best ones
+        if boxes.shape[0]:
+            best_boxes = nms(boxes)
+            ious = box_iou(best_boxes[:, :4], boxes[:, :4])
+            best_boxes_scores = np.array([sum(boxes[iou > 0, 4]) for iou in ious.T])
+            combine_predictions = best_boxes[best_boxes_scores > conf_th, :]
+            conf = np.max(best_boxes_scores) / self.nb_consecutive_frames
+
+            # if current predictions match with combine predictions send match else send combine predcition
+            ious = box_iou(combine_predictions[:, :4], preds[:, :4])
+            if np.sum(ious) > 0:
+                output_predictions = preds
+            else:
+                output_predictions = combine_predictions
+
+            # Limit bbox size in api
+            output_predictions = np.round(output_predictions, 3)  # max 3 digit
+            output_predictions = output_predictions[:5, :]  # max 5 bbox
 
-            return self._states[cam_key]["ongoing"]
-        # No wildfire
+        self._states[cam_key]["last_predictions"].append(
+            (frame, preds, str(json.dumps(output_predictions.tolist())), datetime.utcnow().isoformat(), False)
+        )
+
+        # update state
+        if conf > self.conf_thresh:
+            self._states[cam_key]["ongoing"] = True
         else:
-            if self._states[cam_key]["consec"] > 0:
-                self._states[cam_key]["consec"] -= 1
-            # Consider event as finished
-            if self._states[cam_key]["consec"] == 0:
-                self._states[cam_key]["ongoing"] = False
+            self._states[cam_key]["ongoing"] = False
 
-        return False
+        return conf
 
     def predict(self, frame: Image.Image, cam_id: Optional[str] = None) -> float:
         """Computes the confidence that the image contains wildfire cues
@@ -245,23 +288,31 @@ def predict(self, frame: Image.Image, cam_id: Optional[str] = None) -> float:
         # Reduce image size to save bandwidth
         if isinstance(self.frame_size, tuple):
             frame_resize = frame.resize(self.frame_size[::-1], Image.BILINEAR)
+        else:
+            frame_resize = frame
 
         if is_day_time(self._cache, frame, self.day_time_strategy):
             # Inference with ONNX
-            pred = float(self.model(frame.convert("RGB")))
+            preds = self.model(frame.convert("RGB"), self.occlusion_masks[cam_key])
+            conf = self._update_states(frame_resize, preds, cam_key)
+
             # Log analysis result
             device_str = f"Camera '{cam_id}' - " if isinstance(cam_id, str) else ""
-            pred_str = "Wildfire detected" if pred >= self.conf_thresh else "No wildfire"
-            logging.info(f"{device_str}{pred_str} (confidence: {pred:.2%})")
+            pred_str = "Wildfire detected" if conf > self.conf_thresh else "No wildfire"
+            logging.info(f"{device_str}{pred_str} (confidence: {conf:.2%})")
 
             # Alert
-
-            to_be_staged = self._update_states(pred, cam_key)
-            if to_be_staged and len(self.api_client) > 0 and isinstance(cam_id, str):
+            if conf > self.conf_thresh and len(self.api_client) > 0 and isinstance(cam_id, str):
                 # Save the alert in cache to avoid connection issues
-                self._stage_alert(frame_resize, cam_id)
+                for idx, (frame, preds, localization, ts, is_staged) in enumerate(
+                    self._states[cam_key]["last_predictions"]
+                ):
+                    if not is_staged:
+                        self._stage_alert(frame, cam_id, ts, localization)
+                        self._states[cam_key]["last_predictions"][idx] = frame, preds, localization, ts, True
+
         else:
-            pred = 0  # return default value
+            conf = 0  # return default value
 
         # Uploading pending alerts
         if len(self._alerts) > 0:
@@ -289,7 +340,7 @@ def predict(self, frame: Image.Image, cam_id: Optional[str] = None) -> float:
                 except ConnectionError:
                     stream.seek(0)  # "Rewind" the stream to the beginning so we can read its content
 
-        return pred
+        return float(conf)
 
     def _upload_frame(self, cam_id: str, media_data: bytes) -> Response:
         """Save frame"""
@@ -303,15 +354,16 @@ def _upload_frame(self, cam_id: str, media_data: bytes) -> Response:
 
         return response
 
-    def _stage_alert(self, frame: Image.Image, cam_id: str) -> None:
+    def _stage_alert(self, frame: Image.Image, cam_id: str, ts: int, localization: str) -> None:
         # Store information in the queue
         self._alerts.append(
             {
                 "frame": frame,
                 "cam_id": cam_id,
-                "ts": datetime.utcnow().isoformat(),
+                "ts": ts,
                 "media_id": None,
                 "alert_id": None,
+                "localization": localization,
             }
         )
 
@@ -335,9 +387,10 @@ def _process_alerts(self) -> None:
                     self._alerts[0]["alert_id"] = (
                         self.api_client[cam_id]
                         .send_alert_from_device(
-                            self.latitude,
-                            self.longitude,
-                            self._alerts[0]["media_id"],
+                            lat=self.latitude,
+                            lon=self.longitude,
+                            media_id=self._alerts[0]["media_id"],
+                            # localization=self._alerts[0]["localization"],
                         )
                         .json()["id"]
                     )

diff --git a/pyroengine/utils.py b/pyroengine/utils.py
@@ -7,10 +7,21 @@
 import cv2
 import numpy as np
 
-__all__ = ["letterbox"]
+__all__ = ["letterbox", "nms", "xywh2xyxy"]
 
 
-def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, stride=32):
+def xywh2xyxy(x: np.array):
+    y = np.copy(x)
+    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
+    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
+    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
+    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
+    return y
+
+
+def letterbox(
+    im: np.array, new_shape: tuple = (640, 640), color: tuple = (114, 114, 114), auto: bool = False, stride: int = 32
+):
     """Letterbox image transform for yolo models
     Args:
         im (np.array): Input image
@@ -51,3 +62,50 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, stride
     im_b[top : top + h, left : left + w, :] = im
 
     return im_b.astype("uint8")
+
+
+def box_iou(box1: np.array, box2: np.array, eps: float = 1e-7):
+    """
+    Calculate intersection-over-union (IoU) of boxes.
+    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+    Based on https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
+
+    Args:
+        box1 (np.array): A numpy array of shape (N, 4) representing N bounding boxes.
+        box2 (np.array): A numpy array of shape (M, 4) representing M bounding boxes.
+        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.
+
+    Returns:
+        (np.array): An NxM numpy array containing the pairwise IoU values for every element in box1 and box2.
+    """
+
+    (a1, a2), (b1, b2) = np.split(box1, 2, 1), np.split(box2, 2, 1)
+    inter = (np.minimum(a2, b2[:, None, :]) - np.maximum(a1, b1[:, None, :])).clip(0).prod(2)
+
+    # IoU = inter / (area1 + area2 - inter)
+    return inter / ((a2 - a1).prod(1) + (b2 - b1).prod(1)[:, None] - inter + eps)
+
+
+def nms(boxes: np.array, overlapThresh: int = 0):
+    """Non maximum suppression
+
+    Args:
+        boxes (np.array): A numpy array of shape (N, 4) representing N bounding boxes in (x1, y1, x2, y2, conf) format
+        overlapThresh (int, optional): iou threshold. Defaults to 0.
+
+    Returns:
+        boxes: Boxes after NMS
+    """
+    # Return an empty list, if no boxes given
+    boxes = boxes[boxes[:, -1].argsort()]
+    if len(boxes) == 0:
+        return []
+
+    indices = np.arange(len(boxes))
+    rr = box_iou(boxes[:, :4], boxes[:, :4])
+    for i, box in enumerate(boxes):
+        temp_indices = indices[indices != i]
+        if np.any(rr[i, temp_indices] > overlapThresh):
+            indices = indices[indices != i]
+
+    return boxes[indices]