Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into drop_big_detections
Browse files Browse the repository at this point in the history
  • Loading branch information
MateoLostanlen committed Jan 9, 2025
2 parents c4e8c4e + ccbd96b commit 0fdd361
Showing 1 changed file with 30 additions and 21 deletions.
51 changes: 30 additions & 21 deletions pyroengine/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from typing import Optional, Tuple
from urllib.request import urlretrieve

import ncnn
import ncnn # type: ignore
import numpy as np
import onnxruntime
from huggingface_hub import HfApi # type: ignore[import-untyped]
Expand Down Expand Up @@ -148,67 +148,76 @@ def prep_process(self, pil_img: Image.Image) -> Tuple[np.ndarray, Tuple[int, int
- The resized and normalized image of shape (1, C, H, W).
- Padding information as a tuple of integers (pad_height, pad_width).
"""

np_img, pad = letterbox(np.array(pil_img), self.imgsz) # Applies letterbox resize with padding

if self.format == "ncnn":
np_img = ncnn.Mat.from_pixels(np_img, ncnn.Mat.PixelType.PIXEL_BGR, np_img.shape[1], np_img.shape[0])
mean = [0, 0, 0]
std = [1 / 255, 1 / 255, 1 / 255]
np_img.substract_mean_normalize(mean=mean, norm=std)

else:

np_img = np.expand_dims(np_img.astype("float"), axis=0) # Add batch dimension
np_img = np.expand_dims(np_img.astype("float32"), axis=0) # Add batch dimension
np_img = np.ascontiguousarray(np_img.transpose((0, 3, 1, 2))) # Convert from BHWC to BCHW format
np_img = np_img.astype("float32") / 255 # Normalize to [0, 1]
np_img /= 255.0 # Normalize to [0, 1]

return np_img, pad

def post_process(self, pred: np.ndarray, pad: int) -> Tuple[np.ndarray, Tuple[int, int]]:
def post_process(self, pred: np.ndarray, pad: Tuple[int, int]) -> np.ndarray:
"""Post-process model predictions.
# Drop low conf for speed-up
pred = pred[:, pred[-1, :] > self.conf]
# Post processing
Args:
pred: Raw predictions from the model.
pad: Padding information as (left_pad, top_pad).
Returns:
Processed predictions as a numpy array.
"""
pred = pred[:, pred[-1, :] > self.conf] # Drop low-confidence predictions
pred = np.transpose(pred)
pred = xywh2xyxy(pred)
# Sort by confidence
pred = pred[pred[:, 4].argsort()]
pred = pred[pred[:, 4].argsort()] # Sort by confidence
pred = nms(pred)
pred = pred[::-1]
pred = pred[::-1] # Reverse for highest confidence first

# Normalize preds
if len(pred) > 0:
# Remove padding
left_pad, top_pad = pad
left_pad, top_pad = pad # Unpack the tuple
pred[:, :4:2] -= left_pad
pred[:, 1:4:2] -= top_pad
pred[:, :4:2] /= self.imgsz - 2 * left_pad
pred[:, 1:4:2] /= self.imgsz - 2 * top_pad
pred = np.clip(pred, 0, 1)
pred = np.reshape(pred, (-1, 5))
else:
pred = np.zeros((0, 5)) # normalize output
pred = np.zeros((0, 5)) # Return empty prediction array

return pred

def __call__(self, pil_img: Image.Image, occlusion_mask: Optional[np.ndarray] = None) -> np.ndarray:
"""Run the classifier on an input image.
Args:
pil_img: The input PIL image.
occlusion_mask: Optional occlusion mask to exclude certain areas.
Returns:
Processed predictions.
"""
np_img, pad = self.prep_process(pil_img)

if self.format == "ncnn":

extractor = self.model.create_extractor()
extractor.set_light_mode(True)
extractor.input("in0", np_img)
pred = ncnn.Mat()
extractor.extract("out0", pred)
pred = np.asarray(pred)

else:
pred = self.ort_session.run(["output0"], {"images": np_img})[0][0]

pred = self.post_process(pred, pad)
# Convert pad to a tuple if required
if isinstance(pad, list):
pad = tuple(pad)

pred = self.post_process(pred, pad) # Ensure pad is passed as a tuple

# drop big detections
pred = np.clip(pred, 0, 1)
Expand Down

0 comments on commit 0fdd361

Please sign in to comment.