-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'upstream/receptionist-polishing'
- Loading branch information
Showing
41 changed files
with
1,736 additions
and
611 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
107 changes: 107 additions & 0 deletions
107
common/vision/lasr_vision_clip/examples/test_person_detector.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
#!/usr/bin/env python3 | ||
from lasr_vision_msgs.srv import ( | ||
ClipLearnFaceRequest, | ||
ClipLearnFace, | ||
ClipLearnFaceResponse, | ||
CroppedDetection, | ||
CroppedDetectionRequest, | ||
CroppedDetectionResponse, | ||
ClipRecogniseFaceRequest, | ||
ClipRecogniseFace, | ||
ClipRecogniseFaceResponse, | ||
) | ||
from lasr_vision_msgs.msg import CDRequest | ||
from sensor_msgs.msg import Image | ||
import cv2 | ||
from cv2_img import msg_to_cv2_img, cv2_img_to_msg | ||
import rospy | ||
from typing import List | ||
import numpy as np | ||
|
||
|
||
if __name__ == "__main__":
    # Interactive demo for the face services:
    #   1. Repeatedly prompt for a name, grab person crops from the cropped
    #      detection service and send them to the learn-face service.
    #   2. Once the user types "done", continuously detect people, ask the
    #      recognition service who each crop is, and publish an annotated
    #      debug image.
    rospy.init_node("clip_encoder_test")
    cropped_detector = rospy.ServiceProxy("/vision/cropped_detection", CroppedDetection)
    learn_face_service = rospy.ServiceProxy("/vision/learn_face", ClipLearnFace)
    detect_face_service = rospy.ServiceProxy(
        "/vision/face_detection", ClipRecogniseFace
    )
    debug_pub = rospy.Publisher("/clip/recognise/debug", Image, queue_size=1)

    def make_person_request() -> CroppedDetectionRequest:
        """Build the (identical) person-detection request used by both phases."""
        return CroppedDetectionRequest(
            [
                CDRequest(
                    method="centered",
                    use_mask=True,
                    object_names=["person"],
                    yolo_model="yolov8x-seg.pt",
                    yolo_model_confidence=0.8,
                    yolo_nms_threshold=0.4,
                )
            ]
        )

    # Learning phase: one name per iteration until the user enters "done".
    while True:
        input_str = input("Please enter your name and hit enter to learn your face: ")
        if input_str == "done":
            break
        person_1_imgs = []
        for _ in range(10):
            cropped_response = cropped_detector(make_person_request())
            rospy.sleep(0.1)
            try:
                person_1_imgs.append(cropped_response.responses[0].cropped_imgs[0])
            except IndexError:
                # Nothing was detected in this frame; try the next one.
                # (Catching only IndexError keeps Ctrl-C working here.)
                continue

        if not person_1_imgs:
            # Do not ask the service to learn from zero images.
            rospy.logwarn(f"No person detected; not learning {input_str}")
            continue

        learn_face_service(ClipLearnFaceRequest(raw_imgs=person_1_imgs, name=input_str))

    # Run inference
    while not rospy.is_shutdown():
        cropped_response = cropped_detector(make_person_request())

        try:
            names = []
            xywhs = []
            for cropped_img in cropped_response.responses[0].cropped_imgs:
                response = detect_face_service(
                    ClipRecogniseFaceRequest(image_raw=cropped_img)
                )
                names.append(response.name)
                xywhs.append(response.xywh)
                rospy.loginfo(f"Recognised face: {response.name}")

            # Add names to image
            # NOTE(review): boxes come from recognition on the cropped images
            # but are drawn on the masked image -- confirm the coordinate
            # frames agree.
            cv2_img = msg_to_cv2_img(cropped_response.responses[0].masked_img)
            for name, xywh in zip(names, xywhs):
                x, y, w, h = xywh[0], xywh[1], xywh[2], xywh[3]
                cv2.rectangle(cv2_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(
                    cv2_img,
                    name,
                    (x, y),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    (0, 255, 0),
                    2,
                    cv2.LINE_AA,
                )
            debug_pub.publish(cv2_img_to_msg(cv2_img))
        except Exception as e:
            # Best-effort demo loop: log the problem (e.g. no detections, or
            # an empty xywh) and move on to the next frame.
            rospy.loginfo(e)
            continue

    rospy.spin()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import rospy | ||
from lasr_vision_clip import FaceService | ||
|
||
|
||
if __name__ == "__main__":
    # Entry point of the face service node: initialise ROS, construct the
    # FaceService, then block until the node is shut down.
    node_name = "clip_vqa_service"
    rospy.init_node(node_name)
    # Keep a reference to the service object for the lifetime of the node.
    service = FaceService()
    rospy.spin()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,4 @@ | ||
facenet-pytorch | ||
sentence-transformers | ||
opencv-python | ||
opencv-python | ||
opencv-contrib-python |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
3 changes: 2 additions & 1 deletion
3
common/vision/lasr_vision_clip/src/lasr_vision_clip/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
|
||
from .clip_utils import load_model, encode_img, load_face_model, infer | ||
from .learn_face import FaceService |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
112 changes: 112 additions & 0 deletions
112
common/vision/lasr_vision_clip/src/lasr_vision_clip/learn_face.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
#!/usr/bin/env python3 | ||
import os | ||
import cv2 | ||
import rospy | ||
from typing import Dict | ||
import numpy as np | ||
import rospkg | ||
from lasr_vision_msgs.srv import ( | ||
ClipRecogniseFaceRequest, | ||
ClipRecogniseFaceResponse, | ||
ClipLearnFace, | ||
ClipRecogniseFace, | ||
ClipLearnFaceRequest, | ||
ClipLearnFaceResponse, | ||
) | ||
from sensor_msgs.msg import Image | ||
from cv2_img import msg_to_cv2_img, cv2_img_to_msg | ||
from lasr_vision_clip import load_face_model, encode_img, infer | ||
|
||
|
||
class FaceService:
    """ROS services for learning named faces and recognising them later.

    Faces are located with an OpenCV Haar cascade. For every located face an
    embedding is obtained from ``infer`` and either averaged into a per-name
    reference vector (``learn_face``) or compared against the stored
    reference vectors (``face_detection``).
    """

    def __init__(self, similarity_threshold: float = 6.0) -> None:
        """Load the cascade and model, then advertise the two services.

        Args:
            similarity_threshold: Stored on the instance.
                NOTE(review): nothing in this class reads it yet, so the
                closest learned name is returned regardless of distance.
        """
        self._face_classifier = cv2.CascadeClassifier(
            os.path.join(
                rospkg.RosPack().get_path("lasr_vision_clip"),
                "data",
                "haarcascade_frontalface_default.xml",
            )
        )
        # Maps a person's name to their mean embedding vector.
        self.learned_faces: Dict[str, np.ndarray] = {}
        self._similarity_threshold = similarity_threshold
        self.processor, self.model = load_face_model()
        self._face_pub = rospy.Publisher("/clip/face_detection", Image, queue_size=1)

        rospy.Service("/vision/face_detection", ClipRecogniseFace, self.face_detection)
        rospy.Service("/vision/learn_face", ClipLearnFace, self.learn_face)

        rospy.loginfo("Face detector service started")

    def _detect_faces(self, img: np.ndarray):
        """Run the Haar cascade on ``img`` and return its detections.

        Each detection unpacks as ``(x, y, w, h)``.
        """
        faces = self._face_classifier.detectMultiScale(
            img, 1.1, minNeighbors=5, minSize=(10, 10)
        )
        return faces

    def _publish_and_encode(self, cv2_img: np.ndarray, x, y, w, h) -> np.ndarray:
        """Publish the face crop for debugging and return a flat embedding.

        NOTE(review): the embedding is computed from the full ``cv2_img``,
        not from the face crop; only the published debug image is cropped.
        This matches the previous behaviour of both callers -- confirm it is
        intended.
        """
        cv2_face = cv2_img[y : y + h, x : x + w]
        self._face_pub.publish(cv2_img_to_msg(cv2_face))
        encoded_face = infer(cv2_img_to_msg(cv2_img), self.processor, self.model)
        return encoded_face.flatten()

    def face_detection(
        self, req: ClipRecogniseFaceRequest
    ) -> ClipRecogniseFaceResponse:
        """Return the learned name whose embedding is closest to the image.

        Args:
            req: Request whose ``image_raw`` field holds the image to check.

        Returns:
            A response carrying the closest learned name, its Euclidean
            distance, and the ``[x, y, w, h]`` box of the matching face. When
            no face is found or nothing has been learned, the name is
            ``"Unknown"``. If an exception occurs while processing, it is
            logged and an ``"Unknown"`` response is returned.
        """
        img = req.image_raw
        cv2_img = msg_to_cv2_img(img)
        try:
            faces = self._detect_faces(cv2_img)

            # Assume only one face in image
            closest_name = "Unknown"
            min_dist = float("inf")
            min_xywh = None
            for x, y, w, h in faces:
                encoded_face = self._publish_and_encode(cv2_img, x, y, w, h)
                for name, face in self.learned_faces.items():
                    distance = np.linalg.norm(encoded_face - face)
                    rospy.loginfo(f"Distance to {name} : {distance}")
                    if distance < min_dist:
                        min_dist = distance
                        min_xywh = [x, y, w, h]
                        closest_name = name
            return ClipRecogniseFaceResponse(
                name=closest_name, distance=min_dist, xywh=min_xywh
            )
        except Exception as e:
            rospy.loginfo(e)
            return ClipRecogniseFaceResponse(name="Unknown", distance=None, xywh=None)

    def learn_face(self, request: ClipLearnFaceRequest) -> ClipLearnFaceResponse:
        """Learn a face by averaging embeddings over the supplied images.

        Args:
            request: Request with ``raw_imgs`` (images to learn from) and
                ``name`` (the key the averaged embedding is stored under).

        Returns:
            An empty ``ClipLearnFaceResponse``. If no face is detected in any
            image, nothing is stored and any embedding previously learned for
            ``request.name`` is left untouched.
        """
        imgs = request.raw_imgs

        embedding_vectors = []
        for img in imgs:
            cv2_img = msg_to_cv2_img(img)
            rospy.loginfo(f"Image shape: {cv2_img.shape}")
            try:
                faces = self._detect_faces(cv2_img)
            except Exception as e:  # Detection failed for this image
                rospy.loginfo(e)
                continue
            for x, y, w, h in faces:
                embedding_vectors.append(
                    self._publish_and_encode(cv2_img, x, y, w, h)
                )

        if not embedding_vectors:
            # The mean of an empty array is NaN; storing it would make this
            # name unmatchable and clobber any earlier valid embedding.
            rospy.logwarn(
                f"No face detected in any image; not learning {request.name}"
            )
            return ClipLearnFaceResponse()

        embedding_vector = np.mean(np.array(embedding_vectors), axis=0)
        self.learned_faces[request.name] = embedding_vector
        rospy.loginfo(f"Learned {request.name}")

        return ClipLearnFaceResponse()
Oops, something went wrong.