Commit e06fa7b

Merge branch 'main' into face-skills

jws-1 authored May 7, 2024
2 parents b5930f6 + d8fbf4f
Showing 18 changed files with 390 additions and 104 deletions.
33 changes: 15 additions & 18 deletions common/speech/lasr_speech_recognition_whisper/requirements.txt
@@ -1,22 +1,19 @@
 --extra-index-url https://pypi.ngc.nvidia.com
 --trusted-host pypi.ngc.nvidia.com
 
 catkin-pkg==1.0.0 # via rospkg
 certifi==2024.2.2 # via requests
 cffi==1.16.0 # via sounddevice
 charset-normalizer==3.3.2 # via requests
 distro==1.9.0 # via rospkg
-docutils==0.20.1 # via catkin-pkg
-filelock==3.13.1 # via torch, triton
-fsspec==2024.2.0 # via torch
-idna==3.6 # via requests
-jinja2==3.1.3 # via torch
+docutils==0.21.2 # via catkin-pkg
+filelock==3.14.0 # via torch, triton
+fsspec==2024.3.1 # via torch
+idna==3.7 # via requests
+jinja2==3.1.4 # via torch
 llvmlite==0.42.0 # via numba
 markupsafe==2.1.5 # via jinja2
 more-itertools==10.2.0 # via openai-whisper
 mpmath==1.3.0 # via sympy
-networkx==3.2.1 # via torch
-numba==0.59.0 # via openai-whisper
+networkx==3.3 # via torch
+numba==0.59.1 # via openai-whisper
 numpy==1.26.4 # via numba, openai-whisper
 nvidia-cublas-cu12==12.1.3.1 # via nvidia-cudnn-cu12, nvidia-cusolver-cu12, torch
 nvidia-cuda-cupti-cu12==12.1.105 # via torch
@@ -27,27 +24,27 @@ nvidia-cufft-cu12==11.0.2.54 # via torch
 nvidia-curand-cu12==10.3.2.106 # via torch
 nvidia-cusolver-cu12==11.4.5.107 # via torch
 nvidia-cusparse-cu12==12.1.0.106 # via nvidia-cusolver-cu12, torch
-nvidia-nccl-cu12==2.19.3 # via torch
-nvidia-nvjitlink-cu12==12.4.99 # via nvidia-cusolver-cu12, nvidia-cusparse-cu12
+nvidia-nccl-cu12==2.20.5 # via torch
+nvidia-nvjitlink-cu12==12.4.127 # via nvidia-cusolver-cu12, nvidia-cusparse-cu12
 nvidia-nvtx-cu12==12.1.105 # via torch
 openai-whisper==20231117 # via -r requirements.in
 pyaudio==0.2.13 # via -r requirements.in
-pycparser==2.21 # via cffi
+pycparser==2.22 # via cffi
 pyparsing==3.1.2 # via catkin-pkg
 python-dateutil==2.9.0.post0 # via catkin-pkg
 pyyaml==6.0.1 # via -r requirements.in, rospkg
-regex==2023.12.25 # via tiktoken
+regex==2024.4.28 # via tiktoken
 requests==2.31.0 # via speechrecognition, tiktoken
 rospkg==1.5.0 # via -r requirements.in
 six==1.16.0 # via python-dateutil
 sounddevice==0.4.6 # via -r requirements.in
 speechrecognition==3.10.0 # via -r requirements.in
 sympy==1.12 # via torch
 tiktoken==0.6.0 # via openai-whisper
-torch==2.2.1 # via openai-whisper
-tqdm==4.66.2 # via openai-whisper
-triton==2.2.0 # via openai-whisper, torch
-typing-extensions==4.10.0 # via torch
+torch==2.3.0 # via openai-whisper
+tqdm==4.66.4 # via openai-whisper
+triton==2.3.0 # via openai-whisper, torch
+typing-extensions==4.11.0 # via torch
 urllib3==2.2.1 # via requests
 
 # The following packages are considered to be unsafe in a requirements file:
@@ -5,11 +5,13 @@
 import numpy as np
 
 from PIL import Image
+import re
 import tensorflow as tf
 from tf_bodypix.api import download_model, load_model, BodyPixModelPaths
 
 from sensor_msgs.msg import Image as SensorImage
-from lasr_vision_msgs.msg import BodyPixMask, BodyPixPose
+
+from lasr_vision_msgs.msg import BodyPixMask, BodyPixPose, BodyPixKeypoint
 from lasr_vision_msgs.srv import BodyPixDetectionRequest, BodyPixDetectionResponse
 
 import rospkg
@@ -19,6 +21,15 @@
 r = rospkg.RosPack()
 
 
+def snake_to_camel(snake_str):
+    components = snake_str.split("_")
+    return components[0] + "".join(x.title() for x in components[1:])
+
+
+def camel_to_snake(name):
+    return re.sub(r"(?<!^)(?=[A-Z])", "_", name).lower()
+
+
 def load_model_cached(dataset: str) -> None:
     """
     Load a model into cache
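The two name-mangling helpers added above are inverses for simple identifiers: tf-bodypix reports body-part names in camelCase, while the request's part lists use snake_case (as the filter later in detect() implies). A self-contained sketch of the round trip:

    import re


    def snake_to_camel(snake_str):
        components = snake_str.split("_")
        return components[0] + "".join(x.title() for x in components[1:])


    def camel_to_snake(name):
        return re.sub(r"(?<!^)(?=[A-Z])", "_", name).lower()


    # Part names as reported by tf-bodypix (camelCase) vs. the request (snake_case).
    assert snake_to_camel("left_shoulder") == "leftShoulder"
    assert camel_to_snake("leftShoulder") == "left_shoulder"
    assert camel_to_snake(snake_to_camel("right_upper_arm_front")) == "right_upper_arm_front"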
@@ -41,7 +52,10 @@ def load_model_cached(dataset: str) -> None:
 
 
 def detect(
-    request: BodyPixDetectionRequest, debug_publisher: rospy.Publisher | None
+    request: BodyPixDetectionRequest,
+    debug_publisher: rospy.Publisher = rospy.Publisher(
+        "/bodypix/debug", SensorImage, queue_size=1
+    ),
 ) -> BodyPixDetectionResponse:
     """
     Run BodyPix inference on given detection request
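One side effect of giving debug_publisher a rospy.Publisher default: Python evaluates default arguments once, at function-definition time, so the /bodypix/debug publisher is created as soon as this module is imported. Callers can still supply their own publisher explicitly; a minimal sketch (node and topic names hypothetical):

    import rospy
    from sensor_msgs.msg import Image as SensorImage
    from lasr_vision_msgs.srv import BodyPixDetectionRequest

    rospy.init_node("bodypix_debug_example")  # hypothetical node name
    my_debug = rospy.Publisher("/bodypix/debug_custom", SensorImage, queue_size=1)  # hypothetical topic

    request = BodyPixDetectionRequest()  # request fields (image, confidence, masks, ...) filled in elsewhere
    response = detect(request, debug_publisher=my_debug)  # assumes detect() is imported from this module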
@@ -58,6 +72,7 @@ def detect(
     # run inference
     rospy.loginfo("Running inference")
     result = model.predict_single(img)
+
     mask = result.get_mask(threshold=request.confidence)
     rospy.loginfo("Inference complete")
 
@@ -69,43 +84,13 @@ def detect(
         ).squeeze()
 
         bodypix_mask = BodyPixMask()
-        bodypix_mask.mask = part_mask.flatten().astype(bool)
+        bodypix_mask.mask = part_mask.flatten().astype(bool).tolist()
+        bodypix_mask.shape = list(part_mask.shape)
         masks.append(bodypix_mask)
 
-    # construct poses response and neck coordinates
     poses = result.get_poses()
-    rospy.loginfo(str(poses))
-
-    neck_coordinates = []
-    for pose in poses:
-        left_shoulder_keypoint = pose.keypoints.get(
-            5
-        )  # 5 is the typical index for left shoulder
-        right_shoulder_keypoint = pose.keypoints.get(
-            6
-        )  # 6 is the typical index for right shoulder
-
-        if left_shoulder_keypoint and right_shoulder_keypoint:
-            # If both shoulders are detected, calculate neck as midpoint
-            left_shoulder = left_shoulder_keypoint.position
-            right_shoulder = right_shoulder_keypoint.position
-            neck_x = (left_shoulder.x + right_shoulder.x) / 2
-            neck_y = (left_shoulder.y + right_shoulder.y) / 2
-        elif left_shoulder_keypoint:
-            # Only left shoulder detected, use it as neck coordinate
-            left_shoulder = left_shoulder_keypoint.position
-            neck_x = left_shoulder.x
-            neck_y = left_shoulder.y
-        elif right_shoulder_keypoint:
-            # Only right shoulder detected, use it as neck coordinate
-            right_shoulder = right_shoulder_keypoint.position
-            neck_x = right_shoulder.x
-            neck_y = right_shoulder.y
-
-        pose = BodyPixPose()
-        pose.coord = np.array([neck_x, neck_y]).astype(np.int32)
-        neck_coordinates.append(pose)
 
     # publish to debug topic
     if debug_publisher is not None:
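Since BodyPixMask now ships the mask as a flattened list of booleans alongside its shape, subscribers have to reshape it before use. A minimal sketch, assuming the message fields as defined in this diff:

    import numpy as np


    def mask_to_numpy(bodypix_mask):
        # .mask is the flattened boolean mask; .shape restores its 2-D layout.
        return np.array(bodypix_mask.mask, dtype=bool).reshape(bodypix_mask.shape)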
@@ -122,7 +107,25 @@ def detect(
         )
         debug_publisher.publish(cv2_img.cv2_img_to_msg(coloured_mask))
 
+    output_poses = []
+
+    for pose in poses:
+        pose_msg = BodyPixPose()
+        keypoints_msg = []
+
+        for i, keypoint in pose.keypoints.items():
+
+            if camel_to_snake(keypoint.part) in request.masks[0].parts:
+                keypoint_msg = BodyPixKeypoint()
+                keypoint_msg.xy = [int(keypoint.position.x), int(keypoint.position.y)]
+                keypoint_msg.score = keypoint.score
+                keypoint_msg.part = keypoint.part
+                keypoints_msg.append(keypoint_msg)
+
+        pose_msg.keypoints = keypoints_msg
+        output_poses.append(pose_msg)
+
     response = BodyPixDetectionResponse()
+    response.poses = output_poses
     response.masks = masks
-    response.poses = neck_coordinates
     return response
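With this change each returned pose carries named, scored keypoints instead of a single neck coordinate. A hedged sketch of a client reading them back (the service name and type are assumptions based on the Request/Response names, not confirmed by this diff):

    import rospy
    from lasr_vision_msgs.srv import BodyPixDetection, BodyPixDetectionRequest

    rospy.init_node("bodypix_client_example")  # hypothetical node name
    rospy.wait_for_service("/bodypix/detect")  # assumed service name
    detect_bodypix = rospy.ServiceProxy("/bodypix/detect", BodyPixDetection)

    request = BodyPixDetectionRequest()  # request fields (image, confidence, masks, ...) set elsewhere
    response = detect_bodypix(request)

    for pose in response.poses:
        for kp in pose.keypoints:
            x, y = kp.xy
            rospy.loginfo(f"{kp.part}: ({x}, {y}), score {kp.score:.2f}")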
3 changes: 2 additions & 1 deletion common/vision/lasr_vision_msgs/CMakeLists.txt
@@ -48,6 +48,7 @@ add_message_files(
   Detection.msg
   Detection3D.msg
   BodyPixPose.msg
+  BodyPixKeypoint.msg
   BodyPixMask.msg
   BodyPixMaskRequest.msg
 )
@@ -204,4 +205,4 @@ include_directories(
 # endif()
 
 ## Add folders to be run by python nosetests
-# catkin_add_nosetests(test)
+# catkin_add_nosetests(test)
12 changes: 12 additions & 0 deletions common/vision/lasr_vision_msgs/msg/BodyPixKeypoint.msg
@@ -0,0 +1,12 @@
+# Keypoint.msg
+
+# int number of the parts following
+# https://github.com/de-code/python-tf-bodypix/blob/develop/tf_bodypix/bodypix_js_utils/part_channels.py#L5
+
+string part
+
+# the score of the body part
+float64 score
+
+# the x and y coordinates of the body part
+int32[] xy
4 changes: 2 additions & 2 deletions common/vision/lasr_vision_msgs/msg/BodyPixPose.msg
@@ -1,2 +1,2 @@
-# x and y coordinates
-float32[] coord
+# keypoints
+BodyPixKeypoint[] keypoints
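For reference, the revised messages compose like any other ROS message; a small sketch with hypothetical values:

    from lasr_vision_msgs.msg import BodyPixKeypoint, BodyPixPose

    # Hypothetical keypoint: part name as reported by tf-bodypix, pixel coordinates, confidence.
    kp = BodyPixKeypoint(part="leftShoulder", score=0.91, xy=[320, 180])
    pose = BodyPixPose(keypoints=[kp])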
18 changes: 9 additions & 9 deletions common/vision/lasr_vision_pointing/requirements.txt
@@ -1,24 +1,24 @@
 absl-py==2.1.0 # via mediapipe
 attrs==23.2.0 # via mediapipe
 cffi==1.16.0 # via sounddevice
-contourpy==1.2.0 # via matplotlib
+contourpy==1.2.1 # via matplotlib
 cycler==0.12.1 # via matplotlib
-flatbuffers==24.3.7 # via mediapipe
-fonttools==4.50.0 # via matplotlib
-jax==0.4.25 # via mediapipe
+flatbuffers==24.3.25 # via mediapipe
+fonttools==4.51.0 # via matplotlib
+jax==0.4.26 # via mediapipe
 kiwisolver==1.4.5 # via matplotlib
-matplotlib==3.8.3 # via mediapipe
+matplotlib==3.8.4 # via mediapipe
 mediapipe==0.10.10 # via -r requirements.in
-ml-dtypes==0.3.2 # via jax
+ml-dtypes==0.4.0 # via jax
 numpy==1.26.4 # via contourpy, jax, matplotlib, mediapipe, ml-dtypes, opencv-contrib-python, opt-einsum, scipy
 opencv-contrib-python==4.9.0.80 # via mediapipe
 opt-einsum==3.3.0 # via jax
 packaging==24.0 # via matplotlib
-pillow==10.2.0 # via matplotlib
+pillow==10.3.0 # via matplotlib
 protobuf==3.20.3 # via mediapipe
-pycparser==2.21 # via cffi
+pycparser==2.22 # via cffi
 pyparsing==3.1.2 # via matplotlib
 python-dateutil==2.9.0.post0 # via matplotlib
-scipy==1.12.0 # via jax
+scipy==1.13.0 # via jax
 six==1.16.0 # via python-dateutil
 sounddevice==0.4.6 # via mediapipe
22 changes: 13 additions & 9 deletions common/vision/lasr_vision_pointing/scripts/detect_pointing.py
@@ -37,7 +37,9 @@ def excute(self, req: PointingDirectionRequest) -> PointingDirectionResponse:
             keypoints = self.detect_keypoints(
                 img
             )  # Detect keypoints using MediaPipe
-            direction = self.determine_pointing_direction(keypoints)
+            direction = self.determine_pointing_direction(
+                keypoints, buffer_width=25
+            )
             rospy.loginfo(f"Person detected pointing: {direction}")
 
             # Visualize pointing direction with landmarks
@@ -52,7 +54,7 @@ def excute(self, req: PointingDirectionRequest) -> PointingDirectionResponse:
 
             resp.direction = direction
         else:
-            resp.direction = "Err"
+            resp.direction = "NONE"
 
         self.counter += 1
         return resp
@@ -117,16 +119,16 @@ def determine_pointing_direction(self, keypoints, buffer_width=50):
 
         # Determine pointing direction based on the difference in x-coordinates
         if abs(left_diff - right_diff) < buffer_width:
-            return "Front"
+            return "FORWARDS"
         elif abs(left_diff) > buffer_width and abs(left_diff) > abs(right_diff):
-            return "Left" if left_diff > 0 else "Right"
+            return "LEFT" if left_diff > 0 else "RIGHT"
         elif abs(right_diff) > buffer_width and abs(right_diff) > abs(
             left_diff
         ):
-            return "Right" if right_diff > 0 else "Left"
+            return "RIGHT" if right_diff > 0 else "LEFT"
 
         # Default: Determine direction based on the relative position to the center of the image
-        return "Front"
+        return "NONE"
 
     def visualize_pointing_direction_with_landmarks(
         self, image_path, person_bbox, pointing_direction, keypoints
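The renamed outcomes make the decision rule easier to read: if the two x-displacements agree to within buffer_width pixels, the result is FORWARDS; otherwise, whichever displacement exceeds buffer_width and dominates decides, with its sign picking LEFT or RIGHT; anything else is NONE. A standalone restatement of the rule with a few worked cases (how left_diff and right_diff are computed from the keypoints lies outside this hunk):

    def pointing_direction(left_diff, right_diff, buffer_width=25):
        # left_diff / right_diff: signed x-offsets (pixels) of each hand from its reference keypoint.
        if abs(left_diff - right_diff) < buffer_width:
            return "FORWARDS"
        if abs(left_diff) > buffer_width and abs(left_diff) > abs(right_diff):
            return "LEFT" if left_diff > 0 else "RIGHT"
        if abs(right_diff) > buffer_width and abs(right_diff) > abs(left_diff):
            return "RIGHT" if right_diff > 0 else "LEFT"
        return "NONE"


    assert pointing_direction(10, 12) == "FORWARDS"  # near-symmetric offsets
    assert pointing_direction(60, 5) == "LEFT"       # left hand dominates, positive offset
    assert pointing_direction(5, -40) == "LEFT"      # right hand dominates, negative offset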
@@ -142,12 +144,14 @@
 
         # Calculate endpoint of arrow based on pointing direction
         arrow_length = min(w, h) // 2
-        if pointing_direction == "Left":
+        if pointing_direction == "LEFT":
             endpoint = (center_x - arrow_length, center_y)
-        elif pointing_direction == "Right":
+        elif pointing_direction == "RIGHT":
             endpoint = (center_x + arrow_length, center_y)
-        else:
+        elif pointing_direction == "FORWARDS":
             endpoint = (center_x, center_y)
+        else:
+            return  # No pointing direction detected
 
         # Draw arrow on image
         color = (0, 255, 0)  # Green color
22 changes: 21 additions & 1 deletion skills/config/motions.yaml
@@ -1,6 +1,11 @@
 play_motion:
   motions:
-    reach_arm:
+    reach_arm_vertical_gripper:
+      joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint]
+      points:
+        - positions: [1.61, -0.93, -3.14, 1.83, -1.58, -0.53, 0.00]
+          time_from_start: 0.0
+    reach_arm_horizontal_gripper:
       joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint]
       points:
         - positions: [1.61, -0.93, -3.14, 1.83, -1.58, -0.53, 0.00]
@@ -19,4 +24,19 @@ play_motion:
       joints: [torso_lift_joint, head_1_joint, head_2_joint]
       points:
         - positions: [0.25, 0.0, 0.0]
           time_from_start: 0.0
+    look_left:
+      joints: [head_1_joint, head_2_joint]
+      points:
+        - positions: [0.5, 0.0]
+          time_from_start: 0.0
+    look_right:
+      joints: [head_1_joint, head_2_joint]
+      points:
+        - positions: [-0.5, 0.0]
+          time_from_start: 0.0
+    look_centre:
+      joints: [head_1_joint, head_2_joint]
+      points:
+        - positions: [0.0, 0.0]
+          time_from_start: 0.0
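The new look_left / look_right / look_centre entries are head-only motions in the same format as the existing ones. They would typically be triggered through the play_motion action server; a hedged sketch using the standard play_motion_msgs interface (node name hypothetical):

    import actionlib
    import rospy
    from play_motion_msgs.msg import PlayMotionAction, PlayMotionGoal

    rospy.init_node("look_around_example")  # hypothetical node name

    client = actionlib.SimpleActionClient("play_motion", PlayMotionAction)
    client.wait_for_server()

    goal = PlayMotionGoal()
    goal.motion_name = "look_left"  # any motion key from the YAML above
    goal.skip_planning = True       # simple joint-space motion, no planning needed
    client.send_goal(goal)
    client.wait_for_result()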
2 changes: 2 additions & 0 deletions skills/src/lasr_skills/__init__.py
@@ -19,3 +19,5 @@
 from .clip_vqa import QueryImage
 from .detect_faces import DetectFaces
 from .recognise import Recognise
+from .detect_pointing import DetectPointingDirection
+from .detect_gesture import DetectGesture
(Diffs for the remaining changed files were not loaded.)