diff --git a/Ground_Station.py b/Ground_Station.py
new file mode 100644
index 00000000..a5d88e02
--- /dev/null
+++ b/Ground_Station.py
@@ -0,0 +1,356 @@
+'''
+The Ground Station acts as the client in the communication with pytracking (the server).
+
+self.rtsp holds the RTSP stream URL and should be set accordingly.
+
+self.serverAddressPort holds the server IP address and port used for communication.
+
+Workflow of the Ground Station:
+
+1. Tries to connect to the server until the connection is established.
+
+2. Once connected, the Ground Station pops up a window showing the continuous video feed.
+   Select a target object by drawing a bounding box with the left mouse button.
+
+3. The bounding box is sent to the server to initialize the tracker.
+
+4. After initialization, the server tracks the target object and sends the tracked bounding box back to the Ground Station.
+
+5. The Ground Station continuously displays the received tracker output.
+
+Press Q to quit the Ground Station
+Press R to re-select the object for tracking
+Press T to terminate both the Ground Station and the Server
+
+'''
+
+import cv2
+from win32api import GetSystemMetrics
+import socket
+import threading
+import time
+
+class Tracking():
+
+    def __init__(self):
+
+        # Initializing Parameters
+
+        self.rect = (0,0,0,0)
+        self.startPoint = False
+        self.endPoint = False
+        self.Flag = False
+        self.init_flag = False
+        self.camera_flag = False
+        self.rec_flag = False
+        self.flag_recv = False
+        self.flag_sending = False
+
+        # RTSP Stream
+        self.rtsp = 'rtsp://..'
+
+        self.initBB = (0,0,0,0)
+        self.initAA = (0,0,0,0)
+
+        # Server IP address and port
+        self.serverAddressPort = ("127.0.0.1", 8554)
+        self.bufferSize = 1024
+
+
+    def receive_msg(self, conn, bufferSize):
+
+        message, address = conn.recvfrom(bufferSize)
+        message = message.decode('utf-8')
+
+        return message, address
+
+    def send_msg(self, conn, address, box):
+
+        payload = str(box)
+        payload = payload.encode('utf-8')
+        conn.sendto(payload, address)
+
+    def sending(self):
+        # Announce ourselves until the connection thread confirms the server answered
+        counter = 0
+        while not self.flag_recv:
+            self.send_msg(self.UDPClientSocket, self.serverAddressPort, "Connection")
+
+            if self.flag_sending:
+                break
+
+            counter += 1
+            if counter == 3:
+                print('Trying to connect ...')
+                counter = 0
+
+            time.sleep(1)
+
+    def closing_threads(self, cm, sd, con):
+
+        # Signal each worker thread to stop, then wait for it to finish
+        self.thread_flag = True
+        cm.join()
+
+        self.flag_sending = True
+        sd.join()
+
+        self.thread_conn = True
+        con.join()
+
+    def connection(self):
+
+        self.thread_conn = False
+
+        msg_conn, addr = self.receive_msg(self.UDPClientSocket, self.bufferSize)
+        if msg_conn == "Connected":
+            print("Connection Established.")
+
+        self.flag_recv = True
+
+        while True:
+            if self.init_flag and msg_conn == "Connected":
+
+                # Send the selected bounding box so the server can (re-)initialize the tracker
+                self.send_msg(self.UDPClientSocket, self.serverAddressPort, self.initialization)
+
+                if self.Flag:
+                    self.send_msg(self.UDPClientSocket, self.serverAddressPort, "Send Tracker Coordinates")
+
+                    msg, addr = self.receive_msg(self.UDPClientSocket, self.bufferSize)
+
+                    if self.thread_conn:
+                        break
+
+                    if msg == "Tracker Lost":
+                        print('Tracker Lost the Object')
+                        cv2.destroyAllWindows()
+                        break
+
+                    else:
+                        self.rec_flag = True
+                        # Parse the "(x, y, w, h)" string sent by the server
+                        parts = msg.split(",")
+                        Trackerbox = (float(parts[0][1:]), float(parts[1][1:]), float(parts[2][1:]), float(parts[3][1:len(parts[3]) - 1]))
+
+                        # Scale from stream coordinates back to display coordinates
+                        self.Trackerbox = ((1/self.ratio)*Trackerbox[0], (1/self.ratio)*Trackerbox[1], (1/self.ratio)*Trackerbox[2], (1/self.ratio)*Trackerbox[3])
+
+            if self.thread_conn:
+                break
+
+            time.sleep(0.005)
+
+    def camera(self):
+
+        while True:
+            cap = cv2.VideoCapture(self.rtsp)
+            self.thread_flag = False
+
+            while cap.isOpened():
+                self.success, self.frame = cap.read()
+                self.camera_flag = True
+
+                if not self.success:
+                    break
+
+                if self.thread_flag:
+                    break
+
+                time.sleep(0.005)
+
+            if self.thread_flag:
+                break
+
+            time.sleep(0.005)
+
+    def main(self):
+
+        while True:
+
+            try:
+                self.UDPClientSocket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+
+                cam = threading.Thread(target=self.camera)
+                cam.start()
+                print('Camera thread Initiated')
+
+                # Wait until the camera thread has grabbed the first frame
+                while not self.camera_flag:
+                    time.sleep(0.01)
+
+                snd = threading.Thread(target=self.sending)
+                snd.start()
+                print('Sending thread Initiated')
+
+                conn = threading.Thread(target=self.connection)
+                conn.start()
+                print('Connection thread Initiated')
+
+                while True:
+                    if self.camera_flag:
+                        frame_disp = self.frame.copy()
+
+                        self.img_input, self.ratio = self.Img2ScreenRatio(frame_disp)
+
+                        cv2.namedWindow("image")
+                        cv2.setMouseCallback("image", self.GetCoordinates)
+
+                        if self.startPoint and not self.Flag:
+                            cv2.rectangle(self.img_input, (self.rect[0], self.rect[1]), (self.rect[2], self.rect[3]), (0, 255, 0), 2)
+
+                        # Horizontal offset that centres the window on the screen
+                        x = int((int(self.screen_width) - int(self.img_input.shape[1]))/2)
+
+                        if self.endPoint:
+                            Rectangle = self.RectPositioning(self.rect[0], self.rect[1], self.rect[2], self.rect[3])
+
+                            if Rectangle[0] > 0 and Rectangle[1] > 0 and Rectangle[2] > 0 and Rectangle[3] > 0:
+                                # Scale the selection from display coordinates to stream coordinates
+                                Rectangle = (int(self.ratio*Rectangle[0]), int(self.ratio*Rectangle[1]), int(self.ratio*Rectangle[2]), int(self.ratio*Rectangle[3]))
+
+                                self.initBB = Rectangle
+                                self.initialization = (self.initBB[0], self.initBB[1], (self.initBB[2] - self.initBB[0]), (self.initBB[3] - self.initBB[1]))
+
+                                if self.initialization[2] >= 5 and self.initialization[3] >= 5:
+                                    self.Verify()
+                                    self.Flag = True
+
+                                else:
+                                    print('Select a bounding box by dragging, not a single click')
+                                    self.rect = (0,0,0,0)
+                                    self.startPoint = False
+                                    self.endPoint = False
+                                    self.initBB = (0,0,0,0)
+                                    self.initAA = (0,0,0,0)
+                                    self.initialization = (0,0,0,0)
+                                    self.Flag = False
+
+                        if self.rec_flag:
+                            cv2.rectangle(self.img_input, (int(self.Trackerbox[0]), int(self.Trackerbox[1])), (int(self.Trackerbox[0]+self.Trackerbox[2]), int(self.Trackerbox[1]+self.Trackerbox[3])), (0, 0, 255), 2)
+                        cv2.moveWindow("image", x, 0)
+                        cv2.imshow('image', self.img_input)
+
+                        # Read the keyboard once per frame; calling cv2.waitKey()
+                        # several times in a row can swallow key presses
+                        key = cv2.waitKey(1) & 0xFF
+
+                        if key == ord('q'):
+                            print("Quit key pressed, closing the Ground Station")
+                            self.send_msg(self.UDPClientSocket, self.serverAddressPort, "Close")
+
+                            self.closing_threads(cam, snd, conn)
+
+                            return
+
+                        if key == ord('t'):
+                            print("Terminating both the Ground Station and the Server")
+                            self.send_msg(self.UDPClientSocket, self.serverAddressPort, "Terminate")
+
+                            self.closing_threads(cam, snd, conn)
+
+                            return
+
+                        if key == ord('r'):
+                            print("Resetting: re-select the target object")
+                            self.send_msg(self.UDPClientSocket, self.serverAddressPort, "Close")
+
+                            self.thread_conn = True
+                            conn.join()
+
+                            # Stop the camera thread as well; the outer loop restarts it
+                            self.thread_flag = True
+                            cam.join()
+                            self.camera_flag = False
+
+                            self.rect = (0,0,0,0)
+                            self.startPoint = False
+                            self.endPoint = False
+                            self.initBB = (0,0,0,0)
+                            self.initAA = (0,0,0,0)
+                            self.Trackerbox = (0,0,0,0)
+                            self.flag_recv = False
+                            self.initialization = (0,0,0,0)
+                            self.init_flag = False
+                            self.Flag = False
+
+                            break
+
+                    self.camera_flag = False
+
+                    time.sleep(0.005)
+
+                cv2.destroyAllWindows()
+                self.UDPClientSocket.close()
+
+                time.sleep(0.005)
+
+            except KeyboardInterrupt:
+                print("Closed program using keyboard interrupt")
+                self.send_msg(self.UDPClientSocket, self.serverAddressPort, "Close")
+
+                self.closing_threads(cam, snd, conn)
+
+                cv2.destroyAllWindows()
+                self.UDPClientSocket.close()
+
+    def Img2ScreenRatio(self, frame):
+
+        image_height, image_width = frame.shape[:2]
+
+        self.screen_width = GetSystemMetrics(0)    # e.g. 1920
+        screen_height = GetSystemMetrics(1)        # e.g. 1080
+
+        image_ratio = image_width / image_height
+        screen_ratio = self.screen_width / screen_height
+
+        # Fit the frame to the screen while preserving its aspect ratio
+        if image_ratio < screen_ratio:
+            img_input = cv2.resize(frame, (int(screen_height*image_ratio), screen_height))
+            ratio = image_height / screen_height
+
+        else:
+            img_input = cv2.resize(frame, (self.screen_width, int(self.screen_width/image_ratio)))
+            ratio = image_width / self.screen_width
+
+        return img_input, ratio
+
+    def GetCoordinates(self, event, x, y, flags, param):
+
+        if event == cv2.EVENT_LBUTTONDOWN:
+            self.rect = (x, y, x, y)
+            self.startPoint = True
+            self.endPoint = False
+
+        elif event == cv2.EVENT_MOUSEMOVE:
+
+            if not self.endPoint:
+                self.rect = (self.rect[0], self.rect[1], x, y)
+
+        elif event == cv2.EVENT_LBUTTONUP:
+            self.endPoint = True
+
+    def RectPositioning(self, x1, y1, x2, y2):
+
+        # Order the corners so (x1, y1) is top-left and (x2, y2) is bottom-right
+        return (min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))
+
+    def Verify(self):
+        # Flag a (re-)initialization only when the new box differs from the last one
+        if self.initAA != self.initBB:
+            print("Bounding Box Selected from GCS: ", self.initialization)
+            self.init_flag = True
+
+        self.initAA = self.initBB
+
+
+Tracking().main()
\ No newline at end of file
diff --git a/pytracking/evaluation/tracker.py b/pytracking/evaluation/tracker.py
index 634173f0..526f75b8 100644
--- a/pytracking/evaluation/tracker.py
+++ b/pytracking/evaluation/tracker.py
@@ -1,3 +1,14 @@
+'''
+Edit tracker.py accordingly:
+
+Set self.rtsp to your RTSP stream.
+
+Set self.localIP to the server IP (your PC).
+
+Set self.localPort to the server communication port.
+
+'''
+
 import importlib
 import os
 import numpy as np
@@ -15,6 +26,11 @@ from pathlib import Path
 
 import torch
 
+import threading
+
+import socket
+import argparse
+import imutils
 
 _tracker_disp_colors = {1: (0, 255, 0), 2: (0, 0, 255), 3: (255, 0, 0),
                         4: (255, 255, 255), 5: (0, 0, 0), 6: (0, 255, 128),
@@ -46,11 +62,28 @@ class Tracker:
 
     def __init__(self, name: str, parameter_name: str, run_id: int = None, display_name: str = None):
         assert run_id is None or isinstance(run_id, int)
 
+        # Initializing Parameters
+
         self.name = name
         self.parameter_name = parameter_name
         self.run_id = run_id
         self.display_name = display_name
 
+        # RTSP Stream
+        self.rtsp = 'rtsp://...'
+
+        # Number of frames to skip while processing
+        self.skip_frame = 1
+        self.camera_flag = False
+        self.distance_flag = False    # Flag for distance calculation
+
+        # Socket programming parameters
+        self.localIP = "127.0.0.1"    # Server IP
+        self.localPort = 8554         # Server Port
+        self.bufferSize = 1024
+        self.initBB = (0,0,0,0)
+        self.initAA = (0,0,0,0)
+
         env = env_settings()
         if self.run_id is None:
             self.results_dir = '{}/{}/{}'.format(env.results_path, self.name, self.parameter_name)
@@ -68,6 +101,32 @@ def __init__(self, name: str, parameter_name: str, run_id: int = None, display_n
 
         self.visdom = None
 
+    def connection(self, localIP, localPort):
+
+        # Create a datagram socket and bind it to the server IP/port
+        UDPServerSocket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+        UDPServerSocket.bind((localIP, localPort))
+
+        return UDPServerSocket
+
+    def receive_msg(self, conn, bufferSize):
+
+        message, address = conn.recvfrom(bufferSize)
+        message = message.decode('utf-8')
+
+        return message, address
+
+
+    def send_msg(self, conn, address, box):
+
+        payload = str(box)
+        payload = payload.encode('utf-8')
+        conn.sendto(payload, address)
+
     def _init_visdom(self, visdom_info, debug):
         visdom_info = {} if visdom_info is None else visdom_info
@@ -103,134 +162,70 @@ def create_tracker(self, params):
         tracker.visdom = self.visdom
         return tracker
 
-    def run_sequence(self, seq, visualization=None, debug=None, visdom_info=None, multiobj_mode=None):
-        """Run tracker on sequence.
-        args:
-            seq: Sequence to run the tracker on.
-            visualization: Set visualization flag (None means default value specified in the parameters).
-            debug: Set debug level (None means default value specified in the parameters).
-            visdom_info: Visdom info.
-            multiobj_mode: Which mode to use for multiple objects.
- """ - params = self.get_parameters() - visualization_ = visualization + + def camera_video(self, videofilepath, sf): - debug_ = debug - if debug is None: - debug_ = getattr(params, 'debug', 0) - if visualization is None: - if debug is None: - visualization_ = getattr(params, 'visualization', False) - else: - visualization_ = True if debug else False + self.cap = cv.VideoCapture(videofilepath) + + txt = videofilepath.split("/") + txt = txt[2].split(".") + txt = txt[0] + - params.visualization = visualization_ - params.debug = debug_ - - self._init_visdom(visdom_info, debug_) - if visualization_ and self.visdom is None: - self.init_visualization() + self.camera_thread = False - # Get init information - init_info = seq.init_info() - is_single_object = not seq.multiobj_mode + # VIDEO Writing + frame_width = int(self.cap.get(3)) + frame_height = int(self.cap.get(4)) + self.out = cv.VideoWriter(txt + '_Result.avi',cv.VideoWriter_fourcc('M','J','P','G'), 15, (frame_width,frame_height)) - if multiobj_mode is None: - multiobj_mode = getattr(params, 'multiobj_mode', getattr(self.tracker_class, 'multiobj_mode', 'default')) + counter = 0 + skip_frame = sf - if multiobj_mode == 'default' or is_single_object: - tracker = self.create_tracker(params) - elif multiobj_mode == 'parallel': - tracker = MultiObjectWrapper(self.tracker_class, params, self.visdom) - else: - raise ValueError('Unknown multi object mode {}'.format(multiobj_mode)) + while True: + if self.camera_flag == False: + self.success, self.frame = self.cap.read() - output = self._track_sequence(tracker, seq, init_info) - return output + counter += 1 - def _track_sequence(self, tracker, seq, init_info): - # Define outputs - # Each field in output is a list containing tracker prediction for each frame. 
+ if counter == skip_frame + 1: + self.camera_flag = True - # In case of single object tracking mode: - # target_bbox[i] is the predicted bounding box for frame i - # time[i] is the processing time for frame i - # segmentation[i] is the segmentation mask for frame i (numpy array) + self.new_frame = self.frame - # In case of multi object tracking mode: - # target_bbox[i] is an OrderedDict, where target_bbox[i][obj_id] is the predicted box for target obj_id in - # frame i - # time[i] is either the processing time for frame i, or an OrderedDict containing processing times for each - # object in frame i - # segmentation[i] is the multi-label segmentation mask for frame i (numpy array) + counter = 0 - output = {'target_bbox': [], - 'time': [], - 'segmentation': [], - 'object_presence_score': []} + if self.success == False: + break - def _store_outputs(tracker_out: dict, defaults=None): - defaults = {} if defaults is None else defaults - for key in output.keys(): - val = tracker_out.get(key, defaults.get(key, None)) - if key in tracker_out or val is not None: - output[key].append(val) + if self.camera_thread == True: + break - # Initialize - image = self._read_image(seq.frames[0]) + time.sleep(0.005) - if tracker.params.visualization and self.visdom is None: - self.visualize(image, init_info.get('init_bbox')) + def camera_rtsp(self, rtsp): - start_time = time.time() - out = tracker.initialize(image, init_info) - if out is None: - out = {} + while True: - prev_output = OrderedDict(out) + self.cap = cv.VideoCapture(rtsp) - init_default = {'target_bbox': init_info.get('init_bbox'), - 'time': time.time() - start_time, - 'segmentation': init_info.get('init_mask'), - 'object_presence_score': 1.} + self.camera_thread = False - _store_outputs(out, init_default) + while (self.cap.isOpened() == True): + self.success, self.frame = self.cap.read() + self.camera_flag = True - for frame_num, frame_path in enumerate(seq.frames[1:], start=1): - while True: - if not self.pause_mode: - break - elif self.step: - self.step = False + if self.success == False: break - else: - time.sleep(0.1) - - image = self._read_image(frame_path) - - start_time = time.time() - info = seq.frame_info(frame_num) - info['previous_output'] = prev_output - - out = tracker.track(image, info) - prev_output = OrderedDict(out) - _store_outputs(out, {'time': time.time() - start_time}) - - segmentation = out['segmentation'] if 'segmentation' in out else None - if self.visdom is not None: - tracker.visdom_draw_tracking(image, out['target_bbox'], segmentation) - elif tracker.params.visualization: - self.visualize(image, out['target_bbox'], segmentation) + if self.camera_thread == True: + break - for key in ['target_bbox', 'segmentation']: - if key in output and len(output[key]) <= 1: - output.pop(key) + time.sleep(0.005) - output['image_shape'] = image.shape[:2] - output['object_presence_score_threshold'] = tracker.params.get('object_presence_score_threshold', 0.55) + if self.camera_thread == True: + break - return output def run_video(self, videofilepath, optional_box=None, debug=None, visdom_info=None, save_results=False): """Run the tracker with the video file. 
@@ -266,95 +261,260 @@ def run_video(self, videofilepath, optional_box=None, debug=None, visdom_info=No output_boxes = [] - cap = cv.VideoCapture(videofilepath) + camera = threading.Thread(target=self.camera_video, args=(videofilepath, self.skip_frame,)) + camera.start() + print('Camera initiated') + + + while True: + if self.camera_flag == True: + break + display_name = 'Display: ' + tracker.params.tracker_name cv.namedWindow(display_name, cv.WINDOW_NORMAL | cv.WINDOW_KEEPRATIO) cv.resizeWindow(display_name, 960, 720) - success, frame = cap.read() - cv.imshow(display_name, frame) + cv.imshow(display_name, self.frame) + def _build_init_info(box): return {'init_bbox': OrderedDict({1: box}), 'init_object_ids': [1, ], 'object_ids': [1, ], 'sequence_object_ids': [1, ]} - if success is not True: + if self.success is not True: print("Read frame from {} failed.".format(videofilepath)) exit(-1) if optional_box is not None: assert isinstance(optional_box, (list, tuple)) assert len(optional_box) == 4, "valid box's foramt is [x,y,w,h]" - tracker.initialize(frame, _build_init_info(optional_box)) + tracker.initialize(self.frame, _build_init_info(optional_box)) output_boxes.append(optional_box) else: while True: - # cv.waitKey() - frame_disp = frame.copy() - - cv.putText(frame_disp, 'Select target ROI and press ENTER', (20, 30), cv.FONT_HERSHEY_COMPLEX_SMALL, - 1.5, (0, 0, 0), 1) - - x, y, w, h = cv.selectROI(display_name, frame_disp, fromCenter=False) - init_state = [x, y, w, h] - tracker.initialize(frame, _build_init_info(init_state)) - output_boxes.append(init_state) - break + if self.camera_flag == True: + frame_disp = self.new_frame.copy() - while True: - ret, frame = cap.read() + cv.putText(frame_disp, 'Select target ROI and press ENTER', (20, 30), cv.FONT_HERSHEY_COMPLEX_SMALL, + 1.5, (0, 0, 255), 1) - if frame is None: - break + x, y, w, h = cv.selectROI(display_name, frame_disp, fromCenter=False) + init_state = [x, y, w, h] + tracker.initialize(self.new_frame, _build_init_info(init_state)) + output_boxes.append(init_state) + self.camera_flag = False - frame_disp = frame.copy() - - # Draw box - out = tracker.track(frame) - state = [int(s) for s in out['target_bbox'][1]] - output_boxes.append(state) + break - cv.rectangle(frame_disp, (state[0], state[1]), (state[2] + state[0], state[3] + state[1]), - (0, 255, 0), 5) + else: + time.sleep(0.01) - font_color = (0, 0, 0) - cv.putText(frame_disp, 'Tracking!', (20, 30), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, - font_color, 1) - cv.putText(frame_disp, 'Press r to reset', (20, 55), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, - font_color, 1) - cv.putText(frame_disp, 'Press q to quit', (20, 80), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, - font_color, 1) - # Display the resulting frame - cv.imshow(display_name, frame_disp) - key = cv.waitKey(1) - if key == ord('q'): - break - elif key == ord('r'): - ret, frame = cap.read() - frame_disp = frame.copy() - - cv.putText(frame_disp, 'Select target ROI and press ENTER', (20, 30), cv.FONT_HERSHEY_COMPLEX_SMALL, 1.5, - (0, 0, 0), 1) + while True: + if self.camera_flag == True: + frame_disp = self.new_frame.copy() + t1 = time.time() + out = tracker.track(self.new_frame) + t2 = time.time() - t1 + #print('FPS: ', 1/t2) + + state = [int(s) for s in out['target_bbox'][1]] + output_boxes.append(state) + + cv.rectangle(frame_disp, (state[0], state[1]), (state[2] + state[0], state[3] + state[1]), (0, 0, 255), 2) + + font_color = (0, 0, 255) + cv.putText(frame_disp, 'Tracking!', (20, 30), cv.FONT_HERSHEY_COMPLEX_SMALL, 2, font_color, 1) + 
cv.putText(frame_disp, 'Press r to reset', (20, 55), cv.FONT_HERSHEY_COMPLEX_SMALL, 2, font_color, 1) + cv.putText(frame_disp, 'Press q to quit', (20, 80), cv.FONT_HERSHEY_COMPLEX_SMALL, 2, font_color, 1) + + # Display the resulting frame cv.imshow(display_name, frame_disp) - x, y, w, h = cv.selectROI(display_name, frame_disp, fromCenter=False) - init_state = [x, y, w, h] - tracker.initialize(frame, _build_init_info(init_state)) - output_boxes.append(init_state) - + self.out.write(frame_disp) + key = cv.waitKey(1) + if key == ord('q'): + self.camera_thread = True + camera.join() + + break + + elif key == ord('r'): + frame_disp = self.new_frame.copy() + + cv.putText(frame_disp, 'Select target ROI and press ENTER', (20, 30), cv.FONT_HERSHEY_COMPLEX_SMALL, 2.5, (0, 0, 255), 1) + + cv.imshow(display_name, frame_disp) + x, y, w, h = cv.selectROI(display_name, frame_disp, fromCenter=False) + init_state = [x, y, w, h] + tracker.initialize(self.new_frame, _build_init_info(init_state)) + output_boxes.append(init_state) + + self.camera_flag = False + + else: + time.sleep(0.01) + # When everything done, release the capture - cap.release() + self.cap.release() cv.destroyAllWindows() - + + if save_results: if not os.path.exists(self.results_dir): os.makedirs(self.results_dir) video_name = Path(videofilepath).stem base_results_path = os.path.join(self.results_dir, 'video_{}'.format(video_name)) - + tracked_bb = np.array(output_boxes).astype(int) bbox_file = '{}.txt'.format(base_results_path) np.savetxt(bbox_file, tracked_bb, delimiter='\t', fmt='%d') + def run_video_comm(self, optional_box=None, debug=None, visdom_info=None, save_results=False): + """Run the tracker with the communication with the Ground Station (socket programming) using rtsp. + args: + debug: Debug level. 
+ """ + + params = self.get_parameters() + + debug_ = debug + if debug is None: + debug_ = getattr(params, 'debug', 0) + params.debug = debug_ + + params.tracker_name = self.name + params.param_name = self.parameter_name + self._init_visdom(visdom_info, debug_) + + multiobj_mode = getattr(params, 'multiobj_mode', getattr(self.tracker_class, 'multiobj_mode', 'default')) + + if multiobj_mode == 'default': + tracker = self.create_tracker(params) + if hasattr(tracker, 'initialize_features'): + tracker.initialize_features() + + elif multiobj_mode == 'parallel': + tracker = MultiObjectWrapper(self.tracker_class, params, self.visdom, fast_load=True) + else: + raise ValueError('Unknown multi object mode {}'.format(multiobj_mode)) + + while True: + + camera = threading.Thread(target=self.camera_rtsp, args=(self.rtsp,)) + camera.start() + print('Camera thread Initiated') + + output_boxes = [] + + while True: + if self.camera_flag == True: + break + + self.conn = self.connection(self.localIP,self.localPort) + + while True: + self.message, self.address = self.receive_msg(self.conn, self.bufferSize) + if self.message == "Connection": + print('Connection Established.') + Mess_conn = "Connected" + self.send_msg(self.conn, self.address, Mess_conn) + + elif self.message == "Send Tracker Coordinates": + print('message', self.message) + cv.destroyAllWindows() + self.conn.close() + + elif self.message == "Close": + print('Ground Station is Closed, waiting for reconnection ...') + + + elif self.message == "Terminate": + print('Program terminated from Ground Station.') + + self.camera_thread = True + camera.join() + + self.cap.release() + cv.destroyAllWindows() + self.conn.close() + + return + + else: + self.initBB = (int(self.message.split(",")[0][1:]), int(self.message.split(",")[1][1:]), int(self.message.split(",")[2][1:]), + int(self.message.split(",")[3][1:(len(self.message.split(",")[3])-1)])) + print('Bounding Box from GCS:', self.initBB) + + break + + + while True: + + if self.camera_flag == True: + + def _build_init_info(box): + return {'init_bbox': OrderedDict({1: box}), 'init_object_ids': [1, ], 'object_ids': [1, ], + 'sequence_object_ids': [1, ]} + + if self.initAA[0] != self.initBB[0] or self.initAA[1] != self.initBB[1] or self.initAA[2] != self.initBB[2] or self.initAA[3] != self.initBB[3]: + init_state = self.initBB + tracker.initialize(self.frame, _build_init_info(init_state)) + output_boxes.append(init_state) + + self.initAA = self.initBB + frame_disp = self.frame.copy() + + if self.initBB is not None: + # Draw box + + out = tracker.track(self.frame) + state = [int(s) for s in out['target_bbox'][1]] + output_boxes.append(state) + + cv.rectangle(frame_disp, (state[0], state[1]), (state[2] + state[0], state[3] + state[1]), + (0, 255, 0), 5) + + + self.message2, self.address2 = self.receive_msg(self.conn, self.bufferSize) + + if self.message2 == "Send Tracker Coordinates": + self.send_msg(self.conn, self.address2, state) + + elif self.message2 == "Close": + + print('Ground Station is Closed, waiting for reconnection ...') + cv.destroyAllWindows() + + self.camera_thread = True + camera.join() + + self.initBB = (0,0,0,0) + self.initAA = (0,0,0,0) + + break + + elif self.message2 == "Terminate": + print('Program terminated from Ground Station') + + self.camera_thread = True + camera.join() + + self.cap.release() + cv.destroyAllWindows() + self.conn.close() + + return + + self.camera_flag = False + + else: + time.sleep(0.01) + + # When everything done, release the capture + self.cap.release() + 
cv.destroyAllWindows() + self.conn.close() + + def run_webcam(self, debug=None, visdom_info=None): """Run the tracker with the webcam. args: @@ -415,242 +575,100 @@ def get_bb(self): return bb ui_control = UIControl() - cap = cv.VideoCapture(0) - display_name = 'Display: ' + self.name + + camera = threading.Thread(target=self.camera_rtsp, args=(self.rtsp,)) + camera.start() + print('Camera initiated') + + display_name = 'Display: ' + tracker.params.tracker_name cv.namedWindow(display_name, cv.WINDOW_NORMAL | cv.WINDOW_KEEPRATIO) cv.resizeWindow(display_name, 960, 720) + cv.setMouseCallback(display_name, ui_control.mouse_callback) next_object_id = 1 sequence_object_ids = [] prev_output = OrderedDict() - while True: - # Capture frame-by-frame - ret, frame = cap.read() - frame_disp = frame.copy() - - info = OrderedDict() - info['previous_output'] = prev_output - - if ui_control.new_init: - ui_control.new_init = False - init_state = ui_control.get_bb() - - info['init_object_ids'] = [next_object_id, ] - info['init_bbox'] = OrderedDict({next_object_id: init_state}) - sequence_object_ids.append(next_object_id) - - next_object_id += 1 - - # Draw box - if ui_control.mode == 'select': - cv.rectangle(frame_disp, ui_control.get_tl(), ui_control.get_br(), (255, 0, 0), 2) - - if len(sequence_object_ids) > 0: - info['sequence_object_ids'] = sequence_object_ids - out = tracker.track(frame, info) - prev_output = OrderedDict(out) - - if 'segmentation' in out: - frame_disp = overlay_mask(frame_disp, out['segmentation']) - - if 'target_bbox' in out: - for obj_id, state in out['target_bbox'].items(): - state = [int(s) for s in state] - cv.rectangle(frame_disp, (state[0], state[1]), (state[2] + state[0], state[3] + state[1]), - _tracker_disp_colors[obj_id], 5) - - # Put text - font_color = (0, 0, 0) - cv.putText(frame_disp, 'Select target', (20, 30), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, font_color, 1) - cv.putText(frame_disp, 'Press r to reset', (20, 55), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, - font_color, 1) - cv.putText(frame_disp, 'Press q to quit', (20, 85), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, - font_color, 1) - - # Display the resulting frame - cv.imshow(display_name, frame_disp) - key = cv.waitKey(1) - if key == ord('q'): - break - elif key == ord('r'): - next_object_id = 1 - sequence_object_ids = [] - prev_output = OrderedDict() - info = OrderedDict() - - info['object_ids'] = [] - info['init_object_ids'] = [] - info['init_bbox'] = OrderedDict() - tracker.initialize(frame, info) - ui_control.mode = 'init' - - # When everything done, release the capture - cap.release() - cv.destroyAllWindows() - - def run_vot2020(self, debug=None, visdom_info=None): - params = self.get_parameters() - params.tracker_name = self.name - params.param_name = self.parameter_name - params.run_id = self.run_id - - debug_ = debug - if debug is None: - debug_ = getattr(params, 'debug', 0) - - if debug is None: - visualization_ = getattr(params, 'visualization', False) - else: - visualization_ = True if debug else False - - params.visualization = visualization_ - params.debug = debug_ - - self._init_visdom(visdom_info, debug_) - - tracker = self.create_tracker(params) - tracker.initialize_features() - - output_segmentation = tracker.predicts_segmentation_mask() - - import pytracking.evaluation.vot2020 as vot - - def _convert_anno_to_list(vot_anno): - vot_anno = [vot_anno[0], vot_anno[1], vot_anno[2], vot_anno[3]] - return vot_anno - - def _convert_image_path(image_path): - return image_path - - """Run tracker on VOT.""" - - if 
output_segmentation: - handle = vot.VOT("mask") - else: - handle = vot.VOT("rectangle") - - vot_anno = handle.region() - - image_path = handle.frame() - if not image_path: - return - image_path = _convert_image_path(image_path) - - image = self._read_image(image_path) - - if output_segmentation: - vot_anno_mask = vot.make_full_size(vot_anno, (image.shape[1], image.shape[0])) - bbox = masks_to_bboxes(torch.from_numpy(vot_anno_mask), fmt='t').squeeze().tolist() - else: - bbox = _convert_anno_to_list(vot_anno) - vot_anno_mask = None - - out = tracker.initialize(image, {'init_mask': vot_anno_mask, 'init_bbox': bbox}) - - if out is None: - out = {} - prev_output = OrderedDict(out) - - # Track while True: - image_path = handle.frame() - if not image_path: + if self.camera_flag == True: break - image_path = _convert_image_path(image_path) - - image = self._read_image(image_path) - - info = OrderedDict() - info['previous_output'] = prev_output - - out = tracker.track(image, info) - prev_output = OrderedDict(out) - if output_segmentation: - pred = out['segmentation'].astype(np.uint8) - else: - state = out['target_bbox'] - pred = vot.Rectangle(*state) - handle.report(pred, 1.0) - - segmentation = out['segmentation'] if 'segmentation' in out else None - if self.visdom is not None: - tracker.visdom_draw_tracking(image, out['target_bbox'], segmentation) - elif tracker.params.visualization: - self.visualize(image, out['target_bbox'], segmentation) - - - def run_vot(self, debug=None, visdom_info=None): - params = self.get_parameters() - params.tracker_name = self.name - params.param_name = self.parameter_name - params.run_id = self.run_id + while True: + if self.camera_flag == True: + frame_disp = self.frame.copy() - debug_ = debug - if debug is None: - debug_ = getattr(params, 'debug', 0) + info = OrderedDict() + info['previous_output'] = prev_output - if debug is None: - visualization_ = getattr(params, 'visualization', False) - else: - visualization_ = True if debug else False + if ui_control.new_init: + ui_control.new_init = False + init_state = ui_control.get_bb() - params.visualization = visualization_ - params.debug = debug_ + info['init_object_ids'] = [next_object_id, ] + info['init_bbox'] = OrderedDict({next_object_id: init_state}) + sequence_object_ids.append(next_object_id) - self._init_visdom(visdom_info, debug_) + next_object_id += 1 - tracker = self.create_tracker(params) - tracker.initialize_features() + # Draw box + if ui_control.mode == 'select': + cv.rectangle(frame_disp, ui_control.get_tl(), ui_control.get_br(), (255, 0, 0), 2) - import pytracking.evaluation.vot as vot + if len(sequence_object_ids) > 0: + info['sequence_object_ids'] = sequence_object_ids + out = tracker.track(self.frame, info) + prev_output = OrderedDict(out) - def _convert_anno_to_list(vot_anno): - vot_anno = [vot_anno[0][0][0], vot_anno[0][0][1], vot_anno[0][1][0], vot_anno[0][1][1], - vot_anno[0][2][0], vot_anno[0][2][1], vot_anno[0][3][0], vot_anno[0][3][1]] - return vot_anno + if 'segmentation' in out: + frame_disp = overlay_mask(frame_disp, out['segmentation']) - def _convert_image_path(image_path): - image_path_new = image_path[20:- 2] - return "".join(image_path_new) + if 'target_bbox' in out: + for obj_id, state in out['target_bbox'].items(): + state = [int(s) for s in state] + cv.rectangle(frame_disp, (state[0], state[1]), (state[2] + state[0], state[3] + state[1]), + _tracker_disp_colors[obj_id], 5) - """Run tracker on VOT.""" - handle = vot.VOT("polygon") + # Put text + font_color = (0, 255, 0) + 
cv.putText(frame_disp, 'Select target', (20, 30), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, font_color, 1)
+                    cv.putText(frame_disp, 'Press r to reset', (20, 55), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, font_color, 1)
+                    cv.putText(frame_disp, 'Press q to quit', (20, 85), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, font_color, 1)
 
-        vot_anno_polygon = handle.region()
-        vot_anno_polygon = _convert_anno_to_list(vot_anno_polygon)
+                    # Display the resulting frame
+                    cv.imshow(display_name, frame_disp)
+                    # run_webcam uses camera_rtsp, which creates no VideoWriter,
+                    # so only write a result video when one exists
+                    if getattr(self, 'out', None) is not None:
+                        self.out.write(frame_disp)
+                    key = cv.waitKey(1)
+                    if key == ord('q'):
+                        self.camera_thread = True
+                        camera.join()
 
-        init_state = convert_vot_anno_to_rect(vot_anno_polygon, tracker.params.vot_anno_conversion_type)
+                        break
 
-        image_path = handle.frame()
-        if not image_path:
-            return
-        image_path = _convert_image_path(image_path)
+                    elif key == ord('r'):
+                        next_object_id = 1
+                        sequence_object_ids = []
+                        prev_output = OrderedDict()
 
-        image = self._read_image(image_path)
-        tracker.initialize(image, {'init_bbox': init_state})
+                        info = OrderedDict()
 
-        # Track
-        while True:
-            image_path = handle.frame()
-            if not image_path:
-                break
-            image_path = _convert_image_path(image_path)
+                        info['object_ids'] = []
+                        info['init_object_ids'] = []
+                        info['init_bbox'] = OrderedDict()
+                        tracker.initialize(self.frame, info)
+                        ui_control.mode = 'init'
 
-            image = self._read_image(image_path)
-            out = tracker.track(image)
-            state = out['target_bbox']
+                    self.camera_flag = False
+
+                else:
+                    time.sleep(0.005)
+
-            handle.report(vot.Rectangle(state[0], state[1], state[2], state[3]))
+        # When everything done, release the capture
+        self.cap.release()
+        cv.destroyAllWindows()
 
-            segmentation = out['segmentation'] if 'segmentation' in out else None
-            if self.visdom is not None:
-                tracker.visdom_draw_tracking(image, out['target_bbox'], segmentation)
-            elif tracker.params.visualization:
-                self.visualize(image, out['target_bbox'], segmentation)
 
     def get_parameters(self):
         """Get parameters."""
@@ -710,6 +728,3 @@ def press(self, event):
     def _read_image(self, image_file: str):
         im = cv.imread(image_file)
         return cv.cvtColor(im, cv.COLOR_BGR2RGB)
-
-
-
diff --git a/pytracking/run_video_communication.py b/pytracking/run_video_communication.py
new file mode 100644
index 00000000..79f33731
--- /dev/null
+++ b/pytracking/run_video_communication.py
@@ -0,0 +1,41 @@
+'''
+Execute this file to run tracking in the client-server setup.
+'''
+
+import os
+import sys
+import argparse
+
+env_path = os.path.join(os.path.dirname(__file__), '..')
+if env_path not in sys.path:
+    sys.path.append(env_path)
+
+from pytracking.evaluation import Tracker
+
+
+def run_video_comm(tracker_name, tracker_param, optional_box=None, debug=None, save_results=False):
+    """Run the tracker in communication with the Ground Station (socket programming) over an RTSP stream.
+    args:
+        tracker_name: Name of tracking method.
+        tracker_param: Name of parameter file.
+        debug: Debug level.
+    """
+    tracker = Tracker(tracker_name, tracker_param)
+    tracker.run_video_comm(optional_box=optional_box, debug=debug, save_results=save_results)
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Run the tracker in communication with the Ground Station over an RTSP stream.')
+    parser.add_argument('tracker_name', type=str, help='Name of tracking method.')
+    parser.add_argument('tracker_param', type=str, help='Name of parameter file.')
+    parser.add_argument('--optional_box', type=float, default=None, nargs="+", help='Optional box with format x y w h.')
+    parser.add_argument('--debug', type=int, default=0, help='Debug level.')
+    parser.add_argument('--save_results', dest='save_results', action='store_true', help='Save bounding boxes.')
+    parser.set_defaults(save_results=False)
+
+    args = parser.parse_args()
+
+    run_video_comm(args.tracker_name, args.tracker_param, args.optional_box, args.debug, args.save_results)
+
+
+if __name__ == '__main__':
+    main()