face_detection.py

from imutils import face_utils
import numpy as np
import pyautogui as pyag
import imutils
import dlib
import cv2
import mediapipe as mp


# Thresholds and consecutive frame length for triggering the mouse action.
EYE_AR_THRESH = 0.19
EYE_AR_CONSECUTIVE_FRAMES = 15
BLINK_AR_CONSECUTIVE_FRAMES = 60
WINK_AR_DIFF_THRESH = 0.04
WINK_AR_CLOSE_THRESH = 0.19
WINK_CONSECUTIVE_FRAMES = 10

# Initialize the frame counters for each action as well as
# booleans used to indicate if action is performed or not
TOGGLE = 0
EYE_COUNTER = 0
BLINK_COUNTER = 0
L_BLINK_COUNTER = 0
R_BLINK_COUNTER = 0
WINK_COUNTER = 0
INPUT_MODE = False
EYE_CLICK = False
LEFT_WINK = False
RIGHT_WINK = False
SCROLL_MODE = False
# ANCHOR_POINT = (0, 0)
WHITE_COLOR = (255, 255, 255)
YELLOW_COLOR = (0, 255, 255)
RED_COLOR = (0, 0, 255)
GREEN_COLOR = (0, 255, 0)
BLUE_COLOR = (255, 0, 0)
BLACK_COLOR = (0, 0, 0)

shape_predictor = "C:/Users/Bishal/Documents/programming files/EyeConnect/shape_predictor_68_face_landmarks.dat"
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(shape_predictor)

(lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
(rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
# (nStart, nEnd) = face_utils.FACIAL_LANDMARKS_IDXS["nose"]
face_mesh = mp.solutions.face_mesh.FaceMesh(refine_landmarks=True)
screen_w, screen_h = pyag.size()
duration = 0.25
# Video capture
vid = cv2.VideoCapture(0)
resolution_w = 1366
resolution_h = 768
cam_w = 640
cam_h = 480
unit_w = resolution_w / cam_w
unit_h = resolution_h / cam_h

while True:
    _, frame = vid.read()
    frame = cv2.flip(frame, 1)
    frame = imutils.resize(frame, width=cam_w, height=cam_h)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    output = face_mesh.process(rgb_frame)

    landmark_points = output.multi_face_landmarks
    frame_h, frame_w, _ = frame.shape
    if landmark_points:
        landmarks = landmark_points[0].landmark
        for id, landmark in enumerate(landmarks[474:478]):
            x = 20 * int(landmark.x * frame_w)
            y = int(landmark.y * frame_h)
            cv2.circle(frame, (x, y), 3, (0, 255, 0))
            if id == 1:
                screen_x = screen_w * landmark.x
                screen_y = screen_h * landmark.y
                pyag.moveTo(
                    screen_x, screen_y, duration=duration
                )  # Add duration parameter
        left = [landmarks[145], landmarks[159]]
        for landmark in left:
            x = int(landmark.x * frame_w)
            y = int(landmark.y * frame_h)
            cv2.circle(frame, (x, y), 3, (0, 255, 255))

    rects = detector(gray, 0)
    if len(rects) > 0:
        rect = rects[0]
    else:
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF
        continue
    shape = predictor(gray, rect)
    shape = face_utils.shape_to_np(shape)

    leftEye = shape[lStart:lEnd]
    rightEye = shape[rStart:rEnd]
    # nose = shape[nStart:nEnd]

    temp = leftEye
    leftEye = rightEye
    rightEye = temp

    def eye_aspect_ratio(eye):
        # Compute the euclidean distances between the two sets of
        # vertical eye landmarks (x, y)-coordinates
        A = np.linalg.norm(eye[1] - eye[5])
        B = np.linalg.norm(eye[2] - eye[4])

        # Compute the euclidean distance between the horizontal
        # eye landmark (x, y)-coordinates
        C = np.linalg.norm(eye[0] - eye[3])

        # Compute the eye aspect ratio
        ear = (A + B) / (2.0 * C)

        # Return the eye aspect ratio
        return ear

    leftEAR = eye_aspect_ratio(leftEye)
    rightEAR = eye_aspect_ratio(rightEye)
    ear = (leftEAR + rightEAR) / 2.0
    diff_ear = np.abs(leftEAR - rightEAR)

    # nose_point = (nose[3, 0], nose[3, 1])

    leftEyeHull = cv2.convexHull(leftEye)
    rightEyeHull = cv2.convexHull(rightEye)
    cv2.drawContours(frame, [leftEyeHull], -1, YELLOW_COLOR, 1)
    cv2.drawContours(frame, [rightEyeHull], -1, YELLOW_COLOR, 1)

    for x, y in np.concatenate((leftEye, rightEye), axis=0):
        cv2.circle(frame, (x, y), 2, GREEN_COLOR, -1)
    landmark_points = output.multi_face_landmarks
    frame_h, frame_w, _ = frame.shape
    if landmark_points and INPUT_MODE == True:
        landmarks = landmark_points[0].landmark
        for id, landmark in enumerate(landmarks[474:478]):
            x = 20 * int(landmark.x * frame_w)
            y = int(landmark.y * frame_h)
            cv2.circle(frame, (x, y), 3, (0, 255, 0))
            if id == 1:
                screen_x = (screen_w * landmark.x) + 4
                screen_y = (screen_h * landmark.y) + 4
                pyag.moveTo(2 * screen_x, screen_y, duration=duration)

    if diff_ear > WINK_AR_DIFF_THRESH and INPUT_MODE == True:
        if leftEAR < rightEAR:
            if leftEAR < EYE_AR_THRESH:
                WINK_COUNTER += 1

                if WINK_COUNTER > WINK_CONSECUTIVE_FRAMES:
                    pyag.click(button="left")

                    WINK_COUNTER = 0

        elif leftEAR > rightEAR:
            if rightEAR < EYE_AR_THRESH:
                WINK_COUNTER += 1

                if WINK_COUNTER > WINK_CONSECUTIVE_FRAMES:
                    pyag.click(button="right")

                    WINK_COUNTER = 0
        else:
            WINK_COUNTER = 0
    else:
        if ear <= EYE_AR_THRESH:
            EYE_COUNTER += 1

            if EYE_COUNTER > EYE_AR_CONSECUTIVE_FRAMES:
                # SCROLL_MODE = not SCROLL_MODE
                # INPUT_MODE = not INPUT_MODE
                EYE_COUNTER = 0

                # nose point to draw a bounding box around it

        else:
            EYE_COUNTER = 0
            WINK_COUNTER = 0

    if leftEAR < 0.15:
        L_BLINK_COUNTER += 1
    if rightEAR < 0.15:
        R_BLINK_COUNTER += 1
    if L_BLINK_COUNTER >= 2 and R_BLINK_COUNTER >= 2:
        BLINK_COUNTER += 1
        if BLINK_COUNTER >= BLINK_AR_CONSECUTIVE_FRAMES:
            INPUT_MODE = not INPUT_MODE
            TOGGLE = 1
            L_BLINK_COUNTER = 0
            R_BLINK_COUNTER = 0
            BLINK_COUNTER = 0
    if INPUT_MODE:
        cv2.putText(
            frame,
            "READING INPUT!",
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            RED_COLOR,
            2,
        )
    if SCROLL_MODE:
        cv2.putText(
            frame,
            "SCROLL MODE IS ON!",
            (10, 60),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            RED_COLOR,
            2,
        )
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    if leftEAR < 0.15:
        L_BLINK_COUNTER += 1
    if rightEAR < 0.15:
        R_BLINK_COUNTER += 1
    if L_BLINK_COUNTER >= 2 and R_BLINK_COUNTER >= 2:
        if TOGGLE == 1 and BLINK_COUNTER >= BLINK_AR_CONSECUTIVE_FRAMES:
            INPUT_MODE = not INPUT_MODE
            TOGGLE = 0
            L_BLINK_COUNTER = 0
            R_BLINK_COUNTER = 0
            BLINK_COUNTER = 0
            break
            # If the `Esc` key was pressed, break from the loop
    if key == 27:
        break
cv2.destroyAllWindows()
vid.release()