From 8c6dad014765244c3bf35bd0e7c7c0f610bee7df Mon Sep 17 00:00:00 2001
From: xuwei
Date: Thu, 14 Dec 2023 06:57:49 +0000
Subject: [PATCH 1/2] fix(represent): 1. support representing a user-given
 image 2. img_path supports pathlib.Path

---
 deepface/DeepFace.py          | 41 +++++++++++++++++++++++++----------
 deepface/commons/functions.py |  8 +++++++
 tests/unit_tests.py           | 21 ++++++++++++++++++
 3 files changed, 58 insertions(+), 12 deletions(-)

diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py
index 9edd0be27..a5e2d48e7 100644
--- a/deepface/DeepFace.py
+++ b/deepface/DeepFace.py
@@ -674,16 +674,30 @@ def represent(
             This might be convenient for low resolution images.
 
         detector_backend (string): set face detector backend to opencv, retinaface, mtcnn, ssd,
-        dlib, mediapipe or yolov8.
+        dlib, mediapipe or yolov8. A special value `skip` can be used to skip face detection
+        and encode the given image as-is.
 
         align (boolean): alignment according to the eye positions.
 
         normalization (string): normalize the input image before feeding to model
 
     Returns:
-        Represent function returns a list of object with multidimensional vector (embedding).
-        The number of dimensions is changing based on the reference model.
-        E.g. FaceNet returns 128 dimensional vector; VGG-Face returns 2622 dimensional vector.
+        Represent function returns a list of objects; each object has the following fields:
+        {
+            // Multidimensional vector.
+            // The number of dimensions depends on the reference model.
+            // E.g. FaceNet returns a 128-dimensional vector; VGG-Face returns a 2622-dimensional vector.
+            "embedding": np.array,
+
+            // Facial area detected by the face detector, as a dict.
+            // (x, y) is the top-left corner point; (w, h) are the width and height.
+            // If `detector_backend` == `skip`, it is the full image area and carries no meaning.
+            "facial_area": dict{"x": int, "y": int, "w": int, "h": int},
+
+            // Face detection confidence.
+            // If `detector_backend` == `skip`, it will be 0 and carries no meaning.
+            "face_confidence": float
+        }
     """
 
     resp_objs = []
@@ -702,23 +716,23 @@ def represent(
             align=align,
         )
     else:  # skip
-        if isinstance(img_path, str):
-            img = functions.load_image(img_path)
-        elif type(img_path).__module__ == np.__name__:
+        if type(img_path).__module__ == np.__name__:
             img = img_path.copy()
         else:
-            raise ValueError(f"unexpected type for img_path - {type(img_path)}")
+            # Try to load; if loading fails, load_image raises an exception internally
+            img, _ = functions.load_image(img_path)
         # --------------------------------
         if len(img.shape) == 4:
             img = img[0]  # e.g. (1, 224, 224, 3) to (224, 224, 3)
         if len(img.shape) == 3:
             img = cv2.resize(img, target_size)
-            img = np.expand_dims(img, axis=0)
-            # when represent is called from verify, this is already normalized
+            img = np.expand_dims(img, axis=0)  # why do we drop axis 0 above only to expand it again here?
+            # when represent is called from verify, this is already normalized; but needed for user-given input
            if img.max() > 1:
-                img /= 255
+                img = img.astype(np.float32) / 255.
         # --------------------------------
-        img_region = [0, 0, img.shape[1], img.shape[0]]
+        # make dummy region and confidence to keep compatibility with `extract_faces`
+        img_region = {"x": 0, "y": 0, "w": img.shape[2], "h": img.shape[1]}
         img_objs = [(img, img_region, 0)]
     # ---------------------------------
 
@@ -731,6 +745,9 @@ def represent(
             # model.predict causes memory issue when it is called in a for loop
             # embedding = model.predict(img, verbose=0)[0].tolist()
             embedding = model(img, training=False).numpy()[0].tolist()
+            # if you still get verbose logging, try calling
+            # `tf.keras.utils.disable_interactive_logging()`
+            # in your main program
         else:
             # SFace and Dlib are not keras models and no verbose arguments
             embedding = model.predict(img)[0].tolist()
diff --git a/deepface/commons/functions.py b/deepface/commons/functions.py
index e368defd1..b8be211bd 100644
--- a/deepface/commons/functions.py
+++ b/deepface/commons/functions.py
@@ -94,6 +94,14 @@ def load_image(img):
     if type(img).__module__ == np.__name__:
         return img, None
 
+    try:
+        # Check whether img is a pathlib.Path; if so, transform it to str so the logic below works.
+        from pathlib import Path
+        if isinstance(img, Path):
+            img = str(img)
+    except ImportError:
+        pass
+
     # The image is a base64 string
     if img.startswith("data:image/"):
         return loadBase64Img(img), None
diff --git a/tests/unit_tests.py b/tests/unit_tests.py
index 57d71c50c..51617b4e2 100644
--- a/tests/unit_tests.py
+++ b/tests/unit_tests.py
@@ -127,6 +127,27 @@ def test_cases():
     assert exception_thrown is False
 
     # -------------------------------------------
+    # Test represent on a user-given image (skip detector)
+    try:
+        face_img = dataset[1][0]  # this image is already a face
+        img_objs = DeepFace.represent(img_path=face_img, detector_backend="skip")
+        assert len(img_objs) == 1
+        img_obj = img_objs[0]
+        assert "embedding" in img_obj.keys()
+        assert "facial_area" in img_obj.keys()
+        assert isinstance(img_obj["facial_area"], dict)
+        assert "x" in img_obj["facial_area"].keys()
+        assert "y" in img_obj["facial_area"].keys()
+        assert "w" in img_obj["facial_area"].keys()
+        assert "h" in img_obj["facial_area"].keys()
+        assert "face_confidence" in img_obj.keys()
+        exception_thrown = False
+    except Exception:
+        exception_thrown = True
+
+    assert exception_thrown is False
+
+    # -------------------------------------------
 
     logger.info("-----------------------------------------")
     logger.info("Extract faces test")

From 7b1451ac502f168a6a78d435d79cf8b1f3ae9adf Mon Sep 17 00:00:00 2001
From: xuwei
Date: Thu, 14 Dec 2023 12:29:35 +0000
Subject: [PATCH 2/2] fix(represent): apply review comments and adapt to
 pylint

---
 .vscode/settings.json         |  7 ++++++-
 deepface/DeepFace.py          | 28 +++++++++++++---------------
 deepface/commons/functions.py | 10 +++-------
 3 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index f5d9a8322..407926896 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -9,5 +9,10 @@
     "python.formatting.provider": "black",
     "python.formatting.blackArgs": ["--line-length=100"],
     "editor.fontWeight": "normal",
-    "python.analysis.extraPaths": ["./deepface"]
+    "python.analysis.extraPaths": [
+        "./deepface"
+    ],
+    "black-formatter.args": [
+        "--line-length=100"
+    ]
 }
diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py
index a5e2d48e7..8058c6fc3 100644
--- a/deepface/DeepFace.py
+++ b/deepface/DeepFace.py
@@ -686,15 +686,16 @@ def represent(
         {
             // Multidimensional vector.
             // The number of dimensions depends on the reference model.
-            // E.g. FaceNet returns a 128-dimensional vector; VGG-Face returns a 2622-dimensional vector.
-            "embedding": np.array, 
-
+            // E.g. FaceNet returns a 128-dimensional vector;
+            // VGG-Face returns a 2622-dimensional vector.
+            "embedding": np.array,
+
             // Facial area detected by the face detector, as a dict.
             // (x, y) is the top-left corner point; (w, h) are the width and height.
             // If `detector_backend` == `skip`, it is the full image area and carries no meaning.
             "facial_area": dict{"x": int, "y": int, "w": int, "h": int},
-            
-            // Face detection confidence. 
+
+            // Face detection confidence.
             // If `detector_backend` == `skip`, it will be 0 and carries no meaning.
             "face_confidence": float
         }
@@ -716,23 +717,20 @@ def represent(
             align=align,
         )
     else:  # skip
-        if type(img_path).__module__ == np.__name__:
-            img = img_path.copy()
-        else:
-            # Try to load; if loading fails, load_image raises an exception internally
-            img, _ = functions.load_image(img_path)
+        # Try to load; if loading fails, load_image raises an exception internally
+        img, _ = functions.load_image(img_path)
         # --------------------------------
         if len(img.shape) == 4:
             img = img[0]  # e.g. (1, 224, 224, 3) to (224, 224, 3)
         if len(img.shape) == 3:
             img = cv2.resize(img, target_size)
-            img = np.expand_dims(img, axis=0)  # why do we drop axis 0 above only to expand it again here?
-            # when represent is called from verify, this is already normalized; but needed for user-given input
+            img = np.expand_dims(img, axis=0)
+            # when called from verify, this is already normalized; but needed for user-given input
             if img.max() > 1:
-                img = img.astype(np.float32) / 255.
+                img = img.astype(np.float32) / 255.0
         # --------------------------------
         # make dummy region and confidence to keep compatibility with `extract_faces`
-        img_region = {"x": 0, "y": 0, "w": img.shape[2], "h": img.shape[1]} 
+        img_region = {"x": 0, "y": 0, "w": img.shape[2], "h": img.shape[1]}
         img_objs = [(img, img_region, 0)]
     # ---------------------------------
@@ -746,7 +744,7 @@ def represent(
             # model.predict causes memory issue when it is called in a for loop
             # embedding = model.predict(img, verbose=0)[0].tolist()
             embedding = model(img, training=False).numpy()[0].tolist()
             # if you still get verbose logging, try calling
-            # `tf.keras.utils.disable_interactive_logging()` 
+            # `tf.keras.utils.disable_interactive_logging()`
             # in your main program
         else:
             # SFace and Dlib are not keras models and no verbose arguments
diff --git a/deepface/commons/functions.py b/deepface/commons/functions.py
index b8be211bd..c4bd947b2 100644
--- a/deepface/commons/functions.py
+++ b/deepface/commons/functions.py
@@ -94,13 +94,9 @@ def load_image(img):
     if type(img).__module__ == np.__name__:
         return img, None
 
-    try:
-        # Check whether img is a pathlib.Path; if so, transform it to str so the logic below works.
-        from pathlib import Path
-        if isinstance(img, Path):
-            img = str(img)
-    except ImportError:
-        pass
+    from pathlib import Path
+    if isinstance(img, Path):
+        img = str(img)
 
     # The image is a base64 string
     if img.startswith("data:image/"):
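
A minimal usage sketch of what these two patches enable, assuming the patched API; the image path below is hypothetical and stands in for any pre-cropped face image.

    from pathlib import Path

    from deepface import DeepFace

    # img_path may now be a pathlib.Path as well as a str or numpy array
    face_img = Path("tests/dataset/img1.jpg")  # hypothetical pre-cropped face image

    # detector_backend="skip" bypasses face detection and encodes the image directly
    objs = DeepFace.represent(img_path=face_img, detector_backend="skip")

    for obj in objs:
        print(len(obj["embedding"]))   # 2622 for the default VGG-Face model
        print(obj["facial_area"])      # dummy full-image area when detection is skipped
        print(obj["face_confidence"])  # dummy 0 when detection is skipped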
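The verbose-logging comment in the diff refers to Keras' interactive progress output. A minimal sketch of the suggested workaround, assuming a TensorFlow version that ships tf.keras.utils.disable_interactive_logging:

    import tensorflow as tf

    # turn off Keras' interactive progress bars process-wide; call this once at
    # startup, before any DeepFace.represent / DeepFace.verify calls
    tf.keras.utils.disable_interactive_logging()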