Fix the prediction function

Former-commit-id: bb4b8bb
FecoDoo · Apr 1, 2022 · 201048e · 201048e
1 parent 206135d
commit 201048e
Show file tree

Hide file tree

Showing 5 changed files with 55 additions and 112 deletions.
diff --git a/core/decoding/decoder.py b/core/decoding/decoder.py
@@ -58,4 +58,4 @@ def __keras_decode(
             top_paths=top_paths,
         )
 
-        return [path.eval(session=k.get_session()) for path in decoded[0]]
+        return decoded[0]
diff --git a/core/model/lipnet.py b/core/model/lipnet.py
@@ -4,6 +4,7 @@
 
 import core.model.layers as layers
 import env
+import os
 
 
 ADAM_LEARN_RATE = 0.0001
@@ -96,13 +97,11 @@ def compile(self, optimizer=None):
             )
 
         self.model.compile(
-            loss={"ctc": lambda y_true, y_pred: y_pred}, optimizer=optimizer
+            loss={"ctc": lambda inputs, outputs: outputs}, optimizer=optimizer
         )
-        return self
 
-    def load_weights(self, path: str):
+    def load_weights(self, path: os.PathLike):
         self.model.load_weights(path)
-        return self
 
     @staticmethod
     def get_input_shape(
@@ -114,8 +113,8 @@ def get_input_shape(
             return frame_count, image_width, image_height, image_channels
 
     def predict(self, input_batch):
-        return self.capture_softmax_output([input_batch, 0])[0]
+        return self.capture_softmax_output([input_batch])[0]
 
     @property
     def capture_softmax_output(self):
-        return k.function([self.input_layer, k.learning_phase()], [self.y_pred])
+        return k.function([self.input_layer], [self.y_pred])
diff --git a/predict.py b/predict.py
@@ -1,14 +1,13 @@
 import argparse
 import csv
 import os
-from typing import NamedTuple, List
-from pathlib import Path
 import numpy as np
 import skvideo.io
-from colorama import Fore, init
-
-import dlib
 import env
+import logging
+
+from typing import NamedTuple, List
+from pathlib import Path
 from common.decode import create_decoder
 from common.iters import chunks
 from core.helpers.video import (
@@ -17,23 +16,24 @@
 )
 from core.model.lipnet import LipNet
 from core.utils.visualization import visualize_video_subtitle
-from utils.extract import extract_video_data
+from utils.converter import Converter
 
 
-os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
-init(autoreset=True)
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = env.TF_CPP_MIN_LOG_LEVEL
+logger = logging.getLogger("predicting")
+logger.setLevel(logging.DEBUG)
+logger.addHandler(logging.StreamHandler())
 
-
-ROOT_PATH = os.path.dirname(os.path.realpath(__file__))
-DICTIONARY_PATH = os.path.realpath(
-    os.path.join(ROOT_PATH, "data", "dictionaries", "grid.txt")
-)
+# define paths
+ROOT = Path(os.path.dirname(os.path.realpath(__file__))).resolve()
+DICTIONARY_PATH = ROOT.joinpath(env.DICTIONARY_PATH)
+VIDEO_PATH = ROOT.joinpath(env.VIDEO_PATH)
+MODEL_PATH = ROOT.joinpath(env.MODEL_PATH)
 
 
 class PredictConfig(NamedTuple):
-    weights: str
-    video_path: str
-    predictor_path: str
+    model: os.PathLike
+    video_path: os.PathLike
     frame_count: int = env.FRAME_COUNT
     image_width: int = env.IMAGE_WIDTH
     image_height: int = env.IMAGE_HEIGHT
@@ -44,87 +44,37 @@ class PredictConfig(NamedTuple):
 def main():
     """
     Entry point of the script for using a trained model for predicting videos.
-    i.e: python predict.py -w data/res/2018-09-26-02-30/lipnet_065_1.96.hdf5 -v data/dataset_eval
     """
 
-    print(
-        r"""
-   __         __     ______   __   __     ______     ______  
-  /\ \       /\ \   /\  == \ /\ "-.\ \   /\  ___\   /\__  _\ 
-  \ \ \____  \ \ \  \ \  _-/ \ \ \-.  \  \ \  __\   \/_/\ \/ 
-   \ \_____\  \ \_\  \ \_\    \ \_\\"\_\  \ \_____\    \ \_\ 
-    \/_____/   \/_/   \/_/     \/_/ \/_/   \/_____/     \/_/ 
-
-  implemented by Omar Salinas
-	"""
-    )
-
-    ap = argparse.ArgumentParser()
+    model = MODEL_PATH
+    video = VIDEO_PATH
 
-    ap.add_argument(
-        "-v",
-        "--video-path",
-        required=True,
-        help="Path to video file or batch directory to analize",
-    )
-    ap.add_argument(
-        "-w", "--weights-path", required=True, help="Path to .hdf5 trained weights file"
-    )
-
-    default_predictor = os.path.join(
-        __file__, "..", "data", "predictors", "shape_predictor_68_face_landmarks.dat"
-    )
-    ap.add_argument(
-        "-pp",
-        "--predictor-path",
-        required=False,
-        help="(Optional) Path to the predictor .dat file",
-        default=default_predictor,
-    )
-
-    args = vars(ap.parse_args())
-
-    weights = Path(os.path.realpath(args["weights_path"]))
-    video = Path(os.path.realpath(args["video_path"]))
-    predictor_path = Path(os.path.realpath(args["predictor_path"]))
-
-    if not weights.is_file() or weights.suffix != ".hdf5":
-        print(Fore.RED + "\nERROR: Trained weights path is not a valid file")
+    if not model.is_file() or model.suffix != ".h5":
+        logger.error("\nERROR: Trained weights path is not a valid file")
         return
 
     if not video.is_file() and not video.is_dir():
-        print(
-            Fore.RED + "\nERROR: Path does not point to a video file nor to a directory"
-        )
-        return
-
-    if not predictor_path.is_file() or predictor_path.suffix != ".dat":
-        print(Fore.RED + "\nERROR: Predictor path is not a valid file")
+        logger.error("\nERROR: Path does not point to a video file nor to a directory")
         return
 
-    config = PredictConfig(weights, video, predictor_path)
+    config = PredictConfig(model, video)
     predict(config)
 
 
 def predict(config: PredictConfig):
-    print("\nPREDICTION\n")
+    logger.info("Loading weights at: {}".format(config.model))
 
-    print("Loading weights at: {}".format(config.weights))
-    print("Using predictor at: {}".format(config.predictor_path))
+    logger.info("\nMaking predictions...\n")
 
-    print("\nMaking predictions...\n")
-
-    lipnet = (
-        LipNet(
-            config.frame_count,
-            config.image_channels,
-            config.image_height,
-            config.image_width,
-            config.max_string,
-        )
-        .compile_model()
-        .load_weights(config.weights)
+    lipnet = LipNet(
+        config.frame_count,
+        config.image_channels,
+        config.image_height,
+        config.image_width,
+        config.max_string,
     )
+    lipnet.compile()
+    lipnet.load_weights(config.model)
 
     valid_paths = []
     input_lengths = []
@@ -133,9 +83,7 @@ def predict(config: PredictConfig):
     elapsed_videos = 0
     video_paths = get_list_of_videos(config.video_path)
 
-    for paths, lengths, y_pred in predict_batches(
-        lipnet, video_paths, config.predictor_path
-    ):
+    for paths, lengths, y_pred in predict_batches(lipnet, video_paths):
         valid_paths += paths
         input_lengths += lengths
 
@@ -146,7 +94,7 @@ def predict(config: PredictConfig):
         y_pred_len = len(y_pred)
         elapsed_videos += y_pred_len
 
-        print(
+        logger.info(
             "Predicted batch of {} videos\t({} elapsed)".format(
                 y_pred_len, elapsed_videos
             )
@@ -155,14 +103,12 @@ def predict(config: PredictConfig):
     decoder = create_decoder(DICTIONARY_PATH)
     results = decode_predictions(predictions, input_lengths, decoder)
 
-    print("\n\nRESULTS:\n")
+    logger.info("\n\nRESULTS:\n")
 
     display = query_yes_no("List all prediction outputs?", True)
     visualize = query_yes_no("Visualize as video captions?", False)
 
-    print()
-
-    save_csv = query_yes_no("Save prediction outputs to a .csv file?", True)
+    save_csv = query_yes_no("Save prediction outputs to a .csv file?", False)
 
     if save_csv:
         csv_path = query_save_csv_path()
@@ -174,10 +120,10 @@ def predict(config: PredictConfig):
 
 def get_list_of_videos(path: os.PathLike) -> List[str]:
     if path.is_file():
-        print("Predicting for video at: {}".format(path))
+        logger.info("Predicting for video at: {}".format(path))
         video_paths = [path]
     else:
-        print("Predicting batch at: {}".format(path))
+        logger.info("Predicting batch at: {}".format(path))
         video_paths = get_video_files_in_dir(path)
 
     return video_paths
@@ -187,9 +133,9 @@ def get_video_files_in_dir(path: str) -> List[str]:
     return [f for ext in ["*.mpg", "*.npy"] for f in path.glob(ext)]
 
 
-def get_video_data(path: str, detector, predictor) -> np.ndarray:
+def get_video_data(path: str, converter) -> np.ndarray:
     if path.suffix == ".mpg":
-        data = extract_video_data(path, detector, predictor, False)
+        data = converter.extract_video_data(path)
         return reshape_and_normalize_video_data(data) if data is not None else None
     else:
         return get_video_data_from_file(path)
@@ -202,14 +148,13 @@ def get_entire_video_data(path: str) -> np.ndarray:
         return get_video_data_from_file(path)
 
 
-def predict_batches(lipnet: LipNet, video_paths: List[str], predictor_path: str):
+def predict_batches(lipnet: LipNet, video_paths: List[str]):
     batch_size = env.BATCH_SIZE
 
-    detector = dlib.get_frontal_face_detector()
-    predictor = dlib.shape_predictor(predictor_path)
+    converter = Converter(logger)
 
     for paths in chunks(video_paths, batch_size):
-        input_data = [(p, get_video_data(p, detector, predictor)) for p in paths]
+        input_data = [(p, get_video_data(p, converter)) for p in paths]
         input_data = [x for x in input_data if x[1] is not None]
 
         if len(input_data) <= 0:
@@ -256,7 +201,7 @@ def display_results(
 
     for p, r in zip(valid_paths, results):
         if display:
-            print("\nVideo: {}\n    Result: {}".format(p, r))
+            logger.info("\nVideo: {}\n    Result: {}".format(p, r))
 
         if visualize:
             v = get_entire_video_data(p)

diff --git a/preprocessing.py b/preprocessing.py
@@ -83,7 +83,7 @@ def manager():
                 logger.error(f"Group {i} is not a directory")
                 groups.remove(i)
                 continue
-                
+
         with Pool(processes=None) as pool:
             res = [
                 pool.apply_async(

diff --git a/train.py b/train.py
@@ -58,9 +58,7 @@ def main():
 
     timestamp = datetime.utcnow().timestamp()
 
-    config = TrainingConfig(
-        dataset_path, aligns_path
-    )
+    config = TrainingConfig(dataset_path, aligns_path)
 
     train(timestamp, config)
 
@@ -90,7 +88,8 @@ def train(timestamp: float, config: TrainingConfig):
         config.image_height,
         config.image_width,
         config.max_string,
-    ).compile_model()
+    )
+    lipnet.compile()
 
     datagen = DatasetGenerator(
         config.dataset_path,
@@ -107,7 +106,7 @@ def train(timestamp: float, config: TrainingConfig):
 
     start_time = time.time()
 
-    lipnet.model.fit(
+    lipnet.fit(
         x=datagen.train_generator,
         validation_data=datagen.val_generator,
         epochs=config.epochs,