From 201048eaa579aa780c5cdd7d73fe9b475ce3fa0c Mon Sep 17 00:00:00 2001 From: fecodoo Date: Fri, 1 Apr 2022 22:35:24 +0300 Subject: [PATCH] fix predicting function Former-commit-id: bb4b8bb75c0069015eb886f50c9937d6c54a5a2d --- core/decoding/decoder.py | 2 +- core/model/lipnet.py | 11 ++- predict.py | 143 ++++++++++++--------------------------- preprocessing.py | 2 +- train.py | 9 ++- 5 files changed, 55 insertions(+), 112 deletions(-) diff --git a/core/decoding/decoder.py b/core/decoding/decoder.py index d9054b4..b0b7cc3 100644 --- a/core/decoding/decoder.py +++ b/core/decoding/decoder.py @@ -58,4 +58,4 @@ def __keras_decode( top_paths=top_paths, ) - return [path.eval(session=k.get_session()) for path in decoded[0]] + return decoded[0] diff --git a/core/model/lipnet.py b/core/model/lipnet.py index 9da3df1..a6d0c37 100644 --- a/core/model/lipnet.py +++ b/core/model/lipnet.py @@ -4,6 +4,7 @@ import core.model.layers as layers import env +import os ADAM_LEARN_RATE = 0.0001 @@ -96,13 +97,11 @@ def compile(self, optimizer=None): ) self.model.compile( - loss={"ctc": lambda y_true, y_pred: y_pred}, optimizer=optimizer + loss={"ctc": lambda inputs, outputs: outputs}, optimizer=optimizer ) - return self - def load_weights(self, path: str): + def load_weights(self, path: os.PathLike): self.model.load_weights(path) - return self @staticmethod def get_input_shape( @@ -114,8 +113,8 @@ def get_input_shape( return frame_count, image_width, image_height, image_channels def predict(self, input_batch): - return self.capture_softmax_output([input_batch, 0])[0] + return self.capture_softmax_output([input_batch])[0] @property def capture_softmax_output(self): - return k.function([self.input_layer, k.learning_phase()], [self.y_pred]) + return k.function([self.input_layer], [self.y_pred]) diff --git a/predict.py b/predict.py index 574fb0b..e501e9a 100644 --- a/predict.py +++ b/predict.py @@ -1,14 +1,13 @@ import argparse import csv import os -from typing import NamedTuple, List -from pathlib import Path import numpy as np import skvideo.io -from colorama import Fore, init - -import dlib import env +import logging + +from typing import NamedTuple, List +from pathlib import Path from common.decode import create_decoder from common.iters import chunks from core.helpers.video import ( @@ -17,23 +16,24 @@ ) from core.model.lipnet import LipNet from core.utils.visualization import visualize_video_subtitle -from utils.extract import extract_video_data +from utils.converter import Converter -os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" -init(autoreset=True) +os.environ["TF_CPP_MIN_LOG_LEVEL"] = env.TF_CPP_MIN_LOG_LEVEL +logger = logging.getLogger("predicting") +logger.setLevel(logging.DEBUG) +logger.addHandler(logging.StreamHandler()) - -ROOT_PATH = os.path.dirname(os.path.realpath(__file__)) -DICTIONARY_PATH = os.path.realpath( - os.path.join(ROOT_PATH, "data", "dictionaries", "grid.txt") -) +# define paths +ROOT = Path(os.path.dirname(os.path.realpath(__file__))).resolve() +DICTIONARY_PATH = ROOT.joinpath(env.DICTIONARY_PATH) +VIDEO_PATH = ROOT.joinpath(env.VIDEO_PATH) +MODEL_PATH = ROOT.joinpath(env.MODEL_PATH) class PredictConfig(NamedTuple): - weights: str - video_path: str - predictor_path: str + model: os.PathLike + video_path: os.PathLike frame_count: int = env.FRAME_COUNT image_width: int = env.IMAGE_WIDTH image_height: int = env.IMAGE_HEIGHT @@ -44,87 +44,37 @@ class PredictConfig(NamedTuple): def main(): """ Entry point of the script for using a trained model for predicting videos. - i.e: python predict.py -w data/res/2018-09-26-02-30/lipnet_065_1.96.hdf5 -v data/dataset_eval """ - print( - r""" - __ __ ______ __ __ ______ ______ - /\ \ /\ \ /\ == \ /\ "-.\ \ /\ ___\ /\__ _\ - \ \ \____ \ \ \ \ \ _-/ \ \ \-. \ \ \ __\ \/_/\ \/ - \ \_____\ \ \_\ \ \_\ \ \_\\"\_\ \ \_____\ \ \_\ - \/_____/ \/_/ \/_/ \/_/ \/_/ \/_____/ \/_/ - - implemented by Omar Salinas - """ - ) - - ap = argparse.ArgumentParser() + model = MODEL_PATH + video = VIDEO_PATH - ap.add_argument( - "-v", - "--video-path", - required=True, - help="Path to video file or batch directory to analize", - ) - ap.add_argument( - "-w", "--weights-path", required=True, help="Path to .hdf5 trained weights file" - ) - - default_predictor = os.path.join( - __file__, "..", "data", "predictors", "shape_predictor_68_face_landmarks.dat" - ) - ap.add_argument( - "-pp", - "--predictor-path", - required=False, - help="(Optional) Path to the predictor .dat file", - default=default_predictor, - ) - - args = vars(ap.parse_args()) - - weights = Path(os.path.realpath(args["weights_path"])) - video = Path(os.path.realpath(args["video_path"])) - predictor_path = Path(os.path.realpath(args["predictor_path"])) - - if not weights.is_file() or weights.suffix != ".hdf5": - print(Fore.RED + "\nERROR: Trained weights path is not a valid file") + if not model.is_file() or model.suffix != ".h5": + logger.error("\nERROR: Trained weights path is not a valid file") return if not video.is_file() and not video.is_dir(): - print( - Fore.RED + "\nERROR: Path does not point to a video file nor to a directory" - ) - return - - if not predictor_path.is_file() or predictor_path.suffix != ".dat": - print(Fore.RED + "\nERROR: Predictor path is not a valid file") + logger.error("\nERROR: Path does not point to a video file nor to a directory") return - config = PredictConfig(weights, video, predictor_path) + config = PredictConfig(model, video) predict(config) def predict(config: PredictConfig): - print("\nPREDICTION\n") + logger.info("Loading weights at: {}".format(config.model)) - print("Loading weights at: {}".format(config.weights)) - print("Using predictor at: {}".format(config.predictor_path)) + logger.info("\nMaking predictions...\n") - print("\nMaking predictions...\n") - - lipnet = ( - LipNet( - config.frame_count, - config.image_channels, - config.image_height, - config.image_width, - config.max_string, - ) - .compile_model() - .load_weights(config.weights) + lipnet = LipNet( + config.frame_count, + config.image_channels, + config.image_height, + config.image_width, + config.max_string, ) + lipnet.compile() + lipnet.load_weights(config.model) valid_paths = [] input_lengths = [] @@ -133,9 +83,7 @@ def predict(config: PredictConfig): elapsed_videos = 0 video_paths = get_list_of_videos(config.video_path) - for paths, lengths, y_pred in predict_batches( - lipnet, video_paths, config.predictor_path - ): + for paths, lengths, y_pred in predict_batches(lipnet, video_paths): valid_paths += paths input_lengths += lengths @@ -146,7 +94,7 @@ def predict(config: PredictConfig): y_pred_len = len(y_pred) elapsed_videos += y_pred_len - print( + logger.info( "Predicted batch of {} videos\t({} elapsed)".format( y_pred_len, elapsed_videos ) @@ -155,14 +103,12 @@ def predict(config: PredictConfig): decoder = create_decoder(DICTIONARY_PATH) results = decode_predictions(predictions, input_lengths, decoder) - print("\n\nRESULTS:\n") + logger.info("\n\nRESULTS:\n") display = query_yes_no("List all prediction outputs?", True) visualize = query_yes_no("Visualize as video captions?", False) - print() - - save_csv = query_yes_no("Save prediction outputs to a .csv file?", True) + save_csv = query_yes_no("Save prediction outputs to a .csv file?", False) if save_csv: csv_path = query_save_csv_path() @@ -174,10 +120,10 @@ def predict(config: PredictConfig): def get_list_of_videos(path: os.PathLike) -> List[str]: if path.is_file(): - print("Predicting for video at: {}".format(path)) + logger.info("Predicting for video at: {}".format(path)) video_paths = [path] else: - print("Predicting batch at: {}".format(path)) + logger.info("Predicting batch at: {}".format(path)) video_paths = get_video_files_in_dir(path) return video_paths @@ -187,9 +133,9 @@ def get_video_files_in_dir(path: str) -> List[str]: return [f for ext in ["*.mpg", "*.npy"] for f in path.glob(ext)] -def get_video_data(path: str, detector, predictor) -> np.ndarray: +def get_video_data(path: str, converter) -> np.ndarray: if path.suffix == ".mpg": - data = extract_video_data(path, detector, predictor, False) + data = converter.extract_video_data(path) return reshape_and_normalize_video_data(data) if data is not None else None else: return get_video_data_from_file(path) @@ -202,14 +148,13 @@ def get_entire_video_data(path: str) -> np.ndarray: return get_video_data_from_file(path) -def predict_batches(lipnet: LipNet, video_paths: List[str], predictor_path: str): +def predict_batches(lipnet: LipNet, video_paths: List[str]): batch_size = env.BATCH_SIZE - detector = dlib.get_frontal_face_detector() - predictor = dlib.shape_predictor(predictor_path) + converter = Converter(logger) for paths in chunks(video_paths, batch_size): - input_data = [(p, get_video_data(p, detector, predictor)) for p in paths] + input_data = [(p, get_video_data(p, converter)) for p in paths] input_data = [x for x in input_data if x[1] is not None] if len(input_data) <= 0: @@ -256,7 +201,7 @@ def display_results( for p, r in zip(valid_paths, results): if display: - print("\nVideo: {}\n Result: {}".format(p, r)) + logger.info("\nVideo: {}\n Result: {}".format(p, r)) if visualize: v = get_entire_video_data(p) diff --git a/preprocessing.py b/preprocessing.py index dae678b..6222be9 100644 --- a/preprocessing.py +++ b/preprocessing.py @@ -83,7 +83,7 @@ def manager(): logger.error(f"Group {i} is not a directory") groups.remove(i) continue - + with Pool(processes=None) as pool: res = [ pool.apply_async( diff --git a/train.py b/train.py index d6209d8..c5c2071 100644 --- a/train.py +++ b/train.py @@ -58,9 +58,7 @@ def main(): timestamp = datetime.utcnow().timestamp() - config = TrainingConfig( - dataset_path, aligns_path - ) + config = TrainingConfig(dataset_path, aligns_path) train(timestamp, config) @@ -90,7 +88,8 @@ def train(timestamp: float, config: TrainingConfig): config.image_height, config.image_width, config.max_string, - ).compile_model() + ) + lipnet.compile() datagen = DatasetGenerator( config.dataset_path, @@ -107,7 +106,7 @@ def train(timestamp: float, config: TrainingConfig): start_time = time.time() - lipnet.model.fit( + lipnet.fit( x=datagen.train_generator, validation_data=datagen.val_generator, epochs=config.epochs,