Skip to content

Commit

Permalink
fix predicting function
Browse files: browse the repository at this point in the history
Former-commit-id: bb4b8bb
  • Loading branch information
FecoDoo committed Apr 1, 2022
1 parent 206135d commit 201048e
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 112 deletions.
2 changes: 1 addition & 1 deletion core/decoding/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ def __keras_decode(
top_paths=top_paths,
)

return [path.eval(session=k.get_session()) for path in decoded[0]]
return decoded[0]
11 changes: 5 additions & 6 deletions core/model/lipnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import core.model.layers as layers
import env
import os


ADAM_LEARN_RATE = 0.0001
Expand Down Expand Up @@ -96,13 +97,11 @@ def compile(self, optimizer=None):
)

self.model.compile(
loss={"ctc": lambda y_true, y_pred: y_pred}, optimizer=optimizer
loss={"ctc": lambda inputs, outputs: outputs}, optimizer=optimizer
)
return self

def load_weights(self, path: str):
def load_weights(self, path: os.PathLike):
self.model.load_weights(path)
return self

@staticmethod
def get_input_shape(
Expand All @@ -114,8 +113,8 @@ def get_input_shape(
return frame_count, image_width, image_height, image_channels

def predict(self, input_batch):
return self.capture_softmax_output([input_batch, 0])[0]
return self.capture_softmax_output([input_batch])[0]

@property
def capture_softmax_output(self):
return k.function([self.input_layer, k.learning_phase()], [self.y_pred])
return k.function([self.input_layer], [self.y_pred])
143 changes: 44 additions & 99 deletions predict.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import argparse
import csv
import os
from typing import NamedTuple, List
from pathlib import Path
import numpy as np
import skvideo.io
from colorama import Fore, init

import dlib
import env
import logging

from typing import NamedTuple, List
from pathlib import Path
from common.decode import create_decoder
from common.iters import chunks
from core.helpers.video import (
Expand All @@ -17,23 +16,24 @@
)
from core.model.lipnet import LipNet
from core.utils.visualization import visualize_video_subtitle
from utils.extract import extract_video_data
from utils.converter import Converter


os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
init(autoreset=True)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = env.TF_CPP_MIN_LOG_LEVEL
logger = logging.getLogger("predicting")
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())


ROOT_PATH = os.path.dirname(os.path.realpath(__file__))
DICTIONARY_PATH = os.path.realpath(
os.path.join(ROOT_PATH, "data", "dictionaries", "grid.txt")
)
# define paths
ROOT = Path(os.path.dirname(os.path.realpath(__file__))).resolve()
DICTIONARY_PATH = ROOT.joinpath(env.DICTIONARY_PATH)
VIDEO_PATH = ROOT.joinpath(env.VIDEO_PATH)
MODEL_PATH = ROOT.joinpath(env.MODEL_PATH)


class PredictConfig(NamedTuple):
weights: str
video_path: str
predictor_path: str
model: os.PathLike
video_path: os.PathLike
frame_count: int = env.FRAME_COUNT
image_width: int = env.IMAGE_WIDTH
image_height: int = env.IMAGE_HEIGHT
Expand All @@ -44,87 +44,37 @@ class PredictConfig(NamedTuple):
def main():
"""
Entry point of the script for using a trained model for predicting videos.
i.e: python predict.py -w data/res/2018-09-26-02-30/lipnet_065_1.96.hdf5 -v data/dataset_eval
"""

print(
r"""
__ __ ______ __ __ ______ ______
/\ \ /\ \ /\ == \ /\ "-.\ \ /\ ___\ /\__ _\
\ \ \____ \ \ \ \ \ _-/ \ \ \-. \ \ \ __\ \/_/\ \/
\ \_____\ \ \_\ \ \_\ \ \_\\"\_\ \ \_____\ \ \_\
\/_____/ \/_/ \/_/ \/_/ \/_/ \/_____/ \/_/
implemented by Omar Salinas
"""
)

ap = argparse.ArgumentParser()
model = MODEL_PATH
video = VIDEO_PATH

ap.add_argument(
"-v",
"--video-path",
required=True,
help="Path to video file or batch directory to analize",
)
ap.add_argument(
"-w", "--weights-path", required=True, help="Path to .hdf5 trained weights file"
)

default_predictor = os.path.join(
__file__, "..", "data", "predictors", "shape_predictor_68_face_landmarks.dat"
)
ap.add_argument(
"-pp",
"--predictor-path",
required=False,
help="(Optional) Path to the predictor .dat file",
default=default_predictor,
)

args = vars(ap.parse_args())

weights = Path(os.path.realpath(args["weights_path"]))
video = Path(os.path.realpath(args["video_path"]))
predictor_path = Path(os.path.realpath(args["predictor_path"]))

if not weights.is_file() or weights.suffix != ".hdf5":
print(Fore.RED + "\nERROR: Trained weights path is not a valid file")
if not model.is_file() or model.suffix != ".h5":
logger.error("\nERROR: Trained weights path is not a valid file")
return

if not video.is_file() and not video.is_dir():
print(
Fore.RED + "\nERROR: Path does not point to a video file nor to a directory"
)
return

if not predictor_path.is_file() or predictor_path.suffix != ".dat":
print(Fore.RED + "\nERROR: Predictor path is not a valid file")
logger.error("\nERROR: Path does not point to a video file nor to a directory")
return

config = PredictConfig(weights, video, predictor_path)
config = PredictConfig(model, video)
predict(config)


def predict(config: PredictConfig):
print("\nPREDICTION\n")
logger.info("Loading weights at: {}".format(config.model))

print("Loading weights at: {}".format(config.weights))
print("Using predictor at: {}".format(config.predictor_path))
logger.info("\nMaking predictions...\n")

print("\nMaking predictions...\n")

lipnet = (
LipNet(
config.frame_count,
config.image_channels,
config.image_height,
config.image_width,
config.max_string,
)
.compile_model()
.load_weights(config.weights)
lipnet = LipNet(
config.frame_count,
config.image_channels,
config.image_height,
config.image_width,
config.max_string,
)
lipnet.compile()
lipnet.load_weights(config.model)

valid_paths = []
input_lengths = []
Expand All @@ -133,9 +83,7 @@ def predict(config: PredictConfig):
elapsed_videos = 0
video_paths = get_list_of_videos(config.video_path)

for paths, lengths, y_pred in predict_batches(
lipnet, video_paths, config.predictor_path
):
for paths, lengths, y_pred in predict_batches(lipnet, video_paths):
valid_paths += paths
input_lengths += lengths

Expand All @@ -146,7 +94,7 @@ def predict(config: PredictConfig):
y_pred_len = len(y_pred)
elapsed_videos += y_pred_len

print(
logger.info(
"Predicted batch of {} videos\t({} elapsed)".format(
y_pred_len, elapsed_videos
)
Expand All @@ -155,14 +103,12 @@ def predict(config: PredictConfig):
decoder = create_decoder(DICTIONARY_PATH)
results = decode_predictions(predictions, input_lengths, decoder)

print("\n\nRESULTS:\n")
logger.info("\n\nRESULTS:\n")

display = query_yes_no("List all prediction outputs?", True)
visualize = query_yes_no("Visualize as video captions?", False)

print()

save_csv = query_yes_no("Save prediction outputs to a .csv file?", True)
save_csv = query_yes_no("Save prediction outputs to a .csv file?", False)

if save_csv:
csv_path = query_save_csv_path()
Expand All @@ -174,10 +120,10 @@ def predict(config: PredictConfig):

def get_list_of_videos(path: os.PathLike) -> List[str]:
if path.is_file():
print("Predicting for video at: {}".format(path))
logger.info("Predicting for video at: {}".format(path))
video_paths = [path]
else:
print("Predicting batch at: {}".format(path))
logger.info("Predicting batch at: {}".format(path))
video_paths = get_video_files_in_dir(path)

return video_paths
Expand All @@ -187,9 +133,9 @@ def get_video_files_in_dir(path: str) -> List[str]:
return [f for ext in ["*.mpg", "*.npy"] for f in path.glob(ext)]


def get_video_data(path: str, detector, predictor) -> np.ndarray:
def get_video_data(path: str, converter) -> np.ndarray:
if path.suffix == ".mpg":
data = extract_video_data(path, detector, predictor, False)
data = converter.extract_video_data(path)
return reshape_and_normalize_video_data(data) if data is not None else None
else:
return get_video_data_from_file(path)
Expand All @@ -202,14 +148,13 @@ def get_entire_video_data(path: str) -> np.ndarray:
return get_video_data_from_file(path)


def predict_batches(lipnet: LipNet, video_paths: List[str], predictor_path: str):
def predict_batches(lipnet: LipNet, video_paths: List[str]):
batch_size = env.BATCH_SIZE

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)
converter = Converter(logger)

for paths in chunks(video_paths, batch_size):
input_data = [(p, get_video_data(p, detector, predictor)) for p in paths]
input_data = [(p, get_video_data(p, converter)) for p in paths]
input_data = [x for x in input_data if x[1] is not None]

if len(input_data) <= 0:
Expand Down Expand Up @@ -256,7 +201,7 @@ def display_results(

for p, r in zip(valid_paths, results):
if display:
print("\nVideo: {}\n Result: {}".format(p, r))
logger.info("\nVideo: {}\n Result: {}".format(p, r))

if visualize:
v = get_entire_video_data(p)
Expand Down
2 changes: 1 addition & 1 deletion preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def manager():
logger.error(f"Group {i} is not a directory")
groups.remove(i)
continue

with Pool(processes=None) as pool:
res = [
pool.apply_async(
Expand Down
9 changes: 4 additions & 5 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,7 @@ def main():

timestamp = datetime.utcnow().timestamp()

config = TrainingConfig(
dataset_path, aligns_path
)
config = TrainingConfig(dataset_path, aligns_path)

train(timestamp, config)

Expand Down Expand Up @@ -90,7 +88,8 @@ def train(timestamp: float, config: TrainingConfig):
config.image_height,
config.image_width,
config.max_string,
).compile_model()
)
lipnet.compile()

datagen = DatasetGenerator(
config.dataset_path,
Expand All @@ -107,7 +106,7 @@ def train(timestamp: float, config: TrainingConfig):

start_time = time.time()

lipnet.model.fit(
lipnet.fit(
x=datagen.train_generator,
validation_data=datagen.val_generator,
epochs=config.epochs,
Expand Down

0 comments on commit 201048e

Please sign in to comment.