From ccd7a34d3d2808cfa1a10f724521f4113e717e61 Mon Sep 17 00:00:00 2001 From: squishyhuman Date: Sat, 30 Sep 2023 06:54:01 -0700 Subject: [PATCH] Import code for optical flow in the browser --- .github/workflows/build.yml | 3 + .github/workflows/clang-format-check.yml | 18 + .gitignore | 9 +- lib/CMakeLists.txt | 174 ++++++++++ lib/api/imgproc.cpp | 23 ++ lib/api/imgproc.hpp | 70 ++++ lib/api/reconstruction.cpp | 15 + lib/api/reconstruction.hpp | 45 +++ lib/api/scene.cpp | 17 + lib/api/scene.hpp | 51 +++ lib/api/video.cpp | 88 +++++ lib/api/video.hpp | 54 +++ lib/build-ci.sh | 67 ++++ lib/kernels/cpu/cpu_imgproc.cpp | 35 ++ lib/kernels/cpu/cpu_imgproc.hpp | 14 + lib/kernels/cpu/cpu_reconstruction.cpp | 50 +++ lib/kernels/cpu/cpu_reconstruction.hpp | 14 + lib/kernels/cpu/cpu_scene.cpp | 34 ++ lib/kernels/cpu/cpu_scene.hpp | 14 + lib/kernels/cpu/cpu_video.cpp | 34 ++ lib/kernels/cpu/cpu_video.hpp | 14 + lib/motion_tracker/motion_tracker.cpp | 317 ++++++++++++++++++ lib/motion_tracker/motion_tracker.hpp | 154 +++++++++ .../motion_tracker_embinder.cpp | 35 ++ lib/motion_tracker/vision_graph.cpp | 221 ++++++++++++ lib/motion_tracker/vision_graph.hpp | 76 +++++ lib/utils/emscripten_utils.cpp | 19 ++ lib/utils/emscripten_utils.hpp | 34 ++ lib/utils/frame_pool.cpp | 45 +++ lib/utils/frame_pool.hpp | 64 ++++ lib/utils/image_utils.cpp | 69 ++++ lib/utils/image_utils.hpp | 101 ++++++ lib/utils/math_utils.cpp | 17 + lib/utils/math_utils.hpp | 23 ++ 34 files changed, 2017 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/clang-format-check.yml create mode 100644 lib/CMakeLists.txt create mode 100644 lib/api/imgproc.cpp create mode 100644 lib/api/imgproc.hpp create mode 100644 lib/api/reconstruction.cpp create mode 100644 lib/api/reconstruction.hpp create mode 100644 lib/api/scene.cpp create mode 100644 lib/api/scene.hpp create mode 100644 lib/api/video.cpp create mode 100644 lib/api/video.hpp create mode 100755 lib/build-ci.sh create mode 100644 
lib/kernels/cpu/cpu_imgproc.cpp create mode 100644 lib/kernels/cpu/cpu_imgproc.hpp create mode 100644 lib/kernels/cpu/cpu_reconstruction.cpp create mode 100644 lib/kernels/cpu/cpu_reconstruction.hpp create mode 100644 lib/kernels/cpu/cpu_scene.cpp create mode 100644 lib/kernels/cpu/cpu_scene.hpp create mode 100644 lib/kernels/cpu/cpu_video.cpp create mode 100644 lib/kernels/cpu/cpu_video.hpp create mode 100644 lib/motion_tracker/motion_tracker.cpp create mode 100644 lib/motion_tracker/motion_tracker.hpp create mode 100644 lib/motion_tracker/motion_tracker_embinder.cpp create mode 100644 lib/motion_tracker/vision_graph.cpp create mode 100644 lib/motion_tracker/vision_graph.hpp create mode 100644 lib/utils/emscripten_utils.cpp create mode 100644 lib/utils/emscripten_utils.hpp create mode 100644 lib/utils/frame_pool.cpp create mode 100644 lib/utils/frame_pool.hpp create mode 100644 lib/utils/image_utils.cpp create mode 100644 lib/utils/image_utils.hpp create mode 100644 lib/utils/math_utils.cpp create mode 100644 lib/utils/math_utils.hpp diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 50a1b0d0b..f76b3937c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -60,3 +60,6 @@ jobs: - name: Build depends if: steps.restore-depends.outputs.cache-hit != 'true' run: tools/build-depends.sh all + + - name: Build libraries + run: lib/build-ci.sh diff --git a/.github/workflows/clang-format-check.yml b/.github/workflows/clang-format-check.yml new file mode 100644 index 000000000..7e69bd65e --- /dev/null +++ b/.github/workflows/clang-format-check.yml @@ -0,0 +1,18 @@ +name: clang-format Check + +on: [push, pull_request] + +jobs: + formatting-check: + name: Formatting Check + runs-on: ubuntu-22.04 + defaults: + run: + working-directory: onchain + steps: + - uses: actions/checkout@v3 + - name: Run clang-format style check for C/C++/Protobuf programs + uses: jidicula/clang-format-action@v4 + with: + clang-format-version: '15' + 
check-path: 'lib' diff --git a/.gitignore b/.gitignore index 96fc2d984..3d0bee259 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ # Generated dependencies /src/generated +# Generated libraries +/frontend/public/motion_tracker + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -21,7 +24,6 @@ dist/ downloads/ eggs/ .eggs/ -lib/ lib64/ parts/ sdist/ @@ -143,3 +145,8 @@ Temporary # libzip build files /third-party/libzip + +# Eclipse project files +.cproject +.project +.settings diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt new file mode 100644 index 000000000..3bab98918 --- /dev/null +++ b/lib/CMakeLists.txt @@ -0,0 +1,174 @@ +################################################################################ +# +# Copyright (C) 2020-2023 retro.ai +# This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp +# +# SPDX-License-Identifier: Apache-2.0 +# See the file LICENSE.md for more information. +# +################################################################################ + +################################################################################ +# +# Build system for C++ libraries +# +# Required CMake variables: +# +# CMAKE_FIND_ROOT_PATH - Point this to dependencies compiled with Emscripten +# CMAKE_INSTALL_PREFIX - Point this to the "public" folder +# +################################################################################ + +################################################################################ +# +# Project settings +# +################################################################################ + +project(retroai) + +cmake_minimum_required(VERSION 3.0.0) + +set(CMAKE_CXX_STANDARD 17) + +################################################################################ +# +# Dependencies +# +################################################################################ + +find_package(Ceres REQUIRED) +find_package(Eigen3 REQUIRED) +find_package(Glog REQUIRED) +find_package(OpenCV 
REQUIRED) + +add_definitions(-DCERES_FOUND=1) + +################################################################################ +# +# Define sources +# +################################################################################ + +# +# Motion tracker +# + +set(MOTION_TRACKER_SOURCES + api/imgproc.cpp + api/reconstruction.cpp + api/scene.cpp + api/video.cpp + kernels/cpu/cpu_imgproc.cpp + kernels/cpu/cpu_reconstruction.cpp + kernels/cpu/cpu_scene.cpp + kernels/cpu/cpu_video.cpp + motion_tracker/motion_tracker.cpp + motion_tracker/motion_tracker_embinder.cpp + motion_tracker/vision_graph.cpp + utils/emscripten_utils.cpp + utils/frame_pool.cpp + utils/image_utils.cpp + utils/math_utils.cpp +) + +################################################################################ +# +# Build libraries +# +# TODO: +# +# * Build properly instead of shelling out +# * Could refactor this into macros +# +################################################################################ + +include_directories( + ${CMAKE_SOURCE_DIR} +) + +string(APPEND EMSCRIPTEN_LINK_FLAGS + "--bind " + # "-o dist/engine.js " + # " -std=c++11 " + # " -O2 " + # " --preload-file textures " + # " --preload-file shaders " + # " --preload-file fonts " + # " --pre-js pre-module.j " + # " --post-js post-module.j " + "-s ALLOW_MEMORY_GROWTH=1 " + "-s ASSERTIONS=1 " + # " -s DEMANGLE_SUPPORT=1 " + # " -s DISABLE_EXCEPTION_CATCHING=0 " + "-s ERROR_ON_UNDEFINED_SYMBOLS=0 " + # " -s FULL_ES3=1 " + # " -s GL_ASSERTIONS=1 " + # " -s GL_UNSAFE_OPTS=0 " + # " -s INVOKE_RUN=0 " + # " -s LEGACY_GL_EMULATION=0 " + #"-s LLD_REPORT_UNDEFINED " + # " -s OFFSCREENCANVAS_SUPPORT=1 " + # " -s SAFE_HEAP=1 " + #"-s TOTAL_MEMORY=67108864 " + # " -s USE_FREETYPE=1 " + # " -s USE_GLFW=3 " + # " -s USE_WEBGL2=1 " + "-s USE_ZLIB=1 " + # " -s WASM=1 " +) + +# +# Motion tracker +# + +add_executable(motion_tracker + ${MOTION_TRACKER_SOURCES} +) + +target_include_directories(motion_tracker PRIVATE + ${OpenCV_INCLUDE_DIRS} +) 
+ +target_link_libraries(motion_tracker PRIVATE + ${OpenCV_LIBS} +) + +if (${CMAKE_SYSTEM_NAME} MATCHES "Emscripten") + set_target_properties(motion_tracker PROPERTIES + COMPILE_FLAGS " \ + -O0 \ + -g4 \ + -s DISABLE_EXCEPTION_CATCHING=0 \ + -s INITIAL_MEMORY=26214400 \ + " + # 26214400 is 25 MiB + LINK_FLAGS " \ + --bind \ + --source-map-base https://retro.ai/ \ + -O0 \ + -g4 \ + -s DISABLE_EXCEPTION_CATCHING=0 \ + -s INITIAL_MEMORY=26214400 \ + " + ) +endif () + +################################################################################ +# +# Install libraries +# +################################################################################ + +# +# Motion tracker +# + +INSTALL( + FILES + "${CMAKE_BINARY_DIR}/motion_tracker.js" + "${CMAKE_BINARY_DIR}/motion_tracker.wasm" + "${CMAKE_BINARY_DIR}/motion_tracker.wasm.map" + DESTINATION + motion_tracker +) diff --git a/lib/api/imgproc.cpp b/lib/api/imgproc.cpp new file mode 100644 index 000000000..7d2908ec6 --- /dev/null +++ b/lib/api/imgproc.cpp @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#include "imgproc.hpp" + +#include + +cv::GMat imgproc::RGBA2Gray(const cv::GMat &rgbaImage) { + return cv::gapi::RGBA2Gray(rgbaImage); +} + +cv::GArray imgproc::GoodFeaturesToTrack( + const cv::GMat &grayscaleImage, const cv::GScalar &maxFeatures, + const cv::GScalar &minDistance, double qualityLevel, const cv::Mat &mask, + int blockSize, bool useHarrisDetector, double k) { + return GGoodFeatures::on(grayscaleImage, maxFeatures, qualityLevel, + minDistance, mask, blockSize, useHarrisDetector, k); +} diff --git a/lib/api/imgproc.hpp b/lib/api/imgproc.hpp new file mode 100644 index 000000000..b6910903d --- /dev/null +++ b/lib/api/imgproc.hpp @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of trajectory-reconstruction - https://github.com/eigendude/trajectory-reconstruction + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#pragma once + +#include +#include +#include +#include + +namespace imgproc +{ + G_TYPED_KERNEL(GGoodFeatures, + (cv::GMat, cv::GScalar, double, cv::GScalar, cv::Mat, int, bool, double)>, + "com.trajectoryReconstruction.imgproc.goodFeaturesToTrack") { + static cv::GArrayDesc outMeta(cv::GMatDesc, cv::GScalarDesc, double, cv::GScalarDesc, const cv::Mat&, int, bool, double) { + return cv::empty_array_desc(); + } + }; + + /*! + * \brief Convert RGBA image to grayscale + * + * \param rgbaImage The 4-channel RGBA image + * + * \return The single-channel grayscale image + */ + cv::GMat RGBA2Gray(const cv::GMat& rgbaImage); + + /*! + * \brief Get some good features to track + * + * \param grayscaleImage The single-channel grayscale image + * + * \param maxFeatures Maximum number of corners to return. + * If more corners are found than this maximum, the strongest of them are + * returned. + * + * \param qualityLevel Minimal accepted quality of image corners. + * This parameter characterizes the minimal accepted quality of + * corners. 
+ * + * The parameter value is multiplied by the best corner quality measure, + * which is the minimal eigenvalue or the Harris function response. + * + * The corners with the quality measure less than the product are rejected. + * + * For example, if the best corner has the quality measure = 1500, and the + * qualityLevel = 0.01, then all the corners with the quality measure less + * than 15 are rejected. + * + * \param minDistance Minimum possible Euclidean distance between the + * returned corners + * + * \return A list of good features to track + */ + cv::GArray GoodFeaturesToTrack(const cv::GMat& grayscaleImage, + const cv::GScalar& maxFeatures, + const cv::GScalar& minDistance, + double qualityLevel = 0.01, + const cv::Mat& mask = cv::Mat(), + int blockSize = 3, + bool useHarrisDetector = false, + double k = 0.04); +} diff --git a/lib/api/reconstruction.cpp b/lib/api/reconstruction.cpp new file mode 100644 index 000000000..736309f9f --- /dev/null +++ b/lib/api/reconstruction.cpp @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#include "reconstruction.hpp" + +reconstruction::TplTrajectory reconstruction::ReconstructTrajectory( + const cv::GArray> &pointHistory, + const cv::GMat &cameraMatrix) { + return GReconstructTrajectory::on(pointHistory, cameraMatrix); +} diff --git a/lib/api/reconstruction.hpp b/lib/api/reconstruction.hpp new file mode 100644 index 000000000..6b1ebf9d6 --- /dev/null +++ b/lib/api/reconstruction.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include // std::tuple +#include + +namespace reconstruction +{ + using TplTrajectory = std::tuple; + using TplTrajectoryDesc = std::tuple; + + G_TYPED_KERNEL(GReconstructTrajectory, >, cv::GMat)>, + "com.trajectoryReconstruction.reconstructTrajectory") + { + static TplTrajectoryDesc outMeta(cv::GArrayDesc pointHistory, cv::GMatDesc cameraDesc) + { + return std::make_tuple(cv::empty_gmat_desc(), cameraDesc); + } + }; + + /*! + * Reconstruct the trajectory using 2d point correspondences + * + * \param pointHistory Input vector of vectors of 2d points (the inner vector is per image) + * \param cameraMatrix Input camera matrix used as initial guess + * + * \return Tuple consisting of: + * * Output vector with the 3x4 projections matrices of each image + * * Output array with estimated 3d points + * * Output camera matrix + */ + TplTrajectory ReconstructTrajectory(const cv::GArray>& pointHistory, + const cv::GMat& cameraMatrix); +} diff --git a/lib/api/scene.cpp b/lib/api/scene.cpp new file mode 100644 index 000000000..d911d6de1 --- /dev/null +++ b/lib/api/scene.cpp @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#include "scene.hpp" + +scene::TplDoubles scene::CalcSceneScore(const cv::GMat &prevImg, + const cv::GMat &nextImg, + const cv::GOpaque &prevMafd, + unsigned int width, + unsigned int height) { + return GCalcSceneScore::on(prevImg, nextImg, prevMafd, width, height); +} diff --git a/lib/api/scene.hpp b/lib/api/scene.hpp new file mode 100644 index 000000000..8491c949e --- /dev/null +++ b/lib/api/scene.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#pragma once + +#include +#include +#include +#include // std::tuple + +namespace scene +{ + using TplDoubles = std::tuple, cv::GOpaque>; + using TplDoublesDesc = std::tuple; + + G_TYPED_KERNEL(GCalcSceneScore, , unsigned int, unsigned int)>, + "com.trajectoryReconstruction.calcSceneScore") + { + static TplDoublesDesc outMeta(cv::GMatDesc, + cv::GMatDesc, + cv::GOpaqueDesc, + unsigned int width, + unsigned int height) + { + return std::make_tuple(cv::empty_gopaque_desc(), cv::empty_gopaque_desc()); + } + }; + + /*! + * \brief Calculate a score indicating whether a scene change has taken place + * + * \param prevImg The previous frame + * \param nextImg The following frame + * \param prevMafd The previous mean absolute frame difference (MAFD) + * \param width The width of a frame + * \param height The height of a frame + * + * \return Two doubles - the MAFD of the following frame, and its scene score + */ + TplDoubles CalcSceneScore(const cv::GMat& prevImg, + const cv::GMat& nextImg, + const cv::GOpaque& prevMafd, + unsigned int width, + unsigned int height); +} diff --git a/lib/api/video.cpp b/lib/api/video.cpp new file mode 100644 index 000000000..b000dd5d0 --- /dev/null +++ b/lib/api/video.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#include "video.hpp" + +#include + +cv::GArray +video::PredictPoints(const cv::GArray> &pointHistory) { + return GPredictPoints::on(pointHistory); +} + +cv::gapi::video::GOptFlowLKOutput +video::CalcOpticalFlow(const cv::GMat &prevImg, const cv::GMat &nextImg, + const cv::GArray &prevPts, + const cv::GArray &predPts) { + // TODO: Move parameters out of API + + // Window size of optical flow algorithm used to calculate required padding + // for pyramic levels. + // + // Must be no less than winSize argument of calcOpticalFlowPyrLK(). 
+ // const cv::Size winSize = cv::Size(11, 11); + const cv::Size winSize = cv::Size(21, 21); + + // 0-based maximal pyramid level number. + // + // According to Bouguet, 2001, practical values the height of the pyramid + // (picked heuristically) are 2, 3, 4. + // + // If set to 0, pyramids are not used (single level). If set to 1, two + // levels are used, and so on. + // + // The LK algorithm will use as many levels as pyramids, but no more than + // maxLevel. + const cv::GScalar &maxLevel = 3; + + // Parameter specifying the termination criteria of the iterative search + // algorithm. + // + // The algorithm terminates after the specified maximum number of + // iterations or when the search window moves by less than the epsilon. + const cv::TermCriteria criteria = cv::TermCriteria( + // The maximum number of iterations or elements to compute + cv::TermCriteria::COUNT | + // The desired accuracy or change in parameters at which the iterative + // algorithm stops + cv::TermCriteria::EPS, + // Max number + 30, + // Epsilon + 0.01); + + const int flags = + 0 | + // Uses initial estimations, stored in nextPts; if the flag is + // not set, then prevPts is copied to nextPts and is considered the + // initial estimate. + cv::OPTFLOW_USE_INITIAL_FLOW | + // For the error, use the L1 distance between patches around the original + // and moved point, divided by number of pixels in a window. + // + // Alternatively, set the flag to cv::OPTFLOW_LK_GET_MIN_EIGENVALS to + // use minimum eigen values as an error measure (see minEigThreshold + // description). + //; + 0; + + // The algorithm calculates the minimum eigen value of a 2x2 normal matrix + // of optical flow equations, divided by number of pixels in a window. + // + // If this value is less than minEigThreshold, then a corresponding feature + // is filtered out and its flow is not processed, so it allows to remove + // bad points and get a performance boost. 
+ // + // The 2x2 normal matrix of optical flow equations is called a spatial + // gradient matrix in @cite Bouguet00) + const double minEigThresh = 1e-4; + + return cv::gapi::calcOpticalFlowPyrLK(prevImg, nextImg, prevPts, predPts, + winSize, maxLevel, criteria, flags, + minEigThresh); +} diff --git a/lib/api/video.hpp b/lib/api/video.hpp new file mode 100644 index 000000000..3d49a7af0 --- /dev/null +++ b/lib/api/video.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace video +{ + G_TYPED_KERNEL(GPredictPoints, (cv::GArray>)>, + "com.trajectoryReconstruction.predictPoints") + { + static cv::GArrayDesc outMeta(cv::GArrayDesc in) + { + return cv::empty_array_desc(); + } + }; + + /*! + * \brief Predict the results of an optical flow calculation given the previous points + * + * \param prevPoints The points of the previous frame + * + * \return The predicted points in the next frame + */ + cv::GArray PredictPoints(const cv::GArray>& prevPoints); + + /*! 
+ * \brief Create a graph node that calculates optical flow + * + * @param prevImg First 8-bit input image + * @param nextImg Second input image of the same size and the same type as prevImg + * @param prevPts Vector of 2D points for which the flow needs to be found + * @param predPts Points containing the predicted new positions of input features in the second image + * + * @note When OPTFLOW_USE_INITIAL_FLOW flag is passed, the prediction vector + * must have the same size as in the input + * + * @return G-API optical flow output + */ + cv::gapi::video::GOptFlowLKOutput CalcOpticalFlow(const cv::GMat& prevImg, + const cv::GMat& nextImg, + const cv::GArray& prevPts, + const cv::GArray& predPts); +} diff --git a/lib/build-ci.sh b/lib/build-ci.sh new file mode 100755 index 000000000..4219ffc4d --- /dev/null +++ b/lib/build-ci.sh @@ -0,0 +1,67 @@ +#!/bin/bash +################################################################################ +# +# Copyright (C) 2000-2023 retro.ai +# This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp +# +# SPDX-License-Identifier: Apache-2.0 +# See the file LICENSE.md for more information. +# +################################################################################ + +################################################################################ +# +# Helper for CI infrastructure. Sets the appropriate paths and calls CMake. 
+# +################################################################################ + +# Enable strict shell mode +set -o errexit +set -o nounset +set -o pipefail + +# +# Environment paths +# + +# Get the absolute path to this script +SOURCE_DIRECTORY="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# Directory of the depends build system +TOOL_DIRECTORY="${SOURCE_DIRECTORY}/../tools" + +# Directory for intermediate build files +BUILD_DIRECTORY="${TOOL_DIRECTORY}/build/cpp-libs" + +# Directory of the Emscripten SDK +EMSDK_DIRECTORY="${TOOL_DIRECTORY}/repos/emsdk" + +# Directory of the installed dependency files +DEPENDS_DIRECTORY="${TOOL_DIRECTORY}/dist" + +# Directory to place the generated libraries +INSTALL_DIRECTORY="${SOURCE_DIRECTORY}/../frontend/public" + +# Ensure directories exist +mkdir -p "${BUILD_DIRECTORY}" +mkdir -p "${INSTALL_DIRECTORY}" + +# +# Setup environment +# + +source "${EMSDK_DIRECTORY}/emsdk_set_env.sh" + +# +# Call CMake +# + +cd "${BUILD_DIRECTORY}" + +emcmake cmake \ + "${SOURCE_DIRECTORY}" \ + -DCMAKE_FIND_ROOT_PATH="${DEPENDS_DIRECTORY}" \ + -DCMAKE_INSTALL_PREFIX="${INSTALL_DIRECTORY}" \ + $(! command -v ccache &> /dev/null || echo "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache") \ + +cmake --build "${BUILD_DIRECTORY}" -j"$(getconf _NPROCESSORS_ONLN)" --target install diff --git a/lib/kernels/cpu/cpu_imgproc.cpp b/lib/kernels/cpu/cpu_imgproc.cpp new file mode 100644 index 000000000..d07110b95 --- /dev/null +++ b/lib/kernels/cpu/cpu_imgproc.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#include "cpu_imgproc.hpp" + +#include "api/imgproc.hpp" + +#include +#include + +namespace imgproc { +// Find good features +GAPI_OCV_KERNEL(GCPUGoodFeatures, GGoodFeatures){ + static void run(const cv::Mat &image, const cv::Scalar &maxCorners, + double qualityLevel, const cv::Scalar &minDistance, + const cv::Mat &mask, int blockSize, bool useHarrisDetector, + double k, std::vector &out){ + cv::goodFeaturesToTrack(image, out, static_cast(maxCorners[0]), + qualityLevel, minDistance[0], mask, blockSize, + useHarrisDetector, k); +} // namespace imgproc +} +; +} + +cv::gapi::GKernelPackage imgproc::kernels() { + static auto pkg = cv::gapi::kernels(); + + return pkg; +} diff --git a/lib/kernels/cpu/cpu_imgproc.hpp b/lib/kernels/cpu/cpu_imgproc.hpp new file mode 100644 index 000000000..a6637abca --- /dev/null +++ b/lib/kernels/cpu/cpu_imgproc.hpp @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#include + +namespace imgproc +{ + cv::gapi::GKernelPackage kernels(); +} diff --git a/lib/kernels/cpu/cpu_reconstruction.cpp b/lib/kernels/cpu/cpu_reconstruction.cpp new file mode 100644 index 000000000..9f93b0b3e --- /dev/null +++ b/lib/kernels/cpu/cpu_reconstruction.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#include "cpu_reconstruction.hpp" + +#include "api/reconstruction.hpp" + +#include +#include + +namespace reconstruction { +// Reconstruct trajectory +GAPI_OCV_KERNEL(GCPUReconstructTrajectory, GReconstructTrajectory){ + static void run(const std::vector> &pointHistory, + const cv::Mat &initialCameraMatrix, + cv::Mat &projectionMatrix, cv::Mat &updatedCameraMatrix){ + // If true, the cameras is supposed to be projective + const bool isProjective = true; + +// Initialize the updated camera matrix +updatedCameraMatrix = initialCameraMatrix; + +// Reconstruct the scene using the 2d correspondences +std::vector projections; + +// Unused (we would have to project these back to 2D image space for them +// to be useful) +std::vector estimated3dPoints; + +// Perform reconstruction +cv::sfm::reconstruct(pointHistory, projections, estimated3dPoints, + updatedCameraMatrix, isProjective); + +// We are interested in the most recent projection +projectionMatrix = projections.back(); +} // namespace reconstruction +} +; +} + +cv::gapi::GKernelPackage reconstruction::kernels() { + static auto pkg = cv::gapi::kernels(); + + return pkg; +} diff --git a/lib/kernels/cpu/cpu_reconstruction.hpp b/lib/kernels/cpu/cpu_reconstruction.hpp new file mode 100644 index 000000000..ef55ff000 --- /dev/null +++ b/lib/kernels/cpu/cpu_reconstruction.hpp @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#include + +namespace reconstruction +{ + cv::gapi::GKernelPackage kernels(); +} diff --git a/lib/kernels/cpu/cpu_scene.cpp b/lib/kernels/cpu/cpu_scene.cpp new file mode 100644 index 000000000..c133b6987 --- /dev/null +++ b/lib/kernels/cpu/cpu_scene.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#include "cpu_scene.hpp" + +#include "api/scene.hpp" +#include "utils/image_utils.hpp" + +#include + +namespace scene { +// Calculate the scene score given a frame and its previous frame +GAPI_OCV_KERNEL(GCPUCalcSceneScore, GCalcSceneScore){ + static void run(const cv::Mat &prevImg, const cv::Mat &nextImg, + double prevMafd, unsigned int width, unsigned int height, + double &nextMafd, double &sceneScore){ + nextMafd = ImageUtils::CalcSceneMAFD(prevImg.data, nextImg.data, width, + height); +sceneScore = ImageUtils::CalcSceneScore(prevMafd, nextMafd); +} // namespace scene +} +; +} + +cv::gapi::GKernelPackage scene::kernels() { + static auto pkg = cv::gapi::kernels(); + + return pkg; +} diff --git a/lib/kernels/cpu/cpu_scene.hpp b/lib/kernels/cpu/cpu_scene.hpp new file mode 100644 index 000000000..28da8d0d6 --- /dev/null +++ b/lib/kernels/cpu/cpu_scene.hpp @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#include + +namespace scene +{ + cv::gapi::GKernelPackage kernels(); +} diff --git a/lib/kernels/cpu/cpu_video.cpp b/lib/kernels/cpu/cpu_video.cpp new file mode 100644 index 000000000..72ebea812 --- /dev/null +++ b/lib/kernels/cpu/cpu_video.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#include "cpu_video.hpp" + +#include "api/video.hpp" + +#include +#include + +namespace video { +// Predict points for optical flow +GAPI_OCV_KERNEL(GCPUPredictPoints, GPredictPoints){ + static void run(const std::vector> &pointHistory, + std::vector &predictedPoints){ + predictedPoints.resize(pointHistory[0].size()); + +// TODO +predictedPoints = pointHistory.back(); +} // namespace video +} +; +} + +cv::gapi::GKernelPackage video::kernels() { + static auto pkg = cv::gapi::kernels(); + + return pkg; +} diff --git a/lib/kernels/cpu/cpu_video.hpp b/lib/kernels/cpu/cpu_video.hpp new file mode 100644 index 000000000..73ce4d034 --- /dev/null +++ b/lib/kernels/cpu/cpu_video.hpp @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#include + +namespace video +{ + cv::gapi::GKernelPackage kernels(); +} diff --git a/lib/motion_tracker/motion_tracker.cpp b/lib/motion_tracker/motion_tracker.cpp new file mode 100644 index 000000000..9fcf481e6 --- /dev/null +++ b/lib/motion_tracker/motion_tracker.cpp @@ -0,0 +1,317 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#include "motion_tracker.hpp" +#include "vision_graph.hpp" + +#include "utils/emscripten_utils.hpp" +#include "utils/math_utils.hpp" + +#include +#include +#include +#include + +// Scene score from libav's "select" filter. You may have seen filters that +// look like: +// +// select='gt(scene,0.4)' +// +// Typical values suggested online are 0.3 and 0.4, but this can miss scene +// changes in dark videos. +// +// In testing drone footage, scene changes can dip below 0.2. False positives +// typically occur below 0.15 +// +// We care more about false negatives than false positives, so let's go with +// 0.15. +constexpr double SCENE_THREASHOLD = 0.15; + +// Minimum number of points to force redetection +constexpr unsigned int MIN_POINT_COUNT = 5; + +bool MotionTracker::Initialize(int width, int height) { + if (width <= 0 || height <= 0) + return false; + + // Initialize video parameters + m_width = width; + m_height = height; + + // Initialize buffers + m_rgbaFrameBuffer.create(m_height, m_width, CV_8UC4); + m_currentGrayscaleBuffer.create(m_height, m_width, CV_8UC1); + m_previousGrayscale.create(m_height, m_width, CV_8UC1); + + // Focal length of the camera + const double f = 1082.77717353143; // TODO + + // Principal point of the camera + cv::Point2d pp(static_cast(m_width), + static_cast(m_height)); // TODO + + // Initialize camera calibration matrix with sensible values + m_cameraMatrix = + (cv::Mat_(3, 3) << f, 0, pp.x, 0, f, pp.y, 0, 0, 1.0); + + m_visionGraph.reset(new VisionGraph); + m_visionGraph->Compile(width, height, m_rgbaFrameBuffer, + m_currentGrayscaleBuffer, m_previousGrayscale); + + m_framePool.reset(new FramePool); + + return true; +} + +void MotionTracker::SetConfig(const ConfigOptions &config) { + m_config = config; +} + +FrameInfo MotionTracker::AddVideoFrame(const emscripten::val &frameArray) { + // Dereference buffers + cv::Mat &rgbaFrame = m_rgbaFrameBuffer; + cv::Mat ¤tGrayscale = m_currentGrayscaleBuffer; + + // Get a frame to 
gather our results + FramePtr currentFrame = m_framePool->GetFrame(); + + // Fetch array from JavaScript + // TODO: Elide copy + ReadArray(frameArray, rgbaFrame.data); + + // Convert to grayscale + ConvertToGrayscale(rgbaFrame, currentGrayscale); + + // TODO: Check for full black image + + // Get the scene score by comparing the last two frames + GetScreenScoreSAD(currentGrayscale, currentFrame->mafd, + currentFrame->sceneScore); + + // Reset frame history when a scene change is detected + if (currentFrame->sceneScore >= SCENE_THREASHOLD) { + std::cout << "Scene change detected (score: " << currentFrame->sceneScore + << ")" << std::endl; + m_frameHistory.clear(); + } + + // TODO + if (m_frameHistory.size() > m_config.maxFrameCount) { + m_frameHistory.clear(); + } + + if (m_frameHistory.empty()) { + FindFeatures(currentGrayscale, currentFrame->points, currentFrame->status, + currentFrame->errors); + } else { + // Calculate optical flow if we have a previous frame + CalculateOpticalFlow(currentGrayscale, currentFrame->points, + currentFrame->status, currentFrame->errors); + + // If feature count drops by 10% or more, consider it a scene change + const unsigned int missing = + std::count(currentFrame->status.begin(), currentFrame->status.end(), 0); + + // TODO: Better scene detection + + if (10 * missing > currentFrame->status.size()) { + std::cout << "Scene change detected (missing points: " << missing << ")" + << std::endl; + m_frameHistory.clear(); + } + + if (currentFrame->points.size() <= MIN_POINT_COUNT) { + std::cout << "Scene change detected (points count: " + << currentFrame->points.size() << ")" << std::endl; + m_frameHistory.clear(); + } + + if (m_frameHistory.empty()) + FindFeatures(currentGrayscale, currentFrame->points, currentFrame->status, + currentFrame->errors); + } + + if (!currentFrame->points.empty()) + AddFrameToHistory(std::move(currentFrame)); + + // Reconstruct trajectory + if (false) { + GetProjectionMatrix(currentFrame->projectionMatrix); + 
} + + // Update state + std::swap(currentGrayscale, m_previousGrayscale); + + // Create result + FrameInfo frameInfo = GetResult(); + + return frameInfo; +} + +void MotionTracker::ReadArray(const emscripten::val &frameArray, + uint8_t *data) { + // Get array size + const unsigned int dataSize = EmscriptenUtils::ArrayLength(frameArray); + + // Copy data + EmscriptenUtils::GetArrayData(frameArray, data, dataSize); +} + +void MotionTracker::ConvertToGrayscale(const cv::Mat &in, cv::Mat &out) { + m_visionGraph->ApplyGrayscale(in, out); +} + +void MotionTracker::GetScreenScoreSAD(const cv::Mat ¤tGrayscale, + double ¤tMafd, double &sceneScore) { + if (!m_frameHistory.empty()) { + // TODO: Zero-copy + m_pointHistoryBuffer.clear(); + for (const auto &frame : m_frameHistory) + m_pointHistoryBuffer.emplace_back(frame->points); + + const FramePtr &previousFrame = m_frameHistory.back(); + + double previousMafd = previousFrame->mafd; + + // Calculate scene score + m_visionGraph->CalcSceneScore(m_previousGrayscale, currentGrayscale, + previousMafd, currentMafd, sceneScore); + } else { + currentMafd = 0.0; + sceneScore = 0.0; + } +} + +void MotionTracker::FindFeatures(const cv::Mat ¤tGrayscale, + std::vector ¤tPoints, + std::vector &status, + std::vector &errors) { + // TODO + const double minDistance = + std::max(MathUtils::GeometricMean(m_width, m_height) / + (static_cast(m_config.maxPointCount) / 2.0), + 2.0); + + m_visionGraph->FindFeatures(currentGrayscale, m_config.maxPointCount, + minDistance, currentPoints); + status.assign(currentPoints.size(), 1U); + errors.assign(currentPoints.size(), 0.0f); +} + +void MotionTracker::CalculateOpticalFlow( + const cv::Mat ¤tGrayscale, std::vector ¤tPoints, + std::vector &status, std::vector &errors) { + if (!m_frameHistory.empty()) { + const std::vector &previousPoints = + m_frameHistory.back()->points; + if (!previousPoints.empty()) { + // TODO: Zero-copy + m_pointHistoryBuffer.clear(); + for (const auto &frame : m_frameHistory) + 
m_pointHistoryBuffer.emplace_back(frame->points); + + m_visionGraph->CalcOpticalFlow(m_previousGrayscale, currentGrayscale, + previousPoints, m_pointHistoryBuffer, + currentPoints, status, errors); + } + } +} + +void MotionTracker::AddFrameToHistory(FramePtr &&frame) { + // Check for missing points (value of "status" is 0) + std::vector missing; + for (unsigned int index = 0; index < frame->status.size(); index++) { + if (frame->status[index] == 0) + missing.push_back(index); + } + + m_frameHistory.emplace_back(std::move(frame)); + + if (!missing.empty()) { + // Prune missing points from history + for (auto &frame : m_frameHistory) { + if (frame->points.empty()) + continue; + + // This used to use lambdas, but they were causing function index + // out-of-bound errors in the browser + for (auto it = missing.end(); it != missing.begin(); --it) { + const unsigned int index = *(it - 1); + + frame->points.erase(frame->points.begin() + index); + frame->status.erase(frame->status.begin() + index); + frame->errors.erase(frame->errors.begin() + index); + } + } + } +} + +void MotionTracker::GetProjectionMatrix(cv::Mat &projectionMatrix) { + if (!m_frameHistory.empty()) { + // Make a copy of the camera matrix, it is used as an in/out parameter + cv::Mat previousCameraMatrix = m_cameraMatrix; + + // TODO: Zero-copy + m_pointHistoryBuffer.clear(); + for (const auto &frame : m_frameHistory) + m_pointHistoryBuffer.emplace_back(frame->points); + + m_visionGraph->ReconstructTrajectory(m_pointHistoryBuffer, + previousCameraMatrix, projectionMatrix, + m_cameraMatrix); + } +} + +FrameInfo MotionTracker::GetResult() const { + FrameInfo frameInfo{}; + + m_points.clear(); + m_initialPoints.clear(); + m_projectionMatrix.clear(); + + if (!m_frameHistory.empty()) { + // Grab references to pertinent frames + const FramePtr ¤tFrame = m_frameHistory.back(); + const FramePtr &initialFrame = m_frameHistory.front(); + + // Set the scene score + frameInfo.sceneScore = currentFrame->sceneScore; + + 
// Set current points + const std::vector ¤tPoints = currentFrame->points; + if (!currentPoints.empty()) { + m_points.reserve(currentPoints.size() * 2); + for (const cv::Point2f &point : currentPoints) { + m_points.push_back(point.x); + m_points.push_back(point.y); + } + frameInfo.pointData = reinterpret_cast(m_points.data()); + frameInfo.pointSize = m_points.size(); + } + + // Set initial points + const std::vector &initialPoints = initialFrame->points; + if (!initialPoints.empty()) { + m_initialPoints.reserve(initialPoints.size() * 2); + for (const cv::Point2f &point : initialPoints) { + m_initialPoints.push_back(point.x); + m_initialPoints.push_back(point.y); + } + frameInfo.initialPointData = + reinterpret_cast(m_initialPoints.data()); + frameInfo.initialPointSize = m_initialPoints.size(); + } + + // Set projection matrix + const cv::Mat &projectionMatrix = currentFrame->projectionMatrix; + m_projectionMatrix.resize(projectionMatrix.rows * + projectionMatrix.cols); // TODO + } + + return frameInfo; +} diff --git a/lib/motion_tracker/motion_tracker.hpp b/lib/motion_tracker/motion_tracker.hpp new file mode 100644 index 000000000..1f165d44f --- /dev/null +++ b/lib/motion_tracker/motion_tracker.hpp @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#pragma once + +#include "utils/frame_pool.hpp" + +#include +#include +#include + +namespace emscripten +{ +class val; +} + +class VisionGraph; + +struct ConfigOptions +{ + // The maximum number of points to track + unsigned int maxPointCount = 200; + + // The maximum number of frames to solve for + unsigned int maxFrameCount = 40; +}; + +struct FrameInfo +{ + double sceneScore = 0.0; // in the range [0.0, 1.0], 1.0 is a new scene + uintptr_t pointData = 0; + unsigned int pointSize = 0; + uintptr_t initialPointData = 0; + unsigned int initialPointSize = 0; + uintptr_t projectionMatrixData = 0; + unsigned int projectionMatrixSize = 0; +}; + +class MotionTracker +{ +public: + MotionTracker() = default; + ~MotionTracker() { Deinitialize(); } + + /*! + * \brief Initialize the motion tracker with the specified dimensions + * + * \param width The video width + * \param height The video height + */ + bool Initialize(int width, int height); + + void SetConfig(const ConfigOptions& config); + + /*! + * \brief Add a frame to the motion tracker and return the results + * + * \param frameArray Javascript array of type Uint8ClampedArray + * + * \return Results of analyzing the new frame + */ + FrameInfo AddVideoFrame(const emscripten::val& frameArray); + + std::vector GetPoints() const { return m_points; } + std::vector GetInitialPoints() const { return m_initialPoints; } + std::vector GetProjectionMatrix() const { return m_projectionMatrix; } + + /*! + * \brief Deinitialize the motion tracker + */ + void Deinitialize() { } + +private: + /*! + * \brief Copy frame from JavaScript memory to the heap + */ + void ReadArray(const emscripten::val& frameArray, uint8_t* data); + + /*! + * \brief Convert a 32-bit RGBA frame to 8-bit grayscale + */ + void ConvertToGrayscale(const cv::Mat& in, cv::Mat& out); + + /*! 
+ * \brief Calculates a scene score based on the last two frames + */ + void GetScreenScoreSAD(const cv::Mat& currentGrayscale, double& currentMafd, double& sceneScore); + + /*! + * \brief Find features in a frame that will be good for tracking + */ + void FindFeatures(const cv::Mat& currentGrayscale, + std::vector& currentPoints, + std::vector& status, + std::vector& errors); + + /*! + * \brief Calculates the optical flow between the last two frames + */ + void CalculateOpticalFlow(const cv::Mat& currentGrayscale, + std::vector& currentPoints, + std::vector& status, + std::vector& errors); + + /*! + * \brief Add frame to history vector + */ + void AddFrameToHistory(FramePtr&& frame); + + /*! + * \brief Perform trajectory reconstruction and get the resulting projection + * matrix + */ + void GetProjectionMatrix(cv::Mat& projectionMatrix); + + /*! + * \brief Fill out the frame struct being returned to JavaScript land + */ + FrameInfo GetResult() const; + + // Video parameters + unsigned int m_width = 0; + unsigned int m_height = 0; + + // Config parameters + ConfigOptions m_config; + + // State parameters + std::vector m_frameHistory; + cv::Mat m_previousGrayscale; + cv::Mat m_cameraMatrix; // 3x3 camera calibration matrix + + // Vision graph + std::shared_ptr m_visionGraph; + + // Frame pool + std::shared_ptr m_framePool; + + // Buffers + cv::Mat m_rgbaFrameBuffer; + cv::Mat m_currentGrayscaleBuffer; + std::vector> m_pointHistoryBuffer; + std::vector m_statusBuffer; + + // Output buffer (holds data returned from AddVideoFrame()) + mutable std::vector m_points; + mutable std::vector m_initialPoints; + mutable std::vector m_projectionMatrix; +}; diff --git a/lib/motion_tracker/motion_tracker_embinder.cpp b/lib/motion_tracker/motion_tracker_embinder.cpp new file mode 100644 index 000000000..9b53da9ba --- /dev/null +++ b/lib/motion_tracker/motion_tracker_embinder.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - 
https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#include "motion_tracker.hpp" + +#include + +using namespace emscripten; + +EMSCRIPTEN_BINDINGS(motion_tracker) { + value_object("FrameInfo") + .field("sceneScore", &FrameInfo::sceneScore) + .field("pointData", &FrameInfo::pointData) + .field("pointSize", &FrameInfo::pointSize) + .field("initialPointData", &FrameInfo::initialPointData) + .field("initialPointSize", &FrameInfo::initialPointSize) + .field("projectionMatrixData", &FrameInfo::projectionMatrixData) + .field("projectionMatrixSize", &FrameInfo::projectionMatrixSize); + + value_object("ConfigOptions") + .field("maxPointCount", &ConfigOptions::maxPointCount) + .field("maxFrameCount", &ConfigOptions::maxFrameCount); + + class_("MotionTracker") + .constructor<>() + .function("initialize", &MotionTracker::Initialize) + .function("setConfig", &MotionTracker::SetConfig) + .function("addVideoFrame", &MotionTracker::AddVideoFrame) + .function("deinitialize", &MotionTracker::Deinitialize); +} diff --git a/lib/motion_tracker/vision_graph.cpp b/lib/motion_tracker/vision_graph.cpp new file mode 100644 index 000000000..e88d6e9f4 --- /dev/null +++ b/lib/motion_tracker/vision_graph.cpp @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#include "vision_graph.hpp" + +#include "api/imgproc.hpp" +#include "api/reconstruction.hpp" +#include "api/scene.hpp" +#include "api/video.hpp" +#include "kernels/cpu/cpu_imgproc.hpp" +#include "kernels/cpu/cpu_reconstruction.hpp" +#include "kernels/cpu/cpu_scene.hpp" +#include "kernels/cpu/cpu_video.hpp" + +#include +#include +#include +#include + +VisionGraph::~VisionGraph() = default; + +void VisionGraph::Compile(unsigned int width, unsigned int height, + const cv::Mat ¤tFrame, + const cv::Mat ¤tGrayscale, + const cv::Mat &previousGrayscale) { + // Inputs + double previousMafd = 0.0; + std::vector currentPoints; + std::vector> pointHistory; + cv::Mat previousCameraMatrix; + cv::Scalar maxFeatures; + cv::Scalar minDistance; + + // Declare graph + // The version of a pipeline expression with a lambda-based constructor is + // used to keep all temporary objects in a dedicated scope + + // Convert to grayscale + cv::GComputation grayscalePipeline([]() { + // Input + cv::GMat rgbaImage; + + // Output + cv::GMat grayscaleImage; + + grayscaleImage = imgproc::RGBA2Gray(rgbaImage); + + return cv::GComputation(cv::GIn(rgbaImage), cv::GOut(grayscaleImage)); + }); + + // Find features + cv::GComputation featurePipeline([width, height]() { + // Input + cv::GMat grayscaleImage; + cv::GScalar maxFeatures; + cv::GScalar minDistance; + + // Output + cv::GArray features; + + features = + imgproc::GoodFeaturesToTrack(grayscaleImage, maxFeatures, minDistance); + + return cv::GComputation(cv::GIn(grayscaleImage, maxFeatures, minDistance), + cv::GOut(features)); + }); + + // Calculate scene score + cv::GComputation sceneScorePipeline([width, height]() { + // Input + cv::GMat prevImg; + cv::GMat nextImg; + cv::GOpaque prevMafd; + + // Output + cv::GOpaque newMafd; + cv::GOpaque sceneScore; + + std::tie(newMafd, sceneScore) = + scene::CalcSceneScore(prevImg, nextImg, prevMafd, width, height); + + return cv::GComputation(cv::GIn(prevImg, nextImg, prevMafd), + cv::GOut(newMafd, 
sceneScore)); + }); + + // Calculate optical flow + cv::GComputation opticalFlowPipeline([]() { + // Input + cv::GMat prevImg; + cv::GMat nextImg; + cv::GArray previousPoints; + cv::GArray> pointHistory; + + // Intermediate + cv::GArray predictedPoints; + + // Output + cv::GArray newPoints; + cv::GArray status; + cv::GArray errors; + + // Predict next discovered points for optical flow + predictedPoints = video::PredictPoints(pointHistory); + + // Perform optical flow calculation + std::tie(newPoints, status, errors) = video::CalcOpticalFlow( + prevImg, nextImg, previousPoints, predictedPoints); + + return cv::GComputation( + cv::GIn(prevImg, nextImg, previousPoints, pointHistory), + cv::GOut(newPoints, status, errors)); + }); + + // Reconstruct trajectory + cv::GComputation reconstructTrajectoryPipeline([]() { + // Input + cv::GArray> pointHistory; + cv::GMat initialCameraMatrix; + + // Output + cv::GMat projectionMatrix; + cv::GMat outputCameraMatrix; + + std::tie(projectionMatrix, outputCameraMatrix) = + reconstruction::ReconstructTrajectory(pointHistory, + initialCameraMatrix); + + return cv::GComputation(cv::GIn(pointHistory, initialCameraMatrix), + cv::GOut(projectionMatrix, outputCameraMatrix)); + }); + + // Declare custom and gapi kernels + static auto kernels = cv::gapi::combine( + cv::gapi::imgproc::cpu::kernels(), cv::gapi::video::cpu::kernels(), + imgproc::kernels(), reconstruction::kernels(), scene::kernels(), + video::kernels()); + + // Compile computation graphs in serial mode + m_applyGrayscale = grayscalePipeline.compile(cv::descr_of(currentFrame), + cv::compile_args(kernels)); + m_findFeatures = featurePipeline.compile( + cv::descr_of(currentGrayscale), cv::descr_of(maxFeatures), + cv::descr_of(minDistance), cv::compile_args(kernels)); + m_calcSceneScore = sceneScorePipeline.compile( + cv::descr_of(previousGrayscale), cv::descr_of(currentGrayscale), + cv::descr_of(previousMafd), cv::compile_args(kernels)); + m_calcOpticalFlow = 
opticalFlowPipeline.compile( + cv::descr_of(previousGrayscale), cv::descr_of(currentGrayscale), + cv::descr_of(currentPoints), cv::descr_of(pointHistory), + cv::compile_args(kernels)); + m_reconstructTrajectory = reconstructTrajectoryPipeline.compile( + cv::descr_of(pointHistory), cv::descr_of(previousCameraMatrix), + cv::compile_args(kernels)); +} + +void VisionGraph::ApplyGrayscale( + // Input + const cv::Mat ¤tFrame, + // Output + cv::Mat ¤tGrayscale) { + auto inVector = cv::gin(currentFrame); + auto outVector = cv::gout(currentGrayscale); + m_applyGrayscale(std::move(inVector), std::move(outVector)); +} + +void VisionGraph::CalcSceneScore( + // Input + const cv::Mat &previousGrayscale, const cv::Mat ¤tGrayscale, + double previousMafd, + // Output + double ¤tMafd, double &sceneScore) { + auto inVector = cv::gin(previousGrayscale, currentGrayscale, previousMafd); + auto outVector = cv::gout(currentMafd, sceneScore); + m_calcSceneScore(std::move(inVector), std::move(outVector)); +} + +void VisionGraph::CalcOpticalFlow( + // Input + const cv::Mat &previousGrayscale, const cv::Mat ¤tGrayscale, + const std::vector &previousPoints, + const std::vector> &pointHistory, + // Output + std::vector ¤tPoints, std::vector &status, + std::vector &errors) { + auto inVector = cv::gin(previousGrayscale, currentGrayscale, previousPoints, + pointHistory); + auto outVector = cv::gout(currentPoints, status, errors); + m_calcOpticalFlow(std::move(inVector), std::move(outVector)); +} + +void VisionGraph::FindFeatures( + // Input + const cv::Mat ¤tGrayscale, unsigned int maxFeatures, + double minDistance, + // Output + std::vector ¤tPoints) { + cv::Scalar maxFeaturesScalar( + maxFeatures > 0 ? 
static_cast(maxFeatures) : -1.0); + cv::Scalar minDistanceScalar(minDistance); + + auto inVector = + cv::gin(currentGrayscale, maxFeaturesScalar, minDistanceScalar); + auto outVector = cv::gout(currentPoints); + m_findFeatures(std::move(inVector), std::move(outVector)); +} + +void VisionGraph::ReconstructTrajectory( + // Input + const std::vector> &pointHistory, + const cv::Mat &previousCameraMatrix, + // Output + cv::Mat &projectionMatrix, cv::Mat &updatedCameraMatrix) { + auto inVector = cv::gin(pointHistory, previousCameraMatrix); + auto outVector = cv::gout(projectionMatrix, updatedCameraMatrix); + m_reconstructTrajectory(std::move(inVector), std::move(outVector)); +} diff --git a/lib/motion_tracker/vision_graph.hpp b/lib/motion_tracker/vision_graph.hpp new file mode 100644 index 000000000..fcb02be3f --- /dev/null +++ b/lib/motion_tracker/vision_graph.hpp @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#pragma once + +#include +#include +#include +#include + +class VisionGraph +{ +public: + VisionGraph() = default; + ~VisionGraph(); + + void Compile(unsigned int width, + unsigned int height, + const cv::Mat& currentFrame, + const cv::Mat& currentGrayscale, + const cv::Mat& previousGrayscale); + + void ApplyGrayscale( + // Input + const cv::Mat& currentFrame, + // Output + cv::Mat& currentGrayscale); + + void CalcSceneScore( + // Input + const cv::Mat& previousGrayscale, + const cv::Mat& currentGrayscale, + double previousMafd, + // Output + double& currentMafd, + double& sceneScore); + + void CalcOpticalFlow( + // Input + const cv::Mat& previousGrayscale, + const cv::Mat& currentGrayscale, + const std::vector& previousPoints, + const std::vector>& pointHistory, + // Output + std::vector& currentPoints, + std::vector& status, + std::vector& errors); + + void FindFeatures( + // Input + const cv::Mat& currentGrayscale, + unsigned int maxFeatures, + double minDistance, + // Output + std::vector& currentPoints); + + void ReconstructTrajectory( + // Input + const std::vector>& pointHistory, + const cv::Mat& previousCameraMatrix, + // Output + cv::Mat& projectionMatrix, + cv::Mat& updatedCameraMatrix); + +private: + cv::GCompiled m_applyGrayscale; + cv::GCompiled m_findFeatures; + cv::GCompiled m_calcSceneScore; + cv::GCompiled m_calcOpticalFlow; + cv::GCompiled m_reconstructTrajectory; +}; diff --git a/lib/utils/emscripten_utils.cpp b/lib/utils/emscripten_utils.cpp new file mode 100644 index 000000000..00ffc089c --- /dev/null +++ b/lib/utils/emscripten_utils.cpp @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#include "emscripten_utils.hpp" + +unsigned int EmscriptenUtils::ArrayLength(const emscripten::val &array) { + return array["length"].as(); +} + +void EmscriptenUtils::GetArrayData(const emscripten::val &array, uint8_t *dest, + unsigned int destLength) { + emscripten::val memoryView{emscripten::typed_memory_view(destLength, dest)}; + memoryView.call("set", array); +} diff --git a/lib/utils/emscripten_utils.hpp b/lib/utils/emscripten_utils.hpp new file mode 100644 index 000000000..0c849034f --- /dev/null +++ b/lib/utils/emscripten_utils.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#pragma once + +#include +#include + +class EmscriptenUtils +{ +public: + /*! + * \brief Get the length of the JavaScript array + * + * \param array An array of type Uint8ClampedArray + * + * \return The length as returned by array.length + */ + static unsigned int ArrayLength(const emscripten::val& array); + + /*! + * \brief Copy array data from JavaScript into the WASM engine + * + * \param array An array of type Uint8ClampedArray + * \param dest The destination memory + * \param destLength The size of the buffer pointed to by dest + */ + static void GetArrayData(const emscripten::val& array, uint8_t* dest, unsigned int destLength); +}; diff --git a/lib/utils/frame_pool.cpp b/lib/utils/frame_pool.cpp new file mode 100644 index 000000000..a54f94359 --- /dev/null +++ b/lib/utils/frame_pool.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#include "frame_pool.hpp" + +#include + +void FrameData::Reset() { + mafd = 0.0; + sceneScore = 0.0; + points.clear(); + status.clear(); + errors.clear(); + projectionMatrix.create(3, 4, CV_32F); + projectionMatrix = cv::Scalar::all(0); +} + +FramePtr FramePool::GetFrame() { + // Consider a frame unused if its reference count drops to 1 + auto isUnused = [](const FramePtr &frameData) { + return frameData.use_count() == 1; + }; + + // Search for unused frame + auto it = std::find_if(m_frames.begin(), m_frames.end(), isUnused); + + // If found, reset and return it + if (it != m_frames.end()) { + (*it)->Reset(); + return *it; + } + + // Unused frame not found, create a new one + FramePtr newFrame = std::make_shared(); + + // Add frame to frame pool + m_frames.push_back(newFrame); + + return newFrame; +} diff --git a/lib/utils/frame_pool.hpp b/lib/utils/frame_pool.hpp new file mode 100644 index 000000000..df7fc8b88 --- /dev/null +++ b/lib/utils/frame_pool.hpp @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#pragma once + +#include +#include +#include +#include +#include + +struct FrameData; + +// Alias used for frames on the heap +using FramePtr = std::shared_ptr; + +struct FrameData +{ + // Mean absolute frame difference (see image_utils.hpp) + double mafd = 0.0; + + // Scene score (see image_utils.hpp) + double sceneScore = 0.0; + + // Points corresponding to features being tracked + std::vector points; + + // Status; each element of the vector is set to 1 if the flow for the + // corresponding features has been found, otherwise, it is set to 0. 
+ std::vector status; + + // Errors; each element of the vector is set to an error for the + // corresponding feature, type of the error measure can be set in flags + // parameter; if the flow wasn't found then the error is not defined + // (use the status parameter to find such cases). + std::vector errors; + + // 3x4 projection matrix + cv::Mat projectionMatrix; + + void Reset(); +}; + +/* + * \brief Nice, simple frame pool + * + * Frames are reference-tracked using std::shared_ptr. When a new frame is + * requested, we search the pool for a frame with a reference count of one. + * When the frame becomes unused, simply reset the pointer, and it will become + * available in the pool again. + */ +class FramePool +{ +public: + FramePtr GetFrame(); + +private: + std::vector m_frames; +}; diff --git a/lib/utils/image_utils.cpp b/lib/utils/image_utils.cpp new file mode 100644 index 000000000..a84a00f6c --- /dev/null +++ b/lib/utils/image_utils.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#include "image_utils.hpp" + +#include +#include + +unsigned int ImageUtils::GetStride(unsigned int width) { return width; } + +unsigned int ImageUtils::GetImageBufferLength(unsigned int width, + unsigned int height) { + return GetStride(width) * height; +} + +float ImageUtils::CalcSceneScore(float currentMafd, float previousMafd) { + const float diff = std::abs(currentMafd - previousMafd); + + return ClipValue(std::min(currentMafd, diff) / 100.0f, 0.0f, 1.0f); +} + +float ImageUtils::CalcSceneMAFD(const uint8_t *previousFrame, + const uint8_t *currentFrame, unsigned int width, + unsigned int height) { + // Calculate SAD + const uint8_t *const src1 = previousFrame; + const ptrdiff_t stride1 = GetStride(width); + const uint8_t *const src2 = currentFrame; + const ptrdiff_t stride2 = GetStride(width); + const uint64_t sad = + CalcSceneSAD(src1, stride1, src2, stride2, width, height); + + // Count pixels + const uint64_t count = width * height; + + // Calculate mean absolute frame difference + const float mafd = static_cast(sad) / static_cast(count); + + return mafd; +} + +uint64_t ImageUtils::CalcSceneSAD(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2, + ptrdiff_t width, ptrdiff_t height) { + uint64_t sad = 0; + + for (unsigned y = 0; y < height; y++) { + for (unsigned x = 0; x < width; x++) + sad += std::abs(src1[x] - src2[x]); + src1 += stride1; + src2 += stride2; + } + + return sad; +} + +float ImageUtils::ClipValue(float value, float min, float max) { + if (value < min) + value = min; + else if (value > max) + value = max; + + return value; +} diff --git a/lib/utils/image_utils.hpp b/lib/utils/image_utils.hpp new file mode 100644 index 000000000..20e28697a --- /dev/null +++ b/lib/utils/image_utils.hpp @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. 
+ */ + +#pragma once + +#include +#include + +class ImageUtils +{ +public: + /*! + * \brief Get byte length of line for grayscale image + * + * \param width The image width + * + * \return The length of each line in the image + */ + static unsigned int GetStride(unsigned int width); + + /*! + * \brief Get length of buffer for an RGBA image of the given size + * + * \param width The image width + * \param height The image height + * + * \return The length of the buffer to hold the RGBA image + */ + static unsigned int GetImageBufferLength(unsigned int width, unsigned int height); + + /*! + * \brief Calculate the "scene score" of an image + * + * The scene score is a value in the range [0.0, 1.0] that indicates the + * likelihood that an abrupt scene change has occurred. + * + * This approach calculates the sum of absolute differences (SAD) to measure + * the similarity of the two images. + * + * Using the SAD value, a mean absolute frame difference difference (MAFD) + * is computed. The scene score is then given by comparing MAFD values + * between successive frames. + * + * \param previousMafd The MAFD of the previous frame + * \param currentMafd The MAFD of the current frame + * + * \return The scene score + * + */ + static float CalcSceneScore(float currentMafd, float previousMafd); + + /*! + * \brief Get the Mean Absolute Frame Difference + * + * \param currentFrame The current frame + * \param previousFrame The previous frame + * \param width The image width + * \param height The image height + * + * \return The MAFD + * + * \sa CalcSceneScore() + */ + static float CalcSceneMAFD(const uint8_t* previousFrame, + const uint8_t* currentFrame, + unsigned int width, unsigned int height); + +private: + /*! 
+ * \brief Get the Sum of Absolute Differences (a measure of image similarity) + * between two frames + * + * \param src1 The buffer for the first frame + * \param stride1 The length of each line of the first frame + * \param src2 The buffer for the second frame + * \param stride2 The length of each line of the second frame + * \param width The image width + * \param height The image height + * + * \return The SAD of the two frames + */ + static uint64_t CalcSceneSAD(const uint8_t* src1, ptrdiff_t stride1, + const uint8_t* src2, ptrdiff_t stride2, + ptrdiff_t width, ptrdiff_t height); + + /** + * Clip a float value into the min-max range + * + * \param a The value to clip + * \param min Minimum value of the clip range + * \param max Maximum value of the clip range + * + * \return Clipped value + */ + static float ClipValue(float value, float min, float max); +}; diff --git a/lib/utils/math_utils.cpp b/lib/utils/math_utils.cpp new file mode 100644 index 000000000..ca72be060 --- /dev/null +++ b/lib/utils/math_utils.cpp @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#include "math_utils.hpp" + +#include + +double MathUtils::GeometricMean(unsigned int width, unsigned int height) { + double product = static_cast(width) * static_cast(height); + + return std::sqrt(product); +} diff --git a/lib/utils/math_utils.hpp b/lib/utils/math_utils.hpp new file mode 100644 index 000000000..5bdf88286 --- /dev/null +++ b/lib/utils/math_utils.hpp @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2020-2023 retro.ai + * This file is part of retro-dapp - https://github.com/RetroAI/retro-dapp + * + * SPDX-License-Identifier: Apache-2.0 + * See LICENSE.txt for more information. + */ + +#pragma once + +class MathUtils +{ +public: + /*! 
+ * \brief Calculate the geometric mean of two dimensions + * + * \param width An image width, in pixels + * \param height An image height, in pixels + * + * \return The geometric mean, the square root of the product + */ + static double GeometricMean(unsigned int width, unsigned int height); +};