Skip to content

Commit

Permalink
Preprocess C++
Browse files Browse the repository at this point in the history
Differential Revision: D61833480

Pull Request resolved: pytorch#4987
  • Loading branch information
lucylq authored Aug 30, 2024
1 parent 12039af commit 1263964
Show file tree
Hide file tree
Showing 4 changed files with 292 additions and 0 deletions.
118 changes: 118 additions & 0 deletions examples/models/flamingo/preprocess/preprocess.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include "executorch/examples/models/flamingo/preprocess/preprocess.h"

#include <algorithm>
#include <cassert>

// Collects every positive divisor of n, smallest first. For n < 1 there is
// nothing to test, so the result is empty.
std::vector<int> _get_factors(int n) {
  std::vector<int> divisors;
  int candidate = 1;
  while (candidate <= n) {
    const bool divides_evenly = (n % candidate) == 0;
    if (divides_evenly) {
      divisors.push_back(candidate);
    }
    ++candidate;
  }
  return divisors;
}

// Enumerates every canvas resolution that can be tiled with at most
// max_num_tiles square tiles of side tile_size.
//
// For each tile count from max_num_tiles down to 1, every (rows, cols) grid
// with rows * cols == tile count contributes one entry
// {rows * tile_size, cols * tile_size}, i.e. {height, width}. Within one tile
// count, entries are ordered by ascending height.
//
// Returns an empty vector when max_num_tiles < 1.
std::vector<std::vector<int>> find_supported_resolutions(
    int max_num_tiles,
    int tile_size) {
  std::vector<std::vector<int>> supported_resolutions;
  for (int num_tiles = max_num_tiles; num_tiles > 0; --num_tiles) {
    // Walk the divisors of num_tiles directly instead of materializing a
    // temporary factor list for every tile count.
    for (int rows = 1; rows <= num_tiles; ++rows) {
      if (num_tiles % rows != 0) {
        continue;
      }
      const int cols = num_tiles / rows;
      supported_resolutions.push_back({rows * tile_size, cols * tile_size});
    }
  }
  return supported_resolutions;
}

// Chooses, from possible_resolutions, the canvas an image should be resized
// into without distortion.
//
// image_size and every entry of possible_resolutions are {height, width}.
// A candidate's scale is the smaller of (canvas_h / image_h) and
// (canvas_w / image_w), i.e. the factor at which the image just fits.
//
// Selection rules:
//   * If any candidate has scale >= 1 (the image fits without shrinking),
//     only those candidates compete: the largest scale wins when
//     resize_to_max_canvas is true, the smallest otherwise.
//   * Otherwise the largest scale wins (least downscaling).
//   * Among candidates with an equal scale, the one with the smallest area
//     wins, to reduce padding; on equal area the earliest entry is kept.
//
// The result does not depend on the order of possible_resolutions, except for
// exact scale-and-area ties. Returns an empty vector when
// possible_resolutions is empty. Both image dimensions are expected to be
// positive; they are used as divisors.
std::vector<int> get_canvas_best_fit(
    std::vector<int> image_size,
    std::vector<std::vector<int>> possible_resolutions,
    bool resize_to_max_canvas) {
  assert(image_size.size() == 2);
  const int image_h = image_size[0];
  const int image_w = image_size[1];

  std::vector<int> best_resolution; // Empty until the first candidate.
  float best_scale = 0.0f;
  int best_area = 0;

  for (const auto& resolution : possible_resolutions) {
    assert(resolution.size() == 2);
    const float scale_h = resolution[0] / static_cast<float>(image_h);
    const float scale_w = resolution[1] / static_cast<float>(image_w);

    // Get limiting side scaling -> no distortion.
    const float scale = scale_h < scale_w ? scale_h : scale_w;
    // @lint-ignore CLANGTIDY facebook-hte-ParameterUncheckedArrayBounds
    const int area = resolution[0] * resolution[1];

    bool is_better = false;
    if (best_resolution.empty()) {
      // The first candidate is always taken.
      is_better = true;
    } else if (scale == best_scale) {
      // Same scale: prefer the smaller canvas to reduce padding.
      is_better = area < best_area;
    } else if (scale >= 1.0f && best_scale >= 1.0f) {
      // Both are upscaling options; the flag picks max or min upscaling.
      is_better =
          resize_to_max_canvas ? scale > best_scale : scale < best_scale;
    } else {
      // At least one of the two is a downscaling option. A larger scale is
      // better in both remaining cases: an upscaling option always beats a
      // downscaling one, and between two downscaling options the one that
      // shrinks the image least wins.
      is_better = scale > best_scale;
    }

    if (is_better) {
      best_resolution = resolution;
      best_scale = scale;
      best_area = area;
    }
  }
  return best_resolution;
}

// Computes the {height, width} an image of image_size takes when it is scaled,
// keeping its aspect ratio, to fit inside target_size ({height, width}).
//
// When max_size is positive, each target dimension is first replaced by
// min(max(image dimension, max_size), target dimension). Each resized
// dimension is truncated to an int and capped at its bounding dimension.
std::vector<int> get_inscribed_size(
    std::vector<int> image_size,
    std::vector<int> target_size,
    int max_size) {
  assert(image_size.size() == 2);
  assert(target_size.size() == 2);

  const int src_h = image_size[0];
  const int src_w = image_size[1];

  // Bounding box the image must fit into, optionally limited by max_size.
  const bool limit_by_max_size = max_size > 0;
  const int bound_h = limit_by_max_size
      ? std::min(std::max(src_h, max_size), target_size[0])
      : target_size[0];
  const int bound_w = limit_by_max_size
      ? std::min(std::max(src_w, max_size), target_size[1])
      : target_size[1];

  // Scale factors that make the width / height exactly fill the bound.
  const float fill_width_scale = bound_w / static_cast<float>(src_w);
  const float fill_height_scale = bound_h / static_cast<float>(src_h);

  const int scaled_h = static_cast<int>(src_h * fill_width_scale);
  const int scaled_w = static_cast<int>(src_w * fill_height_scale);

  return {std::min(scaled_h, bound_h), std::min(scaled_w, bound_w)};
}
41 changes: 41 additions & 0 deletions examples/models/flamingo/preprocess/preprocess.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <vector>

// C++ implementation of the python functions in torchtune:
// https://github.com/pytorch/torchtune/tree/main/torchtune/modules/transforms/vision_utils

// Calculates all positive factors (divisors) of n, in ascending order.
// For example, _get_factors(6) returns {1, 2, 3, 6}. Returns an empty vector
// when n is less than 1.
std::vector<int> _get_factors(int n);

// Computes all combinations of resolutions, each a multiple of tile_size,
// that contain up to max_num_tiles tiles. Useful when dividing an image into
// tiles. For example, if we want at most 2 tiles per image, then we can
// support the following resolutions: (1x2, 2x1, 1x1) * tile_size.
//
// Returns a vector of {height, width} pairs, ordered from the largest tile
// count down to a single tile. Returns an empty vector when max_num_tiles is
// less than 1.
std::vector<std::vector<int>> find_supported_resolutions(
int max_num_tiles,
int tile_size);

// Determines the best canvas possible from a list of possible resolutions to
// resize an image to, without distortion.
//
// image_size and each entry of possible_resolutions must hold exactly two
// values, {height, width}. A candidate's scale is the smaller of its
// height ratio and width ratio relative to the image. resize_to_max_canvas
// selects the preference among candidates that do not require downscaling:
// the largest scale when true, the smallest when false.
//
// Returns the chosen {height, width}, or an empty vector when
// possible_resolutions is empty.
std::vector<int> get_canvas_best_fit(
std::vector<int> image_size,
std::vector<std::vector<int>> possible_resolutions,
bool resize_to_max_canvas);

// Calculates the size of an image if it were resized to be inscribed within
// the target size (the canvas_size parameter). The image is upscaled or
// downscaled such that one side is equal to the target size and the other
// side is less than or equal to the target size.
//
// image_size and canvas_size must hold exactly two values, {height, width}.
// When max_size is positive, each target dimension is first replaced by
// min(max(image dimension, max_size), target dimension); pass a non-positive
// max_size to use canvas_size unchanged.
//
// Returns the resized {height, width}.
std::vector<int> get_inscribed_size(
std::vector<int> image_size,
std::vector<int> canvas_size,
int max_size);
113 changes: 113 additions & 0 deletions examples/models/flamingo/preprocess/preprocess_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/examples/models/flamingo/preprocess/preprocess.h>
#include <gtest/gtest.h>

using namespace ::testing;

// Mirror the torchtune python testing:
// https://github.com/pytorch/torchtune/tree/main/tests/torchtune/modules/transforms

void test_find_supported_resolutions(
int max_num_tiles,
int tile_size,
std::vector<std::vector<int>> expected_resolutions) {
std::vector<std::vector<int>> resolutions =
find_supported_resolutions(max_num_tiles, tile_size);

EXPECT_EQ(resolutions.size(), expected_resolutions.size());

for (int i = 0; i < resolutions.size(); i++) {
EXPECT_EQ(resolutions[i].size(), expected_resolutions[i].size());
EXPECT_EQ(resolutions[i][0], expected_resolutions[i][0]); // height
EXPECT_EQ(resolutions[i][1], expected_resolutions[i][1]); // width
}
}

// Expected values cover 1 to 4 tiles; entries are listed from the largest tile
// count down to a single tile.
TEST(PreprocessTest, TestFindSupportedResolution) {
  using Resolutions = std::vector<std::vector<int>>;

  const Resolutions one_tile = {{224, 224}};
  test_find_supported_resolutions(1, 224, one_tile);

  const Resolutions up_to_two_tiles = {{100, 200}, {200, 100}, {100, 100}};
  test_find_supported_resolutions(2, 100, up_to_two_tiles);

  const Resolutions up_to_three_tiles = {
      {50, 150}, {150, 50}, {50, 100}, {100, 50}, {50, 50}};
  test_find_supported_resolutions(3, 50, up_to_three_tiles);

  const Resolutions up_to_four_tiles = {
      {300, 1200},
      {600, 600},
      {1200, 300},
      {300, 900},
      {900, 300},
      {300, 600},
      {600, 300},
      {300, 300},
  };
  test_find_supported_resolutions(4, 300, up_to_four_tiles);
}

void test_get_canvas_best_fit(
std::vector<int> image_size,
std::vector<std::vector<int>> possible_resolutions,
bool resize_to_max_canvas,
std::vector<int> expected_best_resolution) {
std::vector<int> best_resolution = get_canvas_best_fit(
image_size, possible_resolutions, resize_to_max_canvas);
EXPECT_EQ(best_resolution[0], expected_best_resolution[0]); // height
EXPECT_EQ(best_resolution[1], expected_best_resolution[1]); // width
}

// Runs several image sizes against the same candidate canvases (all layouts of
// up to four 224-pixel tiles), with and without resize_to_max_canvas.
TEST(PreprocessTest, TestGetCanvasBestFit_200x300_F) {
  const std::vector<std::vector<int>> candidate_canvases = {
      {224, 896},
      {448, 448},
      {224, 224},
      {896, 224},
      {224, 672},
      {672, 224},
      {224, 448},
      {448, 224},
  };

  struct BestFitCase {
    std::vector<int> image_size;
    bool resize_to_max_canvas;
    std::vector<int> expected;
  };

  const std::vector<BestFitCase> cases = {
      {{200, 300}, false, {224, 448}},
      {{200, 500}, true, {224, 672}},
      {{200, 200}, false, {224, 224}},
      {{200, 100}, true, {448, 224}},
  };

  for (const auto& best_fit_case : cases) {
    test_get_canvas_best_fit(
        best_fit_case.image_size,
        candidate_canvases,
        best_fit_case.resize_to_max_canvas,
        best_fit_case.expected);
  }
}

// Calls get_inscribed_size(image_size, target_size, max_size) and expects the
// returned {height, width} to equal expected_target_size.
void test_get_inscribed_size(
std::vector<int> image_size,
std::vector<int> target_size,
int max_size,
std::vector<int> expected_target_size) {
std::vector<int> result =
get_inscribed_size(image_size, target_size, max_size);
// Non-fatal checks, so a height mismatch does not hide a width mismatch.
EXPECT_EQ(result[0], expected_target_size[0]); // height
EXPECT_EQ(result[1], expected_target_size[1]); // width
}
// Covers upscaling and downscaling, with a positive max_size and with
// max_size disabled (-1).
TEST(PreprocessTest, GetInscribedSize) {
  struct InscribedCase {
    std::vector<int> image_size;
    std::vector<int> target_size;
    int max_size;
    std::vector<int> expected;
  };

  const std::vector<InscribedCase> cases = {
      {{200, 100}, {1000, 1200}, 600, {600, 300}},
      {{2000, 200}, {1000, 1200}, 2000, {1000, 100}},
      {{400, 200}, {1000, 1200}, -1, {1000, 500}},
      {{1000, 500}, {400, 300}, -1, {400, 200}},
  };

  for (const auto& inscribed_case : cases) {
    test_get_inscribed_size(
        inscribed_case.image_size,
        inscribed_case.target_size,
        inscribed_case.max_size,
        inscribed_case.expected);
  }
}
20 changes: 20 additions & 0 deletions examples/models/flamingo/preprocess/targets.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

def define_common_targets():
    """Defines targets that should be shared between fbcode and xplat.
    The directory containing this targets.bzl file should also contain both
    TARGETS and BUCK files that call this function.
    """

    # The test target is named after, and depends on, the library target.
    library_name = "preprocess"

    runtime.cxx_library(
        name = library_name,
        srcs = ["preprocess.cpp"],
        exported_headers = ["preprocess.h"],
    )

    runtime.cxx_test(
        name = library_name + "_test",
        srcs = ["preprocess_test.cpp"],
        deps = [":" + library_name],
    )

0 comments on commit 1263964

Please sign in to comment.