forked from pytorch/executorch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Differential Revision: D61833480 Pull Request resolved: pytorch#4987
- Loading branch information
Showing
4 changed files
with
292 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
#include "executorch/examples/models/flamingo/preprocess/preprocess.h" | ||
|
||
#include <algorithm> | ||
#include <cassert> | ||
|
||
// Collects every positive divisor of n in ascending order by trial division
// over [1, n]. The result is empty when n < 1, since that range is empty.
std::vector<int> _get_factors(int n) {
  std::vector<int> divisors;
  int candidate = 1;
  while (candidate <= n) {
    const bool divides_evenly = (n % candidate) == 0;
    if (divides_evenly) {
      divisors.push_back(candidate);
    }
    ++candidate;
  }
  return divisors;
}
|
||
std::vector<std::vector<int>> find_supported_resolutions( | ||
int max_num_tiles, | ||
int tile_size) { | ||
std::vector<std::vector<int>> supported_resolutions; | ||
for (int _tile_size = max_num_tiles; _tile_size > 0; _tile_size--) { | ||
auto factors = _get_factors(_tile_size); | ||
for (int i = 0; i < factors.size(); i++) { | ||
int height = factors[i]; | ||
int width = _tile_size / factors[i]; | ||
supported_resolutions.push_back({height * tile_size, width * tile_size}); | ||
} | ||
} | ||
return supported_resolutions; | ||
} | ||
|
||
// Picks, from possible_resolutions, the canvas that an image of image_size
// ({height, width}) should be resized into without distortion.
//
// Every candidate {height, width} is scored by its limiting scale,
// min(canvas_h / image_h, canvas_w / image_w). Selection rules:
//   * A candidate with scale >= 1 (no downscaling needed) always beats one
//     with scale < 1, regardless of the order they appear in.
//   * Among candidates with scale >= 1, the largest scale wins when
//     resize_to_max_canvas is true and the smallest scale wins otherwise.
//   * Among candidates with scale < 1, the largest scale wins (least
//     downscaling).
//   * Candidates with equal scale are resolved in favour of the smaller area
//     to reduce padding; if the area is equal too, the earliest one is kept.
//
// Returns the chosen {height, width}, or an empty vector when
// possible_resolutions is empty. Both image dimensions must be positive and
// every candidate must have exactly two entries.
std::vector<int> get_canvas_best_fit(
    std::vector<int> image_size,
    std::vector<std::vector<int>> possible_resolutions,
    bool resize_to_max_canvas) {
  assert(image_size.size() == 2);
  const int image_h = image_size[0];
  const int image_w = image_size[1];
  // Both dimensions are used as divisors when computing the scales.
  assert(image_h > 0 && image_w > 0);

  std::vector<int> best_resolution;
  float best_scale = 0.0f;
  int best_area = 0;

  for (const auto& resolution : possible_resolutions) {
    assert(resolution.size() == 2);
    const float scale_h = resolution[0] / static_cast<float>(image_h);
    const float scale_w = resolution[1] / static_cast<float>(image_w);

    // Get limiting side scaling -> no distortion
    const float scale = std::min(scale_h, scale_w);
    const int area = resolution[0] * resolution[1];

    bool is_better = false;
    if (best_resolution.empty()) {
      // The first candidate is always accepted.
      is_better = true;
    } else {
      const bool upscales = scale >= 1.0f;
      const bool best_upscales = best_scale >= 1.0f;
      if (upscales != best_upscales) {
        // Upscaling options take priority over downscaling ones even when a
        // downscaling option was seen first.
        is_better = upscales;
      } else if (scale == best_scale) {
        // If there are multiple resolutions with the same scale, get the one
        // with minimum area to reduce padding.
        is_better = area < best_area;
      } else if (upscales && !resize_to_max_canvas) {
        // Smallest upscaling that still avoids downscaling.
        is_better = scale < best_scale;
      } else {
        // Largest upscaling, or minimum downscaling (max scale for
        // scales < 1).
        is_better = scale > best_scale;
      }
    }

    if (is_better) {
      best_resolution = resolution;
      best_scale = scale;
      best_area = area;
    }
  }
  return best_resolution;
}
|
||
// Computes the {height, width} an image of image_size ({height, width}) gets
// when it is resized, preserving aspect ratio, to fit inside target_size
// ({height, width}).
//
// When max_size > 0 the target is bounded first: each target dimension
// becomes min(max(image_dim, max_size), target_dim). Any max_size <= 0 leaves
// target_size untouched.
//
// Each output dimension is the image dimension scaled by the ratio of the
// other axis (truncated towards zero) and clamped to the target dimension.
// Both image dimensions must be positive.
std::vector<int> get_inscribed_size(
    std::vector<int> image_size,
    std::vector<int> target_size,
    int max_size) {
  assert(image_size.size() == 2);
  assert(target_size.size() == 2);

  const int image_height = image_size[0];
  const int image_width = image_size[1];
  // Both dimensions are divisors below; a zero would produce inf/NaN, and
  // converting those to int is undefined behaviour.
  assert(image_height > 0 && image_width > 0);

  int target_height = target_size[0];
  int target_width = target_size[1];

  if (max_size > 0) {
    target_height = std::min(std::max(image_height, max_size), target_size[0]);
    target_width = std::min(std::max(image_width, max_size), target_size[1]);
  }

  // Scale factor that makes the width (resp. height) exactly fill the target.
  const float width_ratio = target_width / static_cast<float>(image_width);
  const float height_ratio = target_height / static_cast<float>(image_height);

  const int resize_height =
      std::min(static_cast<int>(image_height * width_ratio), target_height);
  const int resize_width =
      std::min(static_cast<int>(image_width * height_ratio), target_width);

  return {resize_height, resize_width};
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <vector> | ||
|
||
// C++ implementation of the python functions in torchtune: | ||
// https://github.com/pytorch/torchtune/tree/main/torchtune/modules/transforms/vision_utils | ||
|
||
// Calculate all factors of a given number.
// Returns the positive divisors of n in ascending order; the result is empty
// when n < 1.
std::vector<int> _get_factors(int n);

// Computes all combinations of resolutions, multiple of tile_size, that
// contain up to max_num_tiles. Useful for when dividing an image into tiles.
// For example, if we want at most 2 tiles per image, then we can support the
// following resolutions: (1x2, 2x1, 1x1) * tile_size.
// Returns a vector of {height, width} pairs, ordered from the largest tile
// count down to a single tile.
std::vector<std::vector<int>> find_supported_resolutions(
    int max_num_tiles,
    int tile_size);

// Determines the best canvas possible from a list of possible resolutions to
// resize an image to, without distortion.
//   image_size: {height, width} of the image.
//   possible_resolutions: candidate canvases, each {height, width}.
//   resize_to_max_canvas: among canvases that need no downscaling, prefer the
//     largest scale factor when true and the smallest when false.
// Returns the selected {height, width}; empty if possible_resolutions is
// empty.
std::vector<int> get_canvas_best_fit(
    std::vector<int> image_size,
    std::vector<std::vector<int>> possible_resolutions,
    bool resize_to_max_canvas);

// Calculates the size of an image, if it was resized to be inscribed within the
// target_size. It is upscaled or downscaled such that one size is equal to the
// target_size, and the second size is less than or equal to the target_size.
//   image_size: {height, width} of the image.
//   target_size: {height, width} to inscribe the image in.
//   max_size: when > 0, each target dimension is first replaced by
//     min(max(image_dim, max_size), target_dim); values <= 0 disable this
//     bound.
// Returns the resized {height, width}.
std::vector<int> get_inscribed_size(
    std::vector<int> image_size,
    std::vector<int> target_size,
    int max_size);
113 changes: 113 additions & 0 deletions
113
examples/models/flamingo/preprocess/preprocess_test.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
#include <executorch/examples/models/flamingo/preprocess/preprocess.h> | ||
#include <gtest/gtest.h> | ||
|
||
using namespace ::testing; | ||
|
||
// Mirror the torchtune python testing: | ||
// https://github.com/pytorch/torchtune/tree/main/tests/torchtune/modules/transforms | ||
|
||
void test_find_supported_resolutions( | ||
int max_num_tiles, | ||
int tile_size, | ||
std::vector<std::vector<int>> expected_resolutions) { | ||
std::vector<std::vector<int>> resolutions = | ||
find_supported_resolutions(max_num_tiles, tile_size); | ||
|
||
EXPECT_EQ(resolutions.size(), expected_resolutions.size()); | ||
|
||
for (int i = 0; i < resolutions.size(); i++) { | ||
EXPECT_EQ(resolutions[i].size(), expected_resolutions[i].size()); | ||
EXPECT_EQ(resolutions[i][0], expected_resolutions[i][0]); // height | ||
EXPECT_EQ(resolutions[i][1], expected_resolutions[i][1]); // width | ||
} | ||
} | ||
|
||
// Exercises find_supported_resolutions for budgets of 1 to 4 tiles. Expected
// lists are in the order the function emits them: largest tile count first,
// heights ascending within a tile count.
TEST(PreprocessTest, TestFindSupportedResolution) {
  const std::vector<std::vector<int>> one_tile = {{224, 224}};
  test_find_supported_resolutions(1, 224, one_tile);

  const std::vector<std::vector<int>> up_to_two_tiles = {
      {100, 200}, {200, 100}, {100, 100}};
  test_find_supported_resolutions(2, 100, up_to_two_tiles);

  const std::vector<std::vector<int>> up_to_three_tiles = {
      {50, 150}, {150, 50}, {50, 100}, {100, 50}, {50, 50}};
  test_find_supported_resolutions(3, 50, up_to_three_tiles);

  const std::vector<std::vector<int>> up_to_four_tiles = {
      {300, 1200},
      {600, 600},
      {1200, 300},
      {300, 900},
      {900, 300},
      {300, 600},
      {600, 300},
      {300, 300},
  };
  test_find_supported_resolutions(4, 300, up_to_four_tiles);
}
|
||
void test_get_canvas_best_fit( | ||
std::vector<int> image_size, | ||
std::vector<std::vector<int>> possible_resolutions, | ||
bool resize_to_max_canvas, | ||
std::vector<int> expected_best_resolution) { | ||
std::vector<int> best_resolution = get_canvas_best_fit( | ||
image_size, possible_resolutions, resize_to_max_canvas); | ||
EXPECT_EQ(best_resolution[0], expected_best_resolution[0]); // height | ||
EXPECT_EQ(best_resolution[1], expected_best_resolution[1]); // width | ||
} | ||
|
||
// Checks get_canvas_best_fit against one shared list of 224-based canvases
// for four image sizes, alternating the resize_to_max_canvas flag.
TEST(PreprocessTest, TestGetCanvasBestFit_200x300_F) {
  const std::vector<std::vector<int>> canvases = {
      {224, 896},
      {448, 448},
      {224, 224},
      {896, 224},
      {224, 672},
      {672, 224},
      {224, 448},
      {448, 224},
  };
  const bool kKeepSmallest = false; // resize_to_max_canvas = false
  const bool kResizeToMax = true; // resize_to_max_canvas = true

  test_get_canvas_best_fit({200, 300}, canvases, kKeepSmallest, {224, 448});
  test_get_canvas_best_fit({200, 500}, canvases, kResizeToMax, {224, 672});
  test_get_canvas_best_fit({200, 200}, canvases, kKeepSmallest, {224, 224});
  test_get_canvas_best_fit({200, 100}, canvases, kResizeToMax, {448, 224});
}
|
||
void test_get_inscribed_size( | ||
std::vector<int> image_size, | ||
std::vector<int> target_size, | ||
int max_size, | ||
std::vector<int> expected_target_size) { | ||
std::vector<int> result = | ||
get_inscribed_size(image_size, target_size, max_size); | ||
EXPECT_EQ(result[0], expected_target_size[0]); // height | ||
EXPECT_EQ(result[1], expected_target_size[1]); // width | ||
} | ||
// Covers get_inscribed_size with and without a max_size bound, for both
// upscaling and downscaling targets.
TEST(PreprocessTest, GetInscribedSize) {
  const std::vector<int> large_target = {1000, 1200};
  const std::vector<int> small_target = {400, 300};
  const int kNoMaxSize = -1;

  // max_size caps the upscaling.
  test_get_inscribed_size({200, 100}, large_target, 600, {600, 300});
  // The image is already taller than the target.
  test_get_inscribed_size({2000, 200}, large_target, 2000, {1000, 100});
  // Unbounded upscaling.
  test_get_inscribed_size({400, 200}, large_target, kNoMaxSize, {1000, 500});
  // Unbounded downscaling.
  test_get_inscribed_size({1000, 500}, small_target, kNoMaxSize, {400, 200});
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") | ||
|
||
def define_common_targets():
    """Declares the build targets shared by fbcode and xplat.

    The directory holding this targets.bzl file is expected to also hold the
    TARGETS and BUCK files that invoke this function.
    """

    # Library with the image-preprocessing helpers.
    runtime.cxx_library(
        name = "preprocess",
        srcs = [
            "preprocess.cpp",
        ],
        exported_headers = [
            "preprocess.h",
        ],
    )

    # Unit tests for the library above.
    runtime.cxx_test(
        name = "preprocess_test",
        srcs = [
            "preprocess_test.cpp",
        ],
        deps = [
            ":preprocess",
        ],
    )