From d76cdf4507ebf2811c0e48d46fafdfdb8b446c94 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 25 Jun 2024 16:11:25 -0700 Subject: [PATCH] Push initial version of kernel 0 --- .../hdr_plus/hdr_plus_generator_kernel_0.cpp | 446 +++++++++++++++++ .../apps/hdr_plus/process_kernel_0.cpp | 473 ++++++++++++++++++ 2 files changed, 919 insertions(+) create mode 100644 apps/hardware_benchmarks/apps/hdr_plus/hdr_plus_generator_kernel_0.cpp create mode 100644 apps/hardware_benchmarks/apps/hdr_plus/process_kernel_0.cpp diff --git a/apps/hardware_benchmarks/apps/hdr_plus/hdr_plus_generator_kernel_0.cpp b/apps/hardware_benchmarks/apps/hdr_plus/hdr_plus_generator_kernel_0.cpp new file mode 100644 index 000000000..b0dc30ac7 --- /dev/null +++ b/apps/hardware_benchmarks/apps/hdr_plus/hdr_plus_generator_kernel_0.cpp @@ -0,0 +1,446 @@ +/* + * An application that performs a simplified version of Google's HDR+ pipeline + * + */ +#define T_SIZE 16 +#define LOG_2_T_SIZE 4 +#define G_T_SIZE 8 +#include "Halide.h" + +namespace { +int blockSize = 9; +//int blockSize = 5; +//int blockSize = 1; +constexpr int maxJ = 6; + +using namespace Halide; +using namespace Halide::ConciseCasts; +using std::vector; +Var x("x"), y("y"), tx("tx"), ty("ty"), xy("xy"), xo("xo"), yo("yo"), xi("xi"), yi("yi"), c("c"), n("n"), k("k"); + +void fill_funcnames(vector& funcs, std::string name) { + for (size_t i=0; i { +public: + // FOR ALIGN + GeneratorParam pyramid_levels{"pyramid_levels", 5}; + GeneratorParam total_downsample_factor{"total_downsample_factor", 16}; // 2^ pyramid_levels-1 + + // Operate on raw bayer image: so 2 channels, plus 1 channel b/c receiving multiple images + Input> input{"input", 3}; + + // Output a single 8-bit RGB image + Output> output{"output", 3}; + + // Multiply with maximum precision and make result 8.8 p + Expr mul1(Expr a, Expr b) { + return i16(( (i32(a)) * (i32(b)) ) >> 8); + } + + + Expr mul_1023(Expr a, Expr b) { + return i16(( (i32(a)) * (i32(b)) ) >> 10); + } + + + Expr mul_1023_unsigned(Expr a, Expr b) { + return u16(( (u32(a)) * (u32(b)) ) >> 10); + } + + void generate() { + // Algorithm + + /* + * ALIGN STEP + */ + const int J = pyramid_levels; + + + /* Func: clamped_input + * dtype: u16 + * True range: [0, 1023] + * Consumer(s): hw_input + */ + Func clamped_input; + clamped_input = Halide::BoundaryConditions::repeat_edge(input); + + + /* Func: hw_input + * dtype: u16 + * True range: [0, 1023] + * Consumer(s): deinterleaved, val (in merge) + */ + Func hw_input; + hw_input(x, y, n) = clamped_input(x, y, n); + + Func hw_input_copy; + hw_input_copy(x, y, n) = hw_input(x, y, n) + u16(0); + + + /* Func: deinterleaved + * dtype: u16 + * True range: [0, 1023] + * Consumer(s): gray + */ + Func deinterleaved; + deinterleaved(x, y, c, n) = select(c == 0, hw_input_copy(2 * x, 2 * y, n), (select(c == 1, hw_input_copy(2 * x + 1, 2 * y, n), + (select(c == 2, hw_input_copy(2 * x, 2 * y + 1, n), hw_input_copy(2 * x + 1, 2 * y + 1, n)))))); + + + // STEP 1: Convert to grayscale + /* Func: gray + * dtype: u16 + * True range: [0, 1023] + * Consumer(s): pyramids in align, dist_tile (in merge) + */ + Func gray; + gray(x, y, n) = u16((deinterleaved(x, y, 1, n) + deinterleaved(x, y, 0, n) + deinterleaved(x, y, 3, n) + deinterleaved(x, y, 2, n)) >> 2); + + + //STEP 2: Downsample to form image pyramids + + Expr initialGaussWidth[J]; + Expr initialGaussHeight[J]; + + // initialGaussWidth[0] = 625; + // initialGaussWidth[1] = 312; + // initialGaussWidth[2] = 156; + // initialGaussWidth[3] = 78; + // initialGaussWidth[4] = 39; + + + initialGaussWidth[0] = 256; + initialGaussWidth[1] = 128; + initialGaussWidth[2] = 64; + initialGaussWidth[3] = 32; + initialGaussWidth[4] = 16; + + // initialGaussHeight[0] = 560; + // initialGaussHeight[1] = 280; + // initialGaussHeight[2] = 140; + // initialGaussHeight[3] = 70; + // initialGaussHeight[4] = 35; + + + initialGaussHeight[0] = 256; + initialGaussHeight[1] = 128; + initialGaussHeight[2] = 64; + initialGaussHeight[3] = 32; + initialGaussHeight[4] = 16; + + + /* Func: gPyramid[J] + * dtype: u16 + * True range: [0, 1023] + * Consumer(s): dist & scores calculation (in align) + */ + //Func gPyramid[J]; + vector gPyramid(J); + gPyramid[0](x, y, n) = gray(x, y, n); + + gPyramid[1](x, y, n) = downsample_u16_hdr(gPyramid[0], 2, initialGaussWidth[0], initialGaussHeight[0])(x, y, n); + gPyramid[2](x, y, n) = downsample_u16_hdr(gPyramid[1], 2, initialGaussWidth[1], initialGaussHeight[1])(x, y, n); + gPyramid[3](x, y, n) = downsample_u16_hdr(gPyramid[2], 2, initialGaussWidth[2], initialGaussHeight[2])(x, y, n); + gPyramid[4](x, y, n) = downsample_u16_hdr(gPyramid[3], 2, initialGaussWidth[3], initialGaussHeight[3])(x, y, n); + + + // gPyramid[4].bound(x, 0, 16); + // gPyramid[4].bound(y, 0, 16); + // gPyramid[4].bound(n, 0, 3); + + // gPyramid[3].bound(x, 0, 32); + // gPyramid[3].bound(y, 0, 32); + // gPyramid[3].bound(n, 0, 3); + + // Func provisional_output; + // provisional_output(x, y, n) = gPyramid[4](x, y, n); + // output(x, y, c) = u8(provisional_output(x, y, c)); + // output.bound(c, 0, 3); + // output.bound(x, 0, 16); + // output.bound(y, 0, 16); + + // STEP 3: Align pyramids and upsample the alignment back to the bottom layer + Func initialAlign; + + initialAlign(tx, ty, xy, n) = 0; + // initialAlign.bound(tx, 0, 16); + // initialAlign.bound(tx, 0, 16); + // initialAlign.bound(xy, 0, 1); + // initialAlign.bound(n, 0, 3); + + vector alignPyramid(J); + Expr min_align[J]; + Expr max_align[J]; + Expr gauss_width[J]; + Expr gauss_height[J]; + min_align[J-1] = i16(Expr(-4)); + max_align[J-1] = i16(Expr(4)); + + // gauss_width[4] = 39; + // gauss_width[3] = 78; + // gauss_width[2] = 156; + // gauss_width[1] = 312; + // gauss_width[0] = 625; + + gauss_width[4] = 16; + gauss_width[3] = 32; + gauss_width[2] = 64; + gauss_width[1] = 128; + gauss_width[0] = 256; + + // gauss_height[4] = 35; + // gauss_height[3] = 70; + // gauss_height[2] = 140; + // gauss_height[1] = 280; + // gauss_height[0] = 560; + + gauss_height[4] = 16; + gauss_height[3] = 32; + gauss_height[2] = 64; + gauss_height[1] = 128; + gauss_height[0] = 256; + + + Expr upsample_flow_gauss_widths[J]; + Expr upsample_flow_gauss_heights[J]; + + upsample_flow_gauss_widths[4] = 1; + upsample_flow_gauss_widths[3] = 1; + upsample_flow_gauss_widths[2] = 2; + upsample_flow_gauss_widths[1] = 4; + upsample_flow_gauss_widths[0] = 8; + + upsample_flow_gauss_heights[4] = 1; + upsample_flow_gauss_heights[3] = 1; + upsample_flow_gauss_heights[2] = 2; + upsample_flow_gauss_heights[1] = 4; + upsample_flow_gauss_heights[0] = 8; + + + /* ALIGN PYRAMID LEVEL 4*/ + //Var tx, ty, xy, n; + Var x_s_lvl_4, y_s_lvl_4; + + RDom r_tile_lvl_4(0, T_SIZE, 0, T_SIZE); + RDom r_search_lvl_4(-4, 9, -4, 9); + + + /* Func: coarse_offset_lvl_4 + * dtype: i16 + * True range: [0] + * Consumer(s): alignPyramid[4] + */ + Func coarse_offset_lvl_4; + coarse_offset_lvl_4(tx, ty, xy, n) = i16(2 * upsample_u16_size_2_for_alignment(initialAlign, upsample_flow_gauss_widths[4], upsample_flow_gauss_heights[4])(tx, ty, xy, n)); + + Expr x_ref_lvl_4 = clamp(tx * T_SIZE + r_tile_lvl_4.x, 0, gauss_width[4]-1); + Expr y_ref_lvl_4 = clamp(ty * T_SIZE + r_tile_lvl_4.y, 0, gauss_height[4]-1); + + Expr x_cmp_lvl_4 = clamp(tx * T_SIZE + r_tile_lvl_4.x + coarse_offset_lvl_4(tx, ty, 0, n) + x_s_lvl_4, 0, gauss_width[4]-1); + Expr y_cmp_lvl_4 = clamp(ty * T_SIZE + r_tile_lvl_4.y + coarse_offset_lvl_4(tx, ty, 1, n) + y_s_lvl_4, 0, gauss_height[4]-1); + + Expr dist_lvl_4 = abs(i16(gPyramid[4](x_ref_lvl_4, y_ref_lvl_4, 0)) - i16(gPyramid[4](x_cmp_lvl_4, y_cmp_lvl_4, n))); + + /* Func: scores_lvl_4 + * dtype: u32 + * True range: [0, 261888] (worst case) + * Consumer(s): alignPyramid[4] + */ + Func scores_lvl_4; + scores_lvl_4(tx, ty, x_s_lvl_4, y_s_lvl_4, n) = sum(u32(dist_lvl_4)); + + Func min_val_lvl_4, min_x_lvl_4, min_y_lvl_4; + min_val_lvl_4(tx, ty, n) = cast(std::numeric_limits::max()); + //min_val_lvl_4(tx, ty, n) = cast(std::numeric_limits::max()); + min_x_lvl_4(tx, ty, n) = 0; + min_y_lvl_4(tx, ty, n) = 0; + + min_x_lvl_4.bound(tx, 0, 1); + min_x_lvl_4.bound(ty, 0, 1); + min_x_lvl_4.bound(n, 0, 3); + + min_y_lvl_4.bound(tx, 0, 1); + min_y_lvl_4.bound(ty, 0, 1); + min_y_lvl_4.bound(n, 0, 3); + + min_val_lvl_4.bound(tx, 0, 1); + min_val_lvl_4.bound(ty, 0, 1); + min_val_lvl_4.bound(n, 0, 3); + + // Update the minimum function and coordinates + Expr condition = scores_lvl_4(tx, ty, r_search_lvl_4.x, r_search_lvl_4.y, n) < min_val_lvl_4(tx, ty, n); + Expr new_min_lvl_4 = select(condition, scores_lvl_4(tx, ty, r_search_lvl_4.x, r_search_lvl_4.y, n), min_val_lvl_4(tx, ty, n)); + Expr new_min_x_lvl_4 = select(condition, r_search_lvl_4.x, min_x_lvl_4(tx, ty, n)); + Expr new_min_y_lvl_4 = select(condition, r_search_lvl_4.y, min_y_lvl_4(tx, ty, n)); + + min_val_lvl_4(tx, ty, n) = new_min_lvl_4; + min_x_lvl_4(tx, ty, n) = new_min_x_lvl_4; + min_y_lvl_4(tx, ty, n) = new_min_y_lvl_4; + + + /* Func: alignPyramid[4] + * dtype: i16 + * True range: [-4, 4] (worst case) + * Consumer(s): coarse_offset_lvl_3 + */ + alignPyramid[4](tx, ty, xy, n) = select(n == 0, i16(0), + xy == 0, i16(min_x_lvl_4(tx, ty, n)) + coarse_offset_lvl_4(tx, ty, 0, n), + i16(min_y_lvl_4(tx, ty, n)) + coarse_offset_lvl_4(tx, ty, 1, n)); + + alignPyramid[4].bound(tx, 0, 1); + alignPyramid[4].bound(ty, 0, 1); + alignPyramid[4].bound(xy, 0, 1); + alignPyramid[4].bound(n, 0, 3); + + Func provisional_output; + provisional_output(x, y, n) = alignPyramid[4](x, y, 0, n); + output(x, y, c) = u8(provisional_output(x, y, c)); + output.bound(c, 0, 3); + output.bound(x, 0, 1); + output.bound(y, 0, 1); + + + + // Schedule + //Var xo("xo"), yo("yo"), xi("xi"), yi("yi"), outer("outer"); + + if (get_target().has_feature(Target::CoreIR)) { + + } else if (get_target().has_feature(Target::Clockwork)) { + + const int output_x_size = 1; + const int output_y_size = 1; + + output.bound(x, 0, output_x_size); + output.bound(y, 0, output_y_size); + + provisional_output.in().compute_root(); + + provisional_output.in().tile(x, y, xo, yo, xi, yi, output_x_size, output_y_size) + .reorder(n, xi, yi, xo, yo) + .hw_accelerate(xi, xo); + + provisional_output.tile(x, y, xo, yo, xi, yi, output_x_size, output_y_size) + .reorder(n, xi, yi, xo, yo); + provisional_output.compute_at(provisional_output.in(), xo); + provisional_output.store_in(MemoryType::GLB); + + alignPyramid[4].compute_at(provisional_output, xo); + + gPyramid[0].compute_at(provisional_output, xo); + gPyramid[1].compute_at(provisional_output, xo); + gPyramid[2].compute_at(provisional_output, xo); + gPyramid[3].compute_at(provisional_output, xo); + gPyramid[4].compute_at(provisional_output, xo); + + gray.compute_at(provisional_output, xo); + hw_input_copy.compute_at(provisional_output, xo); + + hw_input.in().in().compute_at(provisional_output, xo); // represents the mem tile + + hw_input.in().compute_at(provisional_output.in(), xo); // represents the glb level + hw_input.in().store_in(MemoryType::GLB); + + hw_input.bound(x, 0, 512); + hw_input.bound(y, 0, 512); + hw_input.bound(n, 0, 3); + hw_input.compute_root() + .accelerator_input(); + + } else { + + // // ALIGN SCHEDULE + // Var xo("xo"), yo("yo"), outer("outer"); + // align_output.reorder(xy, n, tx, ty).tile(tx, ty, xo, yo, xi, yi, 4, 4).fuse(xo, yo, outer).parallel(outer); + // //align_output.reorder(xy, n, tx, ty).parallel(outer); + // align_output.compute_root(); + // //deinterleaved.compute_root(); + // //hw_input_float.compute_root(); + // //deinterleaved.compute_root(); + // gray.compute_root().parallel(y, 32).vectorize(x, 8); + + // for (int j = 1; j < J; j++) { + // gPyramid[j] + // .compute_root().parallel(y, 8).vectorize(x, 8); + + + // // USE THIS FOR PRINTING COARSE OFFSET + // // if (j <= 0) + // // alignPyramid[j] + // // .store_at(align_output, outer).compute_at(align_output, yi); + // // else + // // //alignPyramid[j] + // // //.store_at(align_output, outer).compute_at(align_output, yi); + // // alignPyramid[j].compute_at(coarse_offset_lvl_0, n); + + + // // USE THIS FOR PRINTING SCORES + // alignPyramid[j].compute_root(); + // } + // //coarse_offset_lvl_4.compute_root(); + // //coarse_offset_lvl_3.compute_root(); + // //coarse_offset_lvl_2.compute_root(); + // //coarse_offset_lvl_1.compute_root(); + // //coarse_offset_lvl_0.compute_root(); + + // scores_lvl_0.compute_root(); + + // // alignPyramid[0-].compute_root() + // alignPyramid[0].compute_at(align_output, yi); + } + } +private: + Func upsample_u16_size_2_for_alignment(Func f_in, Expr gauss_width, Expr gauss_height) { + Var tx, ty, xy, n; + using Halide::_; + Func up, up_pre_shift, f_in_shift; + + up(tx, ty, xy, n) = f_in(tx/2, ty/2, xy, n); + return up; + } + + Func downsample_u16_hdr(Func f_in, Expr size, Expr gauss_width, Expr gauss_height) { + Var x, y, n; + using Halide::_; + Func f, down_pre_shift, down; + + f(x, y, n) = f_in(x, y, n); + Expr x_index_0 = clamp(size*x-1, 0, gauss_width-1); + Expr x_index_1 = clamp(size*x, 0, gauss_width-1); + Expr x_index_2 = clamp(size*x+1, 0, gauss_width-1); + Expr x_index_3 = clamp(size*x+2, 0, gauss_width-1); + + + Expr y_index_0 = clamp(size*y-1, 0, gauss_height-1); + Expr y_index_1 = clamp(size*y, 0, gauss_height-1); + Expr y_index_2 = clamp(size*y+1, 0, gauss_height-1); + Expr y_index_3 = clamp(size*y+2, 0, gauss_height-1); + + + /* Func: down_pre_shift + * dtype: u16 + * True range: [0, 65472] (worst case) + * Consumer(s): down + */ + down_pre_shift(x, y, n) = (1) * f(clamp(size*x-1, 0, gauss_width-1), clamp(size*y-1, 0, gauss_height-1), n) + (3) * f(clamp(size*x-1, 0, gauss_width-1), clamp(size*y, 0, gauss_height-1), n) + (3) * f(clamp(size*x-1, 0, gauss_width-1), clamp(size*y+1, 0, gauss_height-1), n) + (1) * f(clamp(size*x-1, 0, gauss_width-1), clamp(size*y+2, 0, gauss_height-1), n) + + (3) * f(clamp(size*x, 0, gauss_width-1), clamp(size*y-1, 0, gauss_height-1), n) + (9) * f(clamp(size*x, 0, gauss_width-1), clamp(size*y, 0, gauss_height-1), n) + (9) * f(clamp(size*x, 0, gauss_width-1), clamp(size*y+1, 0, gauss_height-1), n) + (3) * f(clamp(size*x, 0, gauss_width-1), clamp(size*y+2, 0, gauss_height-1), n) + + (3) * f(clamp(size*x+1, 0, gauss_width-1), clamp(size*y-1, 0, gauss_height-1), n) + (9) * f(clamp(size*x+1, 0, gauss_width-1), clamp(size*y, 0, gauss_height-1), n) + (9) * f(clamp(size*x+1, 0, gauss_width-1), clamp(size*y+1, 0, gauss_height-1), n) + (3) * f(clamp(size*x+1, 0, gauss_width-1), clamp(size*y+2, 0, gauss_height-1), n) + + (1) * f(clamp(size*x+2, 0, gauss_width-1), clamp(size*y-1, 0, gauss_height-1), n) + (3) * f(clamp(size*x+2, 0, gauss_width-1), clamp(size*y, 0, gauss_height-1), n) + (3) * f(clamp(size*x+2, 0, gauss_width-1), clamp(size*y+1, 0, gauss_height-1), n) + (1) * f(clamp(size*x+2, 0, gauss_width-1), clamp(size*y+2, 0, gauss_height-1), n); + + + /* Func: down + * dtype: u16 + * True range: [0, 1023] + * Consumer(s): returned by downsample_u16_hdr + */ + down(x, y, n) = down_pre_shift(x, y, n) >> 6; + + return down; + } + }; +} +HALIDE_REGISTER_GENERATOR(HDRPlus, hdr_plus) \ No newline at end of file diff --git a/apps/hardware_benchmarks/apps/hdr_plus/process_kernel_0.cpp b/apps/hardware_benchmarks/apps/hdr_plus/process_kernel_0.cpp new file mode 100644 index 000000000..5ba799941 --- /dev/null +++ b/apps/hardware_benchmarks/apps/hdr_plus/process_kernel_0.cpp @@ -0,0 +1,473 @@ +#include +#include "hardware_process_helper.h" +#include "halide_image_io.h" +#include "dirent.h" +#include +#include +#include +//#include +//#include + + + + +#if defined(WITH_CPU) + #include "hdr_plus.h" +#endif + +#if defined(WITH_COREIR) + #include "coreir_interpret.h" +#endif + +#if defined(WITH_CLOCKWORK) + #include "rdai_api.h" + #include "clockwork_sim_platform.h" + #include "hdr_plus_clockwork.h" +#endif + +using namespace Halide::Tools; +using namespace Halide::Runtime; +using std::string; +using std::vector; + +bool load_imgs(std::string dir_path, Buffer &imgs) { + std::vector img_names; + + DIR* dir; + struct dirent* ent; + if ((dir = opendir(dir_path.c_str())) != NULL) { + std::string file_name; + std::string ext; + while ((ent = readdir(dir)) != NULL) { + file_name = std::string(ent->d_name); + ext = Internal::get_lowercase_extension(file_name); + if (ext != "pgm") + continue; + img_names.push_back(file_name); + } + std::sort(img_names.begin(), img_names.end()); + closedir(dir); + } else { + perror (""); + return EXIT_FAILURE; + } + + Buffer img_tmp; + //img_tmp = Buffer (im_width, im_height); + /*printf("Right before calling load_image!\n"); + // Making this uint16_t so it operates on bayer raw + LibRaw rawProcessor; + + std::string file_path = dir_path + "/" + img_names[0]; + // Open the DNG file + if (rawProcessor.open_file("./images/20171106_subset-bursts-0006_20160722_115157_431-payload_N000.dng") != LIBRAW_SUCCESS) { + std::cerr << "Error: Cannot open DNG file" << std::endl; + return 1; + } + + printf("Succesfully opened DNG file!\n"); + + // Decode the DNG file + if (rawProcessor.unpack() != LIBRAW_SUCCESS) { + std::cerr << "Error: Cannot unpack DNG file" << std::endl; + return 1; + } + + + rawProcessor.raw2image(); + + // Get image data + int im_width = rawProcessor.imgdata.sizes.iwidth; + int im_height = rawProcessor.imgdata.sizes.iheight; + printf("The width and height are %d, %d\n", im_width, im_height); + //FIXME!!! + imgs = Buffer(im_width, im_height, 1); + + printf("Right BEFORE copying buffer data!\n"); + // Copy image data to buffer + for (int y = 0; y < im_height; ++y) { + for (int x = 0; x < im_width; ++x) { + imgs(x, y, 1) = rawProcessor.imgdata.image[y][x]; + //img_tmp(x, y) = 0; + //printf("Data: %d", rawProcessor.imgdata.image[y][x]); + } + } + + printf("Right after copying buffer data!\n"); + + // Close the DNG file + rawProcessor.recycle(); + + + printf("Right after loading image"); + //exit(0); + printf("Load image has been called!\n"); + return true; + */ + //FIXME NOT A GOOD WAY OF DOING THINGS. FIX THIS!!! + int num_imgs = img_names.size(); + // Getting rid of channel since we are operating on Bayer raw images: there isn't a channel (color) dimension yet + //int width, height, channel; + int width, height; + int ref = num_imgs/2; + for (int i = ref; i > 0 ; i--) { + std::iter_swap(img_names.begin() + i, img_names.begin() + i - 1); + } + + for (int i = 0; i < num_imgs; i++) { + img_tmp = load_image(dir_path + "/" + img_names[i]); + if (i == 0) { + width = img_tmp.width(); + height = img_tmp.height(); + //channel = img_tmp.channels(); + //imgs = Buffer(width, height, channel, num_imgs); + imgs = Buffer(width, height, num_imgs); + //printf("width: %d, height: %d, channel: %d, number of images: %d\n", + // width, height, channel, num_imgs); + printf("width: %d, height: %d, number of images: %d\n", + width, height, num_imgs); + } + printf("loaded %s image\n", img_names[i].c_str()); + // changing this to slice along dimension 2 b/c operating on bayer raw + //imgs.sliced(3, i).copy_from(img_tmp); + imgs.sliced(2, i).copy_from(img_tmp); + } + + return true; +} + +int main( int argc, char **argv ) { + std::map> functions; + //OneInOneOut_ProcessController processor("hdr_plus"); + //FIXME: FOR NOW, outputing int16_t. Once outputting RGB images, should send out uint8_t. + //OneInOneOut_ProcessController processor("hdr_plus"); + //OneInOneOut_ProcessController processor("hdr_plus"); + OneInOneOut_ProcessController processor("hdr_plus"); + //OneInOneOut_ProcessController processor("hdr_plus"); + + #if defined(WITH_CPU) + auto cpu_process = [&]( auto &proc ) { + hdr_plus( proc.input, proc.output ); + }; + functions["cpu"] = [&](){ cpu_process( processor ); } ; + #endif + + #if defined(WITH_COREIR) + auto coreir_process = [&]( auto &proc ) { + }; + functions["coreir"] = [&](){ coreir_process( processor ); }; + #endif + + #if defined(WITH_CLOCKWORK) + auto clockwork_process = [&]( auto &proc ) { + RDAI_Platform *rdai_platform = RDAI_register_platform( &rdai_clockwork_sim_ops ); + if ( rdai_platform ) { + printf( "[RUN_INFO] found an RDAI platform\n" ); + hdr_plus_clockwork( proc.input, proc.output ); + printf( "[RUN_INFO] finished running hdr_plus_clockwork\n" ); + RDAI_unregister_platform( rdai_platform ); + } else { + printf("[RUN_INFO] failed to register RDAI platform!\n"); + } + }; + functions["clockwork"] = [&](){ clockwork_process( processor ); }; + #endif + + // Add all defined functions + processor.run_calls = functions; + + + +/* +// BEGIN LOAD IMAGES FUNCTION +std::string dir_path = "./images/"; +std::vector img_names; + + DIR* dir; + struct dirent* ent; + if ((dir = opendir(dir_path.c_str())) != NULL) { + std::string file_name; + std::string ext; + while ((ent = readdir(dir)) != NULL) { + file_name = std::string(ent->d_name); + ext = Internal::get_lowercase_extension(file_name); + if (ext != "dng") + continue; + img_names.push_back(file_name); + } + std::sort(img_names.begin(), img_names.end()); + closedir(dir); + } else { + perror (""); + return EXIT_FAILURE; + } + + printf("Right before calling load_image!\n"); + // Making this uint16_t so it operates on bayer raw + LibRaw rawProcessor; + + std::string file_path = dir_path + "/" + img_names[0]; + // Open the DNG file + if (rawProcessor.open_file("./images/20171106_subset-bursts-0006_20160722_115157_431-payload_N000.dng") != LIBRAW_SUCCESS) { + std::cerr << "Error: Cannot open DNG file" << std::endl; + return 1; + } + + printf("Succesfully opened DNG file!\n"); + + // Decode the DNG file + if (rawProcessor.unpack() != LIBRAW_SUCCESS) { + std::cerr << "Error: Cannot unpack DNG file" << std::endl; + return 1; + } + + + //rawProcessor.raw2image(); + + // Get image data + int im_width = rawProcessor.imgdata.sizes.raw_width; + int im_height = rawProcessor.imgdata.sizes.raw_height; + printf("The width and height are %d, %d\n", im_width, im_height); + //FIXME!!! + imgs = Buffer(im_width, im_height, 1); + Buffer img_tmp; + img_tmp = Buffer (im_width, im_height); + + printf("Right BEFORE copying buffer data!\n"); + // Copy image data to buffer + int raw_index; + auto boosted = Buffer(im_width, im_height); + for (int y = 0; y < im_height; ++y) { + for (int x = 0; x < im_width; ++x) { + raw_index = x*y; + imgs(x, y, 0) = rawProcessor.imgdata.rawdata.raw_image[raw_index]; + boosted(x, y) = rawProcessor.imgdata.rawdata.raw_image[raw_index] * 64; + //img_tmp(x, y) = rawProcessor.imgdata.image[y][x]; + //printf("Data: %d ", rawProcessor.imgdata.rawdata.raw_image[raw_index]); + } + //printf("\n"); + } + save_image(boosted, "boosted_input.png"); + + + printf("Right after copying buffer data!\n"); + // Close the DNG file + rawProcessor.recycle(); + printf("Right after loading image"); + //imgs = Buffer(im_width, im_height, 1); + //imgs.sliced(2, i).copy_from(img_tmp); + //exit(0); + printf("Load image has been called!\n"); + +// END LOAD IMAGES FUNCTION +*/ + + + + // LOADING THE IMAGES IN AND CONCATENATING + //if(!load_imgs("./images/", imgs)) + // return -1; + + + // Load the input images (bayer raw) + //Buffer imgs; + //Buffer imgs; + + // int im_width = 1250; + // int im_height = 1120; + + int im_width = 512; + int im_height = 512; + //imgs = Buffer(im_width, im_height, 3); + //imgs = Buffer(im_width, im_height, 3); + processor.input = Buffer(im_width, im_height, 3); + + bool use_k_10bit = true; + bool use_k_raw = false; + + if (use_k_10bit){ + + for (int frame_num = 0; frame_num < 3; frame_num++){ + + // Open the input file + std::string taxi_10bit_file_prefix = "./taxi_10bit_"; + //std::string taxi_10bit_filename = taxi_10bit_file_prefix + std::to_string(frame_num) + "_small.txt"; + std::string taxi_10bit_filename = taxi_10bit_file_prefix + std::to_string(frame_num) + ".txt"; + std::ifstream taxi_10bit_file(taxi_10bit_filename); + + // Check if the file is opened successfully + int count = 0; + if (!taxi_10bit_file.is_open()) { + std::cerr << "Error: Unable to open taxi file!" << std::endl; + return 1; // Exit with error code + } + + // Read the file line by line + std::string line; + int y = 0; + + while (std::getline(taxi_10bit_file, line)) { + //while(count < im_width * im_height) { + // Create a string stream from the current line + std::istringstream iss(line); + + + // Tokenize the line using ',' as the delimiter + std::string token; + int x = 0; + while (std::getline(iss, token, ',')) { + //std::getline(iss, token, ','); + count++; + // convert this to unsigned short + //imgs(x, y, frame_num) = static_cast(stoul(token)); + processor.input(x, y, frame_num) = static_cast(stoul(token)); + x++; + if (x == im_width) { + break; + } + } + + y++; + if (y == im_height) { + break; + } + } + + // Close the input file + taxi_10bit_file.close(); + + } + + } else if(use_k_raw) { + + for (int frame_num = 0; frame_num < 3; frame_num++){ + + // Open the input file + std::string taxi_raw_file_prefix = "./taxi_"; + //std::string taxi_10bit_filename = taxi_10bit_file_prefix + std::to_string(frame_num) + "_small.txt"; + std::string taxi_raw_filename = taxi_raw_file_prefix + std::to_string(frame_num) + "_raw.txt"; + std::ifstream taxi_raw_file(taxi_raw_filename); + + // Check if the file is opened successfully + int count = 0; + if (!taxi_raw_file.is_open()) { + std::cerr << "Error: Unable to open file!" << std::endl; + return 1; // Exit with error code + } + + // Read the file line by line + std::string line; + int y = 0; + + while (std::getline(taxi_raw_file, line)) { + //while(count < im_width * im_height) { + // Create a string stream from the current line + std::istringstream iss(line); + + + // Tokenize the line using ',' as the delimiter + std::string token; + int x = 0; + while (std::getline(iss, token, ',')) { + //while(x < im_width){ + //std::getline(iss, token, ','); + count++; + // convert this to unsigned short + //imgs(x, y, frame_num) = static_cast(stoul(token)); + //imgs(x, y, frame_num) = std::stof(token); + processor.input(x, y, frame_num) = std::stof(token); + x++; + } + y++; + } + + // Close the input file + taxi_raw_file.close(); + + } + + + } else { + // cv::Mat input_frame_0 = cv::imread("../hdr_plus/images/png/taxi/taxi_0.png", cv::IMREAD_UNCHANGED); + // cv::Mat input_frame_1 = cv::imread("../hdr_plus/images/png/taxi/taxi_1.png", cv::IMREAD_UNCHANGED); + // cv::Mat input_frame_2 = cv::imread("../hdr_plus/images/png/taxi/taxi_2.png", cv::IMREAD_UNCHANGED); + + // if (input_frame_0.empty()) { + // std::cerr << "Error: Could not open the image file." << std::endl; + // return -1; + // } + + // printf("The Input image dimensions are: (%d, %d)\n", input_frame_0.rows, input_frame_0.cols); + // // Loop over the elements of the cv::Mat matrix + // for (int y = 0; y < input_frame_0.rows; ++y) { + // for (int x = 0; x < input_frame_0.cols; ++x) { + // cv::Vec3b pixel_0 = input_frame_0.at(y, x); + // imgs(x, y, 0) = pixel_0[0] * 1; + + // cv::Vec3b pixel_1 = input_frame_1.at(y, x); + // imgs(x, y, 1) = pixel_1[0] * 1; + + // cv::Vec3b pixel_2 = input_frame_2.at(y, x); + // imgs(x, y, 2) = pixel_2[0] * 1; + // //std::cout << "Pixel value = " << static_cast(pixel[0]) << std::endl; + // } + // } + } + + + //processor.input = imgs; + + /* + printf("The load images function returned!"); + processor.input = imgs; + auto boosted = Buffer(processor.input.dim(0).extent(), processor.input.dim(1).extent()); + for (int y = 0; y < processor.input.dim(1).extent(); y++) { + for (int x = 0; x < processor.input.dim(0).extent(); x++) { + printf("Value: %.2f ", processor.input(x, y)); + //printf("Value: %d", processor.input(x, y)); + boosted(x, y) = processor.input(x, y) * 64; + } + printf("\n"); + } + save_image(boosted, "boosted_input.png"); + */ + processor.inputs_preset = true; + printf("Successfully load input images!\n"); + + + //processor.output = Buffer(processor.input.dim(0).extent(), processor.input.dim(1).extent()); + //processor.output = Buffer(1250, 1120, 3); + + + //processor.output = Buffer(1248, 1120, 3); + //processor.output = Buffer(512, 512, 3); + //processor.output = Buffer(32, 32, 3); + processor.output = Buffer(1, 1, 3); + //processor.output = Buffer(625, 560, 3); + + //processor.output = Buffer(64, 64, 3); + + + //processor.output = Buffer(1096, 1112, 3); + //processor.output = Buffer(128, 128, 3); + auto cmd_output = processor.process_command(argc, argv); + printf("Ran process command!"); + + //auto boosted_output = Buffer(processor.output.dim(0).extent(), processor.output.dim(1).extent()); + //auto output_2 = Buffer(processor.output.dim(0).extent(), processor.output.dim(1).extent()); + auto boosted_output = Buffer(processor.output.dim(0).extent(), processor.output.dim(1).extent(), processor.output.dim(2).extent()); + auto output_2 = Buffer(processor.output.dim(0).extent(), processor.output.dim(1).extent(), processor.output.dim(2).extent()); + for (int y = 0; y < processor.output.dim(1).extent(); y++) { + for (int x = 0; x < processor.output.dim(0).extent(); x++) { + boosted_output(x, y) = processor.output(x, y) * 64; + output_2(x, y) = processor.output(x, y); + } + } + //save_image(boosted_output, "boosted_output.png"); + //save_image(output_2, "output_2.png"); + + + + + return cmd_output; +}