From 755eb527e7328ca422ffa7adf4160cfc652c0f56 Mon Sep 17 00:00:00 2001 From: LakshmiKumar23 Date: Tue, 26 Sep 2023 16:11:34 -0700 Subject: [PATCH 01/63] tensor changes --- rocAL/CMakeLists.txt | 1 + rocAL/include/api/rocal_api.h | 24 +- rocAL/include/api/rocal_api_augmentation.h | 1706 ++++++----- rocAL/include/api/rocal_api_data_loaders.h | 1358 +++++---- rocAL/include/api/rocal_api_data_transfer.h | 75 +- rocAL/include/api/rocal_api_info.h | 95 +- rocAL/include/api/rocal_api_meta_data.h | 315 +- rocAL/include/api/rocal_api_parameters.h | 161 +- rocAL/include/api/rocal_api_tensor.h | 70 + rocAL/include/api/rocal_api_types.h | 125 +- .../color_augmentations/node_blend.h | 16 +- .../color_augmentations/node_blur.h | 21 +- .../color_augmentations/node_brightness.h | 23 +- .../node_color_temperature.h | 18 +- .../color_augmentations/node_color_twist.h | 27 +- .../color_augmentations/node_contrast.h | 31 +- .../color_augmentations/node_exposure.h | 26 +- .../color_augmentations/node_gamma.h | 21 +- .../color_augmentations/node_hue.h | 16 +- .../color_augmentations/node_saturation.h | 20 +- .../color_augmentations/node_vignette.h | 19 +- .../effects_augmentations/node_fog.h | 18 +- .../effects_augmentations/node_jitter.h | 21 +- .../effects_augmentations/node_pixelate.h | 16 +- .../effects_augmentations/node_rain.h | 23 +- .../effects_augmentations/node_snow.h | 21 +- .../effects_augmentations/node_snp_noise.h | 28 +- .../geometry_augmentations/node_crop.h | 29 +- .../node_crop_mirror_normalize.h | 36 +- .../geometry_augmentations/node_crop_resize.h | 29 +- .../geometry_augmentations/node_fisheye.h | 15 +- .../geometry_augmentations/node_flip.h | 34 +- .../node_lens_correction.h | 19 +- .../geometry_augmentations/node_random_crop.h | 27 +- .../geometry_augmentations/node_resize.h | 6 +- .../node_resize_crop_mirror.h | 39 +- .../node_resize_mirror_normalize.h | 35 +- .../geometry_augmentations/node_rotate.h | 32 +- .../geometry_augmentations/node_warp_affine.h | 29 +- 
rocAL/include/augmentations/node_copy.h | 13 +- rocAL/include/augmentations/node_nop.h | 12 +- .../augmentations/node_sequence_rearrange.h | 19 +- .../augmentations/node_ssd_random_crop.h | 92 +- rocAL/include/decoders/image/decoder.h | 53 +- .../include/decoders/image/decoder_factory.h | 1 + .../decoders/image/fused_crop_decoder.h | 52 +- .../include/decoders/image/hw_jpeg_decoder.h | 31 +- .../include/decoders/image/open_cv_decoder.h | 44 +- .../decoders/image/turbo_jpeg_decoder.h | 52 +- .../decoders/video/ffmpeg_video_decoder.h | 8 +- .../decoders/video/hardware_video_decoder.h | 8 +- rocAL/include/decoders/video/video_decoder.h | 45 +- .../decoders/video/video_decoder_factory.h | 3 +- rocAL/include/device/device_code.h | 14 +- .../device/device_data_transfer_code.h | 80 +- rocAL/include/device/device_manager.h | 26 +- rocAL/include/device/device_manager_hip.h | 11 +- rocAL/include/device/ocl_setup.h | 2 +- rocAL/include/loaders/circular_buffer.h | 54 +- .../loaders/image/cifar10_data_loader.h | 33 +- rocAL/include/loaders/image/image_loader.h | 38 +- .../loaders/image/image_loader_sharded.h | 15 +- .../loaders/image/image_read_and_decode.h | 58 +- .../loaders/image/node_cifar10_loader.h | 21 +- .../loaders/image/node_fused_jpeg_crop.h | 21 +- .../image/node_fused_jpeg_crop_single_shard.h | 21 +- .../include/loaders/image/node_image_loader.h | 17 +- .../image/node_image_loader_single_shard.h | 17 +- .../include/loaders/image_source_evaluator.h | 39 +- .../loaders/{image => }/loader_module.h | 37 +- .../include/loaders/video/node_video_loader.h | 22 +- .../video/node_video_loader_single_shard.h | 28 +- rocAL/include/loaders/video/video_loader.h | 47 +- .../loaders/video/video_loader_module.h | 66 - .../loaders/video/video_loader_sharded.h | 26 +- .../loaders/video/video_read_and_decode.h | 28 +- .../meta_data/augmentations_meta_nodes.h | 7 +- rocAL/include/meta_data/bounding_box_graph.h | 32 +- .../meta_data/caffe2_meta_data_reader.h | 28 +- 
.../caffe2_meta_data_reader_detection.h | 30 +- .../meta_data/caffe_meta_data_reader.h | 28 +- .../caffe_meta_data_reader_detection.h | 30 +- .../meta_data/cifar10_meta_data_reader.h | 26 +- .../include/meta_data/coco_meta_data_reader.h | 28 +- .../coco_meta_data_reader_key_points.h | 27 +- .../include/meta_data/label_reader_folders.h | 25 +- rocAL/include/meta_data/lookahead_parser.h | 119 +- rocAL/include/meta_data/meta_data.h | 549 +++- rocAL/include/meta_data/meta_data_graph.h | 20 +- rocAL/include/meta_data/meta_data_reader.h | 64 +- .../meta_data/meta_data_reader_factory.h | 2 +- rocAL/include/meta_data/meta_node.h | 18 +- rocAL/include/meta_data/meta_node_crop.h | 27 +- .../meta_node_crop_mirror_normalize.h | 25 +- .../include/meta_data/meta_node_crop_resize.h | 27 +- rocAL/include/meta_data/meta_node_flip.h | 29 +- rocAL/include/meta_data/meta_node_resize.h | 23 +- .../meta_data/meta_node_resize_crop_mirror.h | 27 +- .../meta_node_resize_mirror_normalize.h | 43 + rocAL/include/meta_data/meta_node_rotate.h | 31 +- .../meta_data/meta_node_ssd_random_crop.h | 18 +- .../meta_data/mxnet_meta_data_reader.h | 39 +- .../meta_data/randombboxcrop_meta_data.h | 43 +- .../randombboxcrop_meta_data_reader.h | 54 +- .../randombboxcrop_meta_data_reader_factory.h | 2 +- .../include/meta_data/randombboxcrop_reader.h | 96 +- .../meta_data/text_file_meta_data_reader.h | 20 +- rocAL/include/meta_data/tf_meta_data_reader.h | 31 +- .../meta_data/tf_meta_data_reader_detection.h | 40 +- rocAL/include/meta_data/video_label_reader.h | 26 +- rocAL/include/parameters/parameter.h | 10 +- rocAL/include/parameters/parameter_crop.h | 98 +- rocAL/include/parameters/parameter_factory.h | 68 +- rocAL/include/parameters/parameter_random.h | 117 +- .../parameters/parameter_random_crop.h | 28 +- .../parameter_random_crop_decoder.h | 64 +- .../include/parameters/parameter_rocal_crop.h | 23 +- rocAL/include/parameters/parameter_simple.h | 29 +- rocAL/include/parameters/parameter_vx.h | 85 +- 
rocAL/include/pipeline/commons.h | 81 +- rocAL/include/pipeline/context.h | 25 +- rocAL/include/pipeline/exception.h | 21 +- rocAL/include/pipeline/graph.h | 16 +- rocAL/include/pipeline/image.h | 100 +- rocAL/include/pipeline/log.h | 16 +- rocAL/include/pipeline/master_graph.h | 339 ++- rocAL/include/pipeline/node.h | 41 +- rocAL/include/pipeline/ring_buffer.h | 50 +- rocAL/include/pipeline/tensor.h | 336 +++ rocAL/include/pipeline/timing_debug.h | 70 +- .../readers/image/caffe2_lmdb_record_reader.h | 43 +- .../readers/image/caffe_lmdb_record_reader.h | 44 +- .../readers/image/cifar10_data_reader.h | 30 +- .../readers/image/coco_file_source_reader.h | 32 +- .../readers/image/file_source_reader.h | 28 +- rocAL/include/readers/image/image_reader.h | 75 +- .../readers/image/mxnet_recordio_reader.h | 39 +- .../include/readers/image/tf_record_reader.h | 43 +- .../video/sequence_file_source_reader.h | 32 +- .../readers/video/video_file_source_reader.h | 23 +- .../include/readers/video/video_properties.h | 18 +- rocAL/include/readers/video/video_reader.h | 73 +- .../readers/video/video_reader_factory.h | 3 +- rocAL/rocAL_hip/CMakeLists.txt | 2 +- rocAL/rocAL_hip/box_encoder_hip.cpp | 227 +- rocAL/rocAL_hip/box_encoder_hip.h | 96 +- rocAL/rocAL_hip/rocal_hip_kernels.cpp | 74 +- rocAL/rocAL_hip/rocal_hip_kernels.h | 66 +- rocAL/source/api/rocal_api.cpp | 71 +- rocAL/source/api/rocal_api_augmentation.cpp | 2575 ++++++++-------- rocAL/source/api/rocal_api_data_loaders.cpp | 2628 +++++++---------- rocAL/source/api/rocal_api_data_transfer.cpp | 105 +- rocAL/source/api/rocal_api_info.cpp | 113 +- rocAL/source/api/rocal_api_meta_data.cpp | 405 ++- rocAL/source/api/rocal_api_parameter.cpp | 163 +- .../color_augmentations/node_blend.cpp | 39 +- .../color_augmentations/node_blur.cpp | 42 +- .../color_augmentations/node_brightness.cpp | 43 +- .../node_color_temperature.cpp | 40 +- .../color_augmentations/node_color_twist.cpp | 53 +- .../color_augmentations/node_contrast.cpp | 54 +- 
.../color_augmentations/node_exposure.cpp | 44 +- .../color_augmentations/node_gamma.cpp | 49 +- .../color_augmentations/node_hue.cpp | 42 +- .../color_augmentations/node_saturation.cpp | 50 +- .../color_augmentations/node_vignette.cpp | 38 +- .../effects_augmentations/node_fog.cpp | 36 +- .../effects_augmentations/node_jitter.cpp | 41 +- .../effects_augmentations/node_pixelate.cpp | 31 +- .../effects_augmentations/node_rain.cpp | 56 +- .../effects_augmentations/node_snow.cpp | 43 +- .../effects_augmentations/node_snp_noise.cpp | 64 +- .../geometry_augmentations/node_crop.cpp | 107 +- .../node_crop_mirror_normalize.cpp | 106 +- .../node_crop_resize.cpp | 73 +- .../geometry_augmentations/node_fisheye.cpp | 30 +- .../geometry_augmentations/node_flip.cpp | 50 +- .../node_lens_correction.cpp | 45 +- .../node_random_crop.cpp | 60 +- .../geometry_augmentations/node_resize.cpp | 67 +- .../node_resize_crop_mirror.cpp | 75 +- .../node_resize_mirror_normalize.cpp | 235 +- .../geometry_augmentations/node_rotate.cpp | 57 +- .../node_warp_affine.cpp | 111 +- rocAL/source/augmentations/node_copy.cpp | 23 +- rocAL/source/augmentations/node_nop.cpp | 24 +- .../augmentations/node_sequence_rearrange.cpp | 48 +- .../augmentations/node_ssd_random_crop.cpp | 133 +- .../source/decoders/image/decoder_factory.cpp | 12 +- .../decoders/image/fused_crop_decoder.cpp | 70 +- .../source/decoders/image/hw_jpeg_decoder.cpp | 147 +- .../source/decoders/image/open_cv_decoder.cpp | 64 +- .../decoders/image/turbo_jpeg_decoder.cpp | 177 +- .../decoders/video/ffmpeg_video_decoder.cpp | 95 +- .../decoders/video/hardware_video_decoder.cpp | 112 +- .../decoders/video/video_decoder_factory.cpp | 14 +- rocAL/source/device/device_manager.cpp | 81 +- rocAL/source/device/device_manager_hip.cpp | 19 +- rocAL/source/device/ocl_setup.cpp | 68 +- rocAL/source/loaders/circular_buffer.cpp | 347 +-- .../loaders/image/cifar10_data_loader.cpp | 190 +- rocAL/source/loaders/image/image_loader.cpp | 155 +- 
.../loaders/image/image_loader_sharded.cpp | 104 +- .../loaders/image/image_read_and_decode.cpp | 141 +- .../loaders/image/node_cifar10_loader.cpp | 23 +- .../loaders/image/node_fused_jpeg_crop.cpp | 29 +- .../node_fused_jpeg_crop_single_shard.cpp | 31 +- .../loaders/image/node_image_loader.cpp | 27 +- .../image/node_image_loader_single_shard.cpp | 29 +- .../source/loaders/image_source_evaluator.cpp | 63 +- .../loaders/video/node_video_loader.cpp | 26 +- .../video/node_video_loader_single_shard.cpp | 24 +- rocAL/source/loaders/video/video_loader.cpp | 173 +- .../loaders/video/video_loader_sharded.cpp | 75 +- .../loaders/video/video_read_and_decode.cpp | 116 +- rocAL/source/meta_data/bounding_box_graph.cpp | 222 +- .../meta_data/caffe2_meta_data_reader.cpp | 86 +- .../caffe2_meta_data_reader_detection.cpp | 114 +- .../meta_data/caffe_meta_data_reader.cpp | 82 +- .../caffe_meta_data_reader_detection.cpp | 91 +- .../meta_data/cifar10_meta_data_reader.cpp | 114 +- .../meta_data/coco_meta_data_reader.cpp | 294 +- .../source/meta_data/label_reader_folders.cpp | 102 +- .../meta_data/meta_data_graph_factory.cpp | 19 +- .../meta_data/meta_data_reader_factory.cpp | 229 +- rocAL/source/meta_data/meta_node_crop.cpp | 66 +- .../meta_node_crop_mirror_normalize.cpp | 89 +- .../meta_data/meta_node_crop_resize.cpp | 78 +- rocAL/source/meta_data/meta_node_flip.cpp | 66 +- rocAL/source/meta_data/meta_node_resize.cpp | 42 +- .../meta_node_resize_crop_mirror.cpp | 91 +- .../meta_node_resize_mirror_normalize.cpp | 81 + rocAL/source/meta_data/meta_node_rotate.cpp | 73 +- .../meta_data/meta_node_ssd_random_crop.cpp | 41 +- .../meta_data/mxnet_meta_data_reader.cpp | 106 +- ...andombboxcrop_meta_data_reader_factory.cpp | 31 +- .../meta_data/randombboxcrop_reader.cpp | 173 +- .../meta_data/text_file_meta_data_reader.cpp | 69 +- .../source/meta_data/tf_meta_data_reader.cpp | 115 +- .../tf_meta_data_reader_detection.cpp | 191 +- rocAL/source/meta_data/video_label_reader.cpp | 124 +- 
rocAL/source/parameters/parameter_crop.cpp | 89 +- rocAL/source/parameters/parameter_factory.cpp | 119 +- .../parameters/parameter_random_crop.cpp | 82 +- .../parameter_random_crop_decoder.cpp | 80 +- .../parameters/parameter_rocal_crop.cpp | 67 +- rocAL/source/pipeline/graph.cpp | 82 +- rocAL/source/pipeline/image.cpp | 272 +- rocAL/source/pipeline/master_graph.cpp | 1668 +++++------ rocAL/source/pipeline/node.cpp | 41 +- rocAL/source/pipeline/ring_buffer.cpp | 390 +-- rocAL/source/pipeline/tensor.cpp | 430 +++ .../image/caffe2_lmdb_record_reader.cpp | 202 +- .../image/caffe_lmdb_record_reader.cpp | 128 +- .../readers/image/cifar10_data_reader.cpp | 129 +- .../readers/image/coco_file_source_reader.cpp | 130 +- .../coco_meta_data_reader_key_points.cpp | 222 +- .../readers/image/file_source_reader.cpp | 165 +- .../readers/image/mxnet_recordio_reader.cpp | 155 +- rocAL/source/readers/image/reader_factory.cpp | 84 +- .../source/readers/image/tf_record_reader.cpp | 141 +- .../video/sequence_file_source_reader.cpp | 138 +- .../video/video_file_source_reader.cpp | 87 +- .../source/readers/video/video_properties.cpp | 94 +- .../readers/video/video_reader_factory.cpp | 20 +- rocAL_pybind/README.md | 2 +- rocAL_pybind/__init__.py | 2 +- rocAL_pybind/amd/__init__.py | 2 +- rocAL_pybind/amd/rocal/__init__.py | 2 +- rocAL_pybind/amd/rocal/decoders.py | 190 +- rocAL_pybind/amd/rocal/fn.py | 1167 +++++--- rocAL_pybind/amd/rocal/pipeline.py | 308 +- rocAL_pybind/amd/rocal/plugin/__init__.py | 2 +- rocAL_pybind/amd/rocal/plugin/generic.py | 253 +- rocAL_pybind/amd/rocal/plugin/pytorch.py | 363 +-- rocAL_pybind/amd/rocal/plugin/tf.py | 249 +- rocAL_pybind/amd/rocal/random.py | 30 +- rocAL_pybind/amd/rocal/readers.py | 355 ++- rocAL_pybind/amd/rocal/types.py | 30 +- .../examples/Default_anchors_retinanet.bin | Bin 0 -> 1921392 bytes .../examples/PYTHON_UNITTEST_TEST_FILE.sh | 284 +- rocAL_pybind/examples/READERS_TEST_FILE.sh | 20 +- rocAL_pybind/examples/README.md | 10 +- 
rocAL_pybind/examples/image_comparison.py | 180 ++ rocAL_pybind/examples/parse_config.py | 93 +- .../prefetch_queue_depth.py | 68 +- .../examples/rocAL_api_caffe2_reader.py | 71 +- .../examples/rocAL_api_caffe_reader.py | 90 +- .../examples/rocAL_api_coco_pipeline.py | 274 +- rocAL_pybind/examples/rocAL_api_pipeline.py | 122 + .../examples/rocAL_api_python_unittest.py | 513 +++- ...rocAL_api_pytorch_classification_reader.py | 222 +- .../rocAL_api_tf_classification_reader.py | 72 +- .../rocAL_api_tf_detection_pipeline.py | 82 +- .../examples/rocAL_api_video_pipeline.py | 108 +- .../Augmentation_examples.ipynb | 189 ++ ...lassification_training_flowerdataset.ipynb | 667 +++++ .../VideoReader_with_label.ipynb | 293 ++ .../rocAL_example_doc/Video_decoder.ipynb | 285 ++ .../create_classification_flower_dataset.py | 83 + .../resize_implementation.ipynb | 201 ++ .../rocAL_example_doc/tf_dataloader.ipynb | 179 ++ .../tf_petsTrainingExample/requirements.sh | 1 + .../train_withROCAL_withTFRecordReader.py | 319 +- rocAL_pybind/getrocALwheelname.py | 4 +- rocAL_pybind/rocal_pybind.cpp | 1216 ++++---- rocAL_pybind/setup.py | 29 +- 307 files changed, 19598 insertions(+), 17295 deletions(-) create mode 100644 rocAL/include/api/rocal_api_tensor.h rename rocAL/include/loaders/{image => }/loader_module.h (68%) delete mode 100644 rocAL/include/loaders/video/video_loader_module.h create mode 100644 rocAL/include/meta_data/meta_node_resize_mirror_normalize.h create mode 100644 rocAL/include/pipeline/tensor.h create mode 100644 rocAL/source/meta_data/meta_node_resize_mirror_normalize.cpp create mode 100644 rocAL/source/pipeline/tensor.cpp create mode 100644 rocAL_pybind/examples/Default_anchors_retinanet.bin create mode 100644 rocAL_pybind/examples/image_comparison.py create mode 100644 rocAL_pybind/examples/rocAL_api_pipeline.py create mode 100644 rocAL_pybind/examples/rocAL_example_doc/Augmentation_examples.ipynb create mode 100644 
rocAL_pybind/examples/rocAL_example_doc/Classification_training_flowerdataset.ipynb create mode 100644 rocAL_pybind/examples/rocAL_example_doc/VideoReader_with_label.ipynb create mode 100644 rocAL_pybind/examples/rocAL_example_doc/Video_decoder.ipynb create mode 100644 rocAL_pybind/examples/rocAL_example_doc/create_classification_flower_dataset.py create mode 100644 rocAL_pybind/examples/rocAL_example_doc/resize_implementation.ipynb create mode 100644 rocAL_pybind/examples/rocAL_example_doc/tf_dataloader.ipynb diff --git a/rocAL/CMakeLists.txt b/rocAL/CMakeLists.txt index 9861c5ab9..ec3c4ffbf 100644 --- a/rocAL/CMakeLists.txt +++ b/rocAL/CMakeLists.txt @@ -318,6 +318,7 @@ if(${BUILD_ROCAL}) include/api/rocal_api_data_transfer.h include/api/rocal_api_parameters.h include/api/rocal_api_meta_data.h + include/api/rocal_api_tensor.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocal ) endif(BUILD_DEV) diff --git a/rocAL/include/api/rocal_api.h b/rocAL/include/api/rocal_api.h index 484ef8c4b..78c0acb17 100644 --- a/rocAL/include/api/rocal_api.h +++ b/rocAL/include/api/rocal_api.h @@ -1,4 +1,5 @@ /* +MIT License Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy @@ -24,6 +25,7 @@ THE SOFTWARE. #define ROCAL_H #include "rocal_api_types.h" +#include "rocal_api_tensor.h" #include "rocal_api_parameters.h" #include "rocal_api_data_loaders.h" #include "rocal_api_augmentation.h" @@ -42,27 +44,21 @@ THE SOFTWARE. /*! * \brief rocalCreate creates the context for a new augmentation pipeline. Initializes all the required internals for the pipeline * \ingroup group_rocal - * - * \param [in] batch_size + * \param [in] batch_size batch size * \param [in] affinity RocalProcessMode: Defines whether rocal data loading should be on the CPU or GPU. 
- * \param [in] gpu_id - * \param [in] cpu_thread_count - * \param [in] prefetch_queue_depth + * \param [in] gpu_id GPU id + * \param [in] cpu_thread_count number of cpu threads + * \param [in] prefetch_queue_depth The depth of the prefetch queue. * \param [in] output_tensor_data_type RocalTensorOutputType: Defines whether the output of rocal tensor is FP32 or FP16. * \return A \ref RocalContext - The context for the pipeline */ -extern "C" RocalContext ROCAL_API_CALL rocalCreate(size_t batch_size, - RocalProcessMode affinity, - int gpu_id = 0, - size_t cpu_thread_count = 1, - size_t prefetch_queue_depth = 3, - RocalTensorOutputType output_tensor_data_type = RocalTensorOutputType::ROCAL_FP32); +extern "C" RocalContext ROCAL_API_CALL rocalCreate(size_t batch_size, RocalProcessMode affinity, int gpu_id = 0, size_t cpu_thread_count = 1, size_t prefetch_queue_depth = 3, RocalTensorOutputType output_tensor_data_type = RocalTensorOutputType::ROCAL_FP32); /*! * \brief rocalVerify function to verify the graph for all the inputs and outputs * \ingroup group_rocal * - * \param [in] context + * \param [in] context the rocal context * \return A \ref RocalStatus - A status code indicating the success or failure */ extern "C" RocalStatus ROCAL_API_CALL rocalVerify(RocalContext context); @@ -71,7 +67,7 @@ extern "C" RocalStatus ROCAL_API_CALL rocalVerify(RocalContext context); * \brief rocalRun function to process and run the built and verified graph. * \ingroup group_rocal * - * \param [in] context + * \param [in] context the rocal context * \return A \ref RocalStatus - A status code indicating the success or failure */ extern "C" RocalStatus ROCAL_API_CALL rocalRun(RocalContext context); @@ -80,7 +76,7 @@ extern "C" RocalStatus ROCAL_API_CALL rocalRun(RocalContext context); * \brief rocalRelease function to free all the resources allocated during the graph creation process. 
* \ingroup group_rocal * - * \param [in] context + * \param [in] rocal_context the rocal context * \return A \ref RocalStatus - A status code indicating the success or failure. */ extern "C" RocalStatus ROCAL_API_CALL rocalRelease(RocalContext rocal_context); diff --git a/rocAL/include/api/rocal_api_augmentation.h b/rocAL/include/api/rocal_api_augmentation.h index d397e8002..e9c9a68b0 100644 --- a/rocAL/include/api/rocal_api_augmentation.h +++ b/rocAL/include/api/rocal_api_augmentation.h @@ -33,725 +33,1069 @@ THE SOFTWARE. */ /*! - * \brief Rearranges the order of the frames in the sequences with respect to new_order. - * new_order can have values in the range [0, sequence_length). - * Frames can be repeated or dropped in the new_order. + * \brief Rearranges the order of the frames in the sequences with respect to new_order. new_order can have values in the range [0, sequence_length). Frames can be repeated or dropped in the new_order. + * \ingroup group_rocal_augmentations + * \note Accepts U8 and RGB24 input. + * \param [in] p_context context for the pipeline. + * \param [in] p_input Input Rocal Tensor + * \param [in] new_order represents the new order of the frames in the sequence + * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved. + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSequenceRearrange(RocalContext p_context, RocalTensor p_input, + std::vector &new_order, + bool is_output); + +/*! \brief Resize images. + * \note Accepts U8 and RGB24 input. * \ingroup group_rocal_augmentations * \note: Accepts U8 and RGB24 input. - * \param context context for the pipeline. 
- * \param input - * \param new_order - * \param new_sequence_length - * \param sequence_length - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalSequenceRearrange(RocalContext context, RocalImage input, - unsigned int *new_order, unsigned int new_sequence_length, - unsigned int sequence_length, bool is_output); - -/*! \brief Accepts U8 and RGB24 input. - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param dest_width - * \param dest_height - * \param is_output - * \param scaling_mode The resize scaling_mode to resize the image. - * \param max_size Limits the size of the resized image. - * \param resize_shorter The length of the shorter dimension of the image. - * \param resize_longer The length of the larger dimension of the image. - * \param interpolation_type The type of interpolation to be used for resize. - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalResize(RocalContext context, RocalImage input, - unsigned dest_width, unsigned dest_height, + * \param [in] context context for the pipeline. + * \param [in] input Input Rocal Tensor + * \param [in] dest_width output width + * \param [in] dest_height output height + * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved. + * \param [in] scaling_mode The resize scaling_mode to resize the image. + * \param [in] max_size Limits the size of the resized image. + * \param [in] resize_shorter The length of the shorter dimension of the image. + * \param [in] resize_longer The length of the larger dimension of the image. + * \param [in] interpolation_type The type of interpolation to be used for resize. 
+ * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalResize(RocalContext context, RocalTensor input, + unsigned dest_width, unsigned dest_height, + bool is_output, + RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_STRETCH, + std::vector max_size = {}, + unsigned resize_shorter = 0, + unsigned resize_longer = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Fused function which performs resize, normalize and flip on images. + * \ingroup group_rocal_augmentations + * \note Accepts U8 and RGB24 input. + * \param [in] p_context Rocal context + * \param [in] p_input Input Rocal Tensor + * \param [in] dest_width output width + * \param [in] dest_height output height + * \param [in] mean The channel mean values + * \param [in] std_dev The channel standard deviation values + * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved. + * \param [in] scaling_mode The resize scaling_mode to resize the image. + * \param [in] max_size Limits the size of the resized image. + * \param [in] resize_shorter The length of the shorter dimension of the image. + * \param [in] resize_longer The length of the larger dimension of the image. + * \param [in] interpolation_type The type of interpolation to be used for resize. + * \param [in] mirror Parameter to enable horizontal flip for output image. 
+ * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalResizeMirrorNormalize(RocalContext p_context, RocalTensor p_input, unsigned dest_width, + unsigned dest_height, std::vector &mean, std::vector &std_dev, + bool is_output, + RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_STRETCH, + std::vector max_size = {}, unsigned resize_shorter = 0, + unsigned resize_longer = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION, + RocalIntParam mirror = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Fused function which performs crop and resize on images. + * \ingroup group_rocal_augmentations + * \note Accepts U8 and RGB24 input. + * \param [in] context Rocal context + * \param [in] input Input Rocal Tensor + * \param [in] dest_width output width + * \param [in] dest_height output height + * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved. 
+ * \param [in] area Target area for the crop + * \param [in] aspect_ratio specifies the aspect ratio of the cropped region + * \param [in] x_center_drift Horizontal shift of the crop center from its original position in the input image + * \param [in] y_center_drift Vertical shift of the crop center from its original position in the input image + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCropResize(RocalContext context, RocalTensor input, + unsigned dest_width, unsigned dest_height, + bool is_output, + RocalFloatParam area = NULL, + RocalFloatParam aspect_ratio = NULL, + RocalFloatParam x_center_drift = NULL, + RocalFloatParam y_center_drift = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Fused function which performs crop and resize on images with fixed crop coordinates. + * \ingroup group_rocal_augmentations + * \note Accepts U8 and RGB24 input. + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] dest_width output width + * \param [in] dest_height output height + * \param [in] is_output True: the output image is needed by user and will be copied to output buffers using the data transfer API calls. False: the output image is just an intermediate image, user is not interested in using it directly. This option allows certain optimizations to be achieved. 
+ * \param [in] area Target area for the crop + * \param [in] aspect_ratio specifies the aspect ratio of the cropped region + * \param [in] x_center_drift Horizontal shift of the crop center from its original position in the input image + * \param [in] y_center_drift Vertical shift of the crop center from its original position in the input image + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCropResizeFixed(RocalContext context, RocalTensor input, + unsigned dest_width, unsigned dest_height, + bool is_output, + float area, float aspect_ratio, + float x_center_drift, float y_center_drift, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Rotates images. + * \ingroup group_rocal_augmentations + * \note Accepts U8 and RGB24 input. + * \param [in] context Rocal context + * \param [in] input Input Rocal Tensor + * \param [in] is_output True: the output tensor is needed by user and will be copied to output buffers using the data transfer API calls. False: the output tensor is just an intermediate tensor, user is not interested in using it directly. This option allows certain optimizations to be achieved. + * \param [in] angle Rocal parameter defining the rotation angle value in degrees. + * \param [in] dest_width output width + * \param [in] dest_height output height + * \param [in] interpolation_type The type of interpolation to be used for rotate. 
+ * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalRotate(RocalContext context, RocalTensor input, bool is_output, + RocalFloatParam angle = NULL, unsigned dest_width = 0, + unsigned dest_height = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Rotates images with fixed angle value. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal Tensor + * \param [in] dest_width output width + * \param [in] dest_height output height + * \param [in] is_output Is the output tensor part of the graph output + * \param [in] angle The rotation angle value in degrees. + * \param [in] interpolation_type The type of interpolation to be used for rotate. + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalRotateFixed(RocalContext context, RocalTensor input, float angle, + bool is_output, unsigned dest_width = 0, unsigned dest_height = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts brightness of the image. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] alpha controls contrast of the image + * \param [in] beta controls brightness of the image + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalBrightness(RocalContext context, RocalTensor input, bool is_output, + RocalFloatParam alpha = NULL, RocalFloatParam beta = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts brightness of the image with fixed parameters. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] alpha controls contrast of the image + * \param [in] beta controls brightness of the image + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalBrightnessFixed(RocalContext context, RocalTensor input, + float alpha, float beta, + bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies gamma correction on image. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] gamma gamma value for the image. 
+ * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalGamma(RocalContext context, RocalTensor input, bool is_output, - RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_STRETCH, - std::vector max_size = {}, - unsigned resize_shorter = 0, - unsigned resize_longer = 0, - RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); - -/*! \brief Accepts U8 and RGB24 input. - * \ingroup group_rocal_augmentations - * \param context Rocal context - * \param input Input Rocal Image - * \param dest_width The output width - * \param dest_height The output height - * \param mean The channel mean values - * \param std_dev The channel standard deviation values - * \param is_output True: the output image is needed by user and will be copied to output buffers using the data - * transfer API calls. False: the output image is just an intermediate image, user is not interested in - * using it directly. This option allows certain optimizations to be achieved. - * \param p_mirror Parameter to enable horizontal flip for output image. - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalResizeMirrorNormalize(RocalContext p_context, RocalImage p_input, - unsigned dest_width, unsigned dest_height, - std::vector &mean, std::vector &std_dev, - bool is_output, RocalIntParam p_mirror = NULL); - -/*! \brief Accepts U8 and RGB24 input. - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param dest_width - * \param dest_height - * \param is_output - * \param area - * \param x_center_drift - * \param y_center_drift - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalCropResize(RocalContext context, RocalImage input, unsigned dest_width, - unsigned dest_height, bool is_output, - RocalFloatParam area = NULL, - RocalFloatParam aspect_ratio = NULL, - RocalFloatParam x_center_drift = NULL, - RocalFloatParam y_center_drift = NULL); - -/*! \brief Accepts U8 and RGB24 input. Crops the input image to a new area and same aspect ratio. - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param dest_width - * \param dest_height - * \param is_output - * \param area - * \param x_center_drift - * \param y_center_drift - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalCropResizeFixed(RocalContext context, RocalImage input, unsigned dest_width, - unsigned dest_height, bool is_output, float area, float aspect_ratio, - float x_center_drift, float y_center_drift); - -/*! \brief Accepts U8 and RGB24 input. The output image dimension can be set to new values allowing the rotated image to fit, - * otherwise; the image is cropped to fit the result. - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. Rocal context - * \param input Input Rocal Image - * \param is_output True: the output image is needed by user and will be copied to output buffers using the data - * transfer API calls. False: the output image is just an intermediate image, user is not interested in - * using it directly. This option allows certain optimizations to be achieved. - * \param angle Rocal parameter defining the rotation angle value in degrees. - * \param dest_width The output width - * \param dest_height The output height - * \return Returns a new image that keeps the result. 
- */ -extern "C" RocalImage ROCAL_API_CALL rocalRotate(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam angle = NULL, unsigned dest_width = 0, - unsigned dest_height = 0); - -/*! \brief Accepts U8 and RGB24 input. The output image dimension can be set to new values allowing the rotated image to fit, - * otherwise; the image is cropped to fit the result. - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. Rocal context - * \param input Input Rocal Image - * \param dest_width The output width - * \param dest_height The output height - * \param is_output Is the output image part of the graph output - * \param angle The rotation angle value in degrees. - * \return Returns a new image that keeps the result. - */ -extern "C" RocalImage ROCAL_API_CALL rocalRotateFixed(RocalContext context, RocalImage input, float angle, - bool is_output, unsigned dest_width = 0, unsigned dest_height = 0); - -/*! \brief Accepts U8 and RGB24 inputs - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param alpha - * \param beta - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalBrightness(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam alpha = NULL, RocalFloatParam beta = NULL); - -/*! \brief Accepts U8 and RGB24 inputs - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param alpha - * \param beta - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalBrightnessFixed(RocalContext context, RocalImage input, - float alpha, float beta, - bool is_output); - -/*! \brief Accepts U8 and RGB24 inputs - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param is_output - * \param alpha - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalGamma(RocalContext context, RocalImage input, - bool is_output, - RocalFloatParam alpha = NULL); + RocalFloatParam gamma = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief Accepts U8 and RGB24 inputs +/*! \brief Applies gamma correction on image with fixed parameters. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param alpha - * \param is_output - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] gamma gamma value for the image. + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalGammaFixed(RocalContext context, RocalImage input, float alpha, bool is_output); +extern "C" RocalTensor ROCAL_API_CALL rocalGammaFixed(RocalContext context, RocalTensor input, + float gamma, + bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief Accepts U8 and RGB24 inputs. +/*! \brief Adjusts contrast of the image. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param is_output - * \param min - * \param max - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] contrast_factor parameter representing the contrast factor for the contrast operation + * \param [in] contrast_center parameter representing the contrast center for the contrast operation + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalContrast(RocalContext context, RocalImage input, bool is_output, - RocalIntParam min = NULL, RocalIntParam max = NULL); +extern "C" RocalTensor ROCAL_API_CALL rocalContrast(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam contrast_factor = NULL, RocalFloatParam contrast_center = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief Accepts U8 and RGB24 inputs. +/*! \brief Adjusts contrast of the image with fixed parameters. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param min - * \param max - * \param is_output - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] contrast_factor parameter representing the contrast factor for the contrast operation + * \param [in] contrast_center parameter representing the contrast center for the contrast operation + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalContrastFixed(RocalContext context, RocalImage input, - unsigned min, unsigned max, - bool is_output); +extern "C" RocalTensor ROCAL_API_CALL rocalContrastFixed(RocalContext context, RocalTensor input, + float contrast_factor, float contrast_center, + bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief +/*! \brief Flip images horizontally and/or vertically based on inputs. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param axis - * \param is_output - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] horizonal_flag determines whether the input tensor should be flipped horizontally + * \param [in] vertical_flag determines whether the input tensor should be flipped vertically + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalFlip(RocalContext context, RocalImage input, bool is_output, - RocalIntParam flip_axis = NULL); +extern "C" RocalTensor ROCAL_API_CALL rocalFlip(RocalContext context, RocalTensor input, bool is_output, + RocalIntParam horizonal_flag = NULL, RocalIntParam vertical_flag = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief +/*! \brief Flip images horizontally and/or vertically with fixed parameters. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param axis - * \param is_output - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] horizonal_flag determines whether the input tensor should be flipped horizontally + * \param [in] vertical_flag determines whether the input tensor should be flipped vertically + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalFlipFixed(RocalContext context, RocalImage input, int flip_axis, bool is_output); +extern "C" RocalTensor ROCAL_API_CALL rocalFlipFixed(RocalContext context, RocalTensor input, + int horizonal_flag, int vertical_flag, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief Accepts U8 and RGB24 inputs +/*! \brief Applies blur effect to images. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param sdev - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] kernel_size size of the kernel used for blurring + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalBlur(RocalContext context, RocalImage input, bool is_output, - RocalIntParam sdev = NULL); +extern "C" RocalTensor ROCAL_API_CALL rocalBlur(RocalContext context, RocalTensor input, + bool is_output, + RocalIntParam kernel_size = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief +/*! 
\brief Applies blur effect to images with fixed parameters. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param sdev - * \param is_output - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] kernel_size size of the kernel used for blurring + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalBlurFixed(RocalContext context, RocalImage input, int sdev, bool is_output); +extern "C" RocalTensor ROCAL_API_CALL rocalBlurFixed(RocalContext context, RocalTensor input, + int kernel_size, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); /*! \brief Blends two input images given the ratio: output = input1*ratio + input2*(1-ratio) * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input1 - * \param input2 - * \param is_output - * \param ratio Rocal parameter defining the blending ratio, should be between 0.0 and 1.0. 
- * \return + * \param [in] context Rocal context + * \param [in] input1 Input1 Rocal tensor + * \param [in] input2 Input2 Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] ratio Rocal parameter defining the blending ratio, should be between 0.0 and 1.0 + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalBlend(RocalContext context, RocalImage input1, RocalImage input2, bool is_output, - RocalFloatParam ratio = NULL); +extern "C" RocalTensor ROCAL_API_CALL rocalBlend(RocalContext context, RocalTensor input1, RocalTensor input2, + bool is_output, + RocalFloatParam ratio = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief Blends two input images given the ratio: output = input1*ratio + input2*(1-ratio) +/*! \brief Blends two input images given the fixed ratio: output = input1*ratio + input2*(1-ratio) * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input1 - * \param input2 - * \param ratio Float value defining the blending ratio, should be between 0.0 and 1.0. - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalBlendFixed(RocalContext context, RocalImage input1, RocalImage input2, - float ratio, - bool is_output); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param is_output - * \param x0 - * \param x1 - * \param y0 - * \param y1 - * \param o0 - * \param o1 - * \param dest_height - * \param dest_width - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalWarpAffine(RocalContext context, RocalImage input, bool is_output, - unsigned dest_height = 0, unsigned dest_width = 0, - RocalFloatParam x0 = NULL, RocalFloatParam x1 = NULL, - RocalFloatParam y0 = NULL, RocalFloatParam y1 = NULL, - RocalFloatParam o0 = NULL, RocalFloatParam o1 = NULL); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param x0 - * \param x1 - * \param y0 - * \param y1 - * \param o0 - * \param o1 - * \param is_output - * \param dest_height - * \param dest_width - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalWarpAffineFixed(RocalContext context, RocalImage input, float x0, float x1, - float y0, float y1, float o0, float o1, bool is_output, - unsigned int dest_height = 0, unsigned int dest_width = 0); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalFishEye(RocalContext context, RocalImage input, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param sdev - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalVignette(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam sdev = NULL); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param sdev - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalVignetteFixed(RocalContext context, RocalImage input, float sdev, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param is_output - * \param min - * \param max - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalJitter(RocalContext context, RocalImage input, bool is_output, - RocalIntParam kernel_size = NULL); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param min - * \param max - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalJitterFixed(RocalContext context, RocalImage input, - int kernel_size, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param sdev - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalSnPNoise(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam sdev = NULL); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param sdev - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalSnPNoiseFixed(RocalContext context, RocalImage input, float sdev, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param sdev - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalSnow(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam shift = NULL); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param sdev - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalSnowFixed(RocalContext context, RocalImage input, float shift, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param is_output - * \param rain_value - * \param rain_width - * \param rain_heigth - * \param rain_transparency - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalRain(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam rain_value = NULL, - RocalIntParam rain_width = NULL, - RocalIntParam rain_height = NULL, - RocalFloatParam rain_transparency = NULL); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param rain_value - * \param rain_width - * \param rain_heigth - * \param rain_transparency - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalRainFixed(RocalContext context, RocalImage input, - float rain_value, - int rain_width, - int rain_height, - float rain_transparency, - bool is_output); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param adjustment - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalColorTemp(RocalContext context, RocalImage input, bool is_output, - RocalIntParam adjustment = NULL); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param adjustment - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalColorTempFixed(RocalContext context, RocalImage input, int adjustment, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param fog_value - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalFog(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam fog_value = NULL); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param fog_value - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalFogFixed(RocalContext context, RocalImage input, float fog_value, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param strength - * \param zoom - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalLensCorrection(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam strength = NULL, - RocalFloatParam zoom = NULL); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param strength - * \param zoom - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalLensCorrectionFixed(RocalContext context, RocalImage input, - float strength, float zoom, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalPixelate(RocalContext context, RocalImage input, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param shift - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalExposure(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam shift = NULL); -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param shift - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalExposureFixed(RocalContext context, RocalImage input, float shift, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalHue(RocalContext context, RocalImage input, - bool is_output, - RocalFloatParam hue = NULL); - -/*! \brief + * \param [in] context Rocal context + * \param [in] input1 Input1 Rocal tensor + * \param [in] input2 Input2 Rocal tensor + * \param [in] ratio Float value defining the blending ratio, should be between 0.0 and 1.0. + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalBlendFixed(RocalContext context, RocalTensor input1, RocalTensor input2, + float ratio, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies affine transformation to images. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param hue - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] x0 float parameter representing the coefficient of affine tensor matrix + * \param [in] x1 float parameter representing the coefficient of affine tensor matrix + * \param [in] y0 float parameter representing the coefficient of affine tensor matrix + * \param [in] y1 float parameter representing the coefficient of affine tensor matrix + * \param [in] o0 float parameter representing the coefficient of affine tensor matrix + * \param [in] o1 float parameter representing the coefficient of affine tensor matrix + * \param [in] dest_height output height + * \param [in] dest_width output width + * \param [in] interpolation_type The type of interpolation to be used for warp affine. 
+ * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalHueFixed(RocalContext context, RocalImage input, - float hue, - bool is_output); +extern "C" RocalTensor ROCAL_API_CALL rocalWarpAffine(RocalContext context, RocalTensor input, bool is_output, + unsigned dest_height = 0, unsigned dest_width = 0, + RocalFloatParam x0 = NULL, RocalFloatParam x1 = NULL, + RocalFloatParam y0 = NULL, RocalFloatParam y1 = NULL, + RocalFloatParam o0 = NULL, RocalFloatParam o1 = NULL, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief Accepts U8 and RGB24 inputs. +/*! \brief Applies affine transformation to images with fixed affine matrix. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param min - * \param max - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] x0 float parameter representing the coefficient of affine tensor matrix + * \param [in] x1 float parameter representing the coefficient of affine tensor matrix + * \param [in] y0 float parameter representing the coefficient of affine tensor matrix + * \param [in] y1 float parameter representing the coefficient of affine tensor matrix + * \param [in] o0 float parameter representing the coefficient of affine tensor matrix + * \param [in] o1 float parameter representing the coefficient of affine tensor matrix + * \param [in] dest_height output height + * \param [in] dest_width output width + * \param [in] interpolation_type The type of interpolation to be used for warp affine. 
+ * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalSaturation(RocalContext context, - RocalImage input, +extern "C" RocalTensor ROCAL_API_CALL rocalWarpAffineFixed(RocalContext context, RocalTensor input, float x0, float x1, + float y0, float y1, float o0, float o1, bool is_output, + unsigned int dest_height = 0, unsigned int dest_width = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies fish eye effect on images. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalFishEye(RocalContext context, RocalTensor input, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies vignette effect on images. + * \ingroup group_rocal_augmentations + * \note Accepts U8 and RGB24 input. 
+ * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] sdev standard deviation for the vignette effect + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalVignette(RocalContext context, RocalTensor input, + bool is_output, RocalFloatParam sdev = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies vignette effect on images with fixed parameters. + * \ingroup group_rocal_augmentations + * \note Accepts U8 and RGB24 input. + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] sdev standard deviation for the vignette effect + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalVignetteFixed(RocalContext context, RocalTensor input, + float sdev, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies jitter effect on images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] kernel_size kernel size used for the jitter effect + * \param [in] seed seed value for the random number generator + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalJitter(RocalContext context, RocalTensor input, + bool is_output, + RocalIntParam kernel_size = NULL, + int seed = 0, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies jitter effect on images with fixed kernel size. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] kernel_size kernel size used for the jitter effect + * \param [in] seed seed value for the random number generator + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalJitterFixed(RocalContext context, RocalTensor input, + int kernel_size, bool is_output, int seed = 0, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies salt and pepper noise effect on images. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] noise_prob probability of applying the Salt and Pepper noise. 
+ * \param [in] salt_prob probability of applying salt noise + * \param [in] salt_val specifies the value of the salt noise + * \param [in] pepper_val specifies the value of the pepper noise + * \param [in] seed seed value for the random number generator + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSnPNoise(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam noise_prob = NULL, RocalFloatParam salt_prob = NULL, + RocalFloatParam salt_val = NULL, RocalFloatParam pepper_val = NULL, + int seed = 0, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies salt and pepper noise on images with fixed parameters. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] noise_prob probability of applying the Salt and Pepper noise. + * \param [in] salt_prob probability of applying salt noise + * \param [in] salt_val specifies the value of the salt noise + * \param [in] pepper_val specifies the value of the pepper noise + * \param [in] seed seed value for the random number generator + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSnPNoiseFixed(RocalContext context, RocalTensor input, + float noise_prob, float salt_prob, + float salt_val, float pepper_val, + bool is_output, int seed = 0, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies snow effect on images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] snow Float param representing the intensity of snow effect + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSnow(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam snow = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies snow effect on images with fixed parameter. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] snow Float param representing the intensity of snow effect + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSnowFixed(RocalContext context, RocalTensor input, + float snow, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies rain effect on images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] rain_value parameter represents the intensity of rain effect + * \param [in] rain_width parameter represents the width of the rain effect + * \param [in] rain_height parameter represents the height of the rain effect + * \param [in] rain_transparency parameter represents the transparency of the rain effect + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalRain(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam rain_value = NULL, + RocalIntParam rain_width = NULL, + RocalIntParam rain_height = NULL, + RocalFloatParam rain_transparency = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies rain effect on images with fixed parameter. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] rain_value parameter represents the intensity of rain effect + * \param [in] rain_width parameter represents the width of the rain effect + * \param [in] rain_height parameter represents the height of the rain effect + * \param [in] rain_transparency parameter represents the transparency of the rain effect + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalRainFixed(RocalContext context, RocalTensor input, + float rain_value, + int rain_width, + int rain_height, + float rain_transparency, bool is_output, - RocalFloatParam sat = NULL); - -/*! \brief rocalSaturationFixed - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - */ -extern "C" RocalImage ROCAL_API_CALL rocalSaturationFixed(RocalContext context, RocalImage input, float sat, - bool is_output); - -/*! \brief Accepts U8 and RGB24 inputs. - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param min - * \param max - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalCopy(RocalContext context, RocalImage input, bool is_output); - -/*! \brief - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalNop(RocalContext context, RocalImage input, bool is_output); - -/*! \brief Accepts U8 and RGB24 inputs - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param is_output - * \param alpha - * \param beta - * \param hue - * \param sat - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalColorTwist(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam alpha = NULL, - RocalFloatParam beta = NULL, - RocalFloatParam hue = NULL, - RocalFloatParam sat = NULL); - -/*! \brief Accepts U8 and RGB24 inputs - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param is_output - * \param alpha - * \param beta - * \param hue - * \param sat - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalColorTwistFixed(RocalContext context, RocalImage input, - float alpha, - float beta, - float hue, - float sat, - bool is_output); -/*! \brief rocalCropMirrorNormalize - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - */ -extern "C" RocalImage ROCAL_API_CALL rocalCropMirrorNormalize(RocalContext context, RocalImage input, - unsigned crop_depth, - unsigned crop_height, - unsigned crop_width, - float start_x, - float start_y, - float start_z, - std::vector &mean, - std::vector &std_dev, - bool is_output, - RocalIntParam mirror = NULL); - -/*! \brief rocalCrop - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - */ -extern "C" RocalImage ROCAL_API_CALL rocalCrop(RocalContext context, RocalImage input, bool is_output, - RocalFloatParam crop_width = NULL, - RocalFloatParam crop_height = NULL, - RocalFloatParam crop_depth = NULL, - RocalFloatParam crop_pox_x = NULL, - RocalFloatParam crop_pos_y = NULL, - RocalFloatParam crop_pos_z = NULL); - -/*! \brief rocalCropFixed - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- */ -extern "C" RocalImage ROCAL_API_CALL rocalCropFixed(RocalContext context, RocalImage input, - unsigned crop_width, - unsigned crop_height, - unsigned crop_depth, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the color temperature in images. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] adjustment color temperature adjustment value + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalColorTemp(RocalContext context, RocalTensor input, + bool is_output, + RocalIntParam adjustment = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the color temperature in images with fixed value. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] adjustment color temperature adjustment value + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \param [in] is_output is the output tensor part of the graph output + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalColorTempFixed(RocalContext context, RocalTensor input, + int adjustment, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies fog effect on images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] fog_value parameter representing the intensity of fog effect + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalFog(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam fog_value = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies fog effect on images with fixed parameter. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] fog_value parameter representing the intensity of fog effect + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalFogFixed(RocalContext context, RocalTensor input, + float fog_value, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies lens correction effect on images. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] strength parameter representing the strength of the lens correction. + * \param [in] zoom parameter representing the zoom factor of the lens correction. 
+ * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalLensCorrection(RocalContext context, RocalTensor input, bool is_output, + RocalFloatParam strength = NULL, + RocalFloatParam zoom = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies lens correction effect on images with fixed parameters. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] strength parameter representing the strength of the lens correction. + * \param [in] zoom parameter representing the zoom factor of the lens correction. + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalLensCorrectionFixed(RocalContext context, RocalTensor input, + float strength, float zoom, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Applies pixelate effect on images. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalPixelate(RocalContext context, RocalTensor input, bool is_output, - float crop_pox_x, - float crop_pos_y, - float crop_pos_z); -/*! \brief rocalCropCenterFixed - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param crop_width - */ -extern "C" RocalImage ROCAL_API_CALL rocalCropCenterFixed(RocalContext context, RocalImage input, - unsigned crop_width, - unsigned crop_height, - unsigned crop_depth, - bool output); -/*! \brief rocalResizeCropMirrorFixed - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - */ -extern "C" RocalImage ROCAL_API_CALL rocalResizeCropMirrorFixed(RocalContext context, RocalImage input, - unsigned dest_width, unsigned dest_height, - bool is_output, - unsigned crop_h, - unsigned crop_w, - RocalIntParam mirror); -/*! \brief rocalResizeCropMirror - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - */ -extern "C" RocalImage ROCAL_API_CALL rocalResizeCropMirror(RocalContext context, RocalImage input, - unsigned dest_width, unsigned dest_height, - bool is_output, RocalFloatParam crop_height = NULL, - RocalFloatParam crop_width = NULL, RocalIntParam mirror = NULL); + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the exposure in images. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] exposure_factor exposure adjustment factor + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalExposure(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam exposure_factor = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the exposure in images with fixed parameters. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] exposure_factor exposure adjustment factor + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalExposureFixed(RocalContext context, RocalTensor input, + float exposure_factor, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the hue in images. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] hue hue adjustment value in degrees + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalHue(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam hue = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the hue in images with fixed parameters. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] hue hue adjustment value in degrees + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalHueFixed(RocalContext context, RocalTensor input, + float hue, + bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the saturation in images. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] saturation saturation adjustment value + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSaturation(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam saturation = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); -/*! \brief Accepts U8 and RGB24 inputs and Ouptus Cropped Images, valid bounding boxes and labels +/*! \brief Adjusts the saturation in images with fixed parameters. * \ingroup group_rocal_augmentations - * \param context context for the pipeline. 
- * \param input - * \param num_of_attmpts - * \return + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] saturation saturation adjustment value + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor */ -extern "C" RocalImage ROCAL_API_CALL rocalRandomCrop(RocalContext context, RocalImage input, +extern "C" RocalTensor ROCAL_API_CALL rocalSaturationFixed(RocalContext context, RocalTensor input, + float saturation, bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Copies input tensor to output tensor. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCopy(RocalContext context, RocalTensor input, bool is_output); + +/*! \brief Performs no operation. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalNop(RocalContext context, RocalTensor input, bool is_output); + +/*! \brief Adjusts the brightness, hue and saturation of the images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] alpha parameter that controls the brightness of an image + * \param [in] beta parameter that helps in tuning the color balance of an image + * \param [in] hue parameter that adjusts the hue of an image + * \param [in] sat parameter that controls the intensity of colors + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalColorTwist(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam alpha = NULL, + RocalFloatParam beta = NULL, + RocalFloatParam hue = NULL, + RocalFloatParam sat = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Adjusts the brightness, hue and saturation of the images with fixed parameters. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] alpha parameter that controls the brightness of an image + * \param [in] beta parameter that helps in tuning the color balance of an image + * \param [in] hue parameter that adjusts the hue of an image + * \param [in] sat parameter that controls the intensity of colors + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \param [in] is_output is the output tensor part of the graph output + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalColorTwistFixed(RocalContext context, RocalTensor input, + float alpha, + float beta, + float hue, + float sat, + bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! 
\brief Fused function which performs crop, normalize and flip on images. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] crop_height crop height of the tensor + * \param [in] crop_width crop width of the tensor + * \param [in] start_x x-coordinate, start of the input tensor to be cropped + * \param [in] start_y y-coordinate, start of the input tensor to be cropped + * \param [in] mean mean value (specified for each channel) for tensor normalization + * \param [in] std_dev standard deviation value (specified for each channel) for tensor normalization + * \param [in] is_output is the output tensor part of the graph output + * \param [in] mirror controls horizontal flip of the tensor + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCropMirrorNormalize(RocalContext context, RocalTensor input, + unsigned crop_height, + unsigned crop_width, + float start_x, + float start_y, + std::vector &mean, + std::vector &std_dev, + bool is_output, + RocalIntParam mirror = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Crops images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] crop_height crop height of the tensor + * \param [in] crop_width crop width of the tensor + * \param [in] crop_depth crop depth of the tensor + * \param [in] crop_pox_x x-coordinate, start of the input tensor to be cropped + * \param [in] crop_pos_y y-coordinate, start of the input tensor to be cropped + * \param [in] crop_pos_z z-coordinate, start of the input tensor to be cropped + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCrop(RocalContext context, RocalTensor input, bool is_output, + RocalFloatParam crop_width = NULL, + RocalFloatParam crop_height = NULL, + RocalFloatParam crop_depth = NULL, + RocalFloatParam crop_pox_x = NULL, + RocalFloatParam crop_pos_y = NULL, + RocalFloatParam crop_pos_z = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Crops images with fixed coordinates. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] crop_height crop height of the tensor + * \param [in] crop_width crop width of the tensor + * \param [in] crop_depth crop depth of the tensor + * \param [in] crop_pox_x x-coordinate, start of the input tensor to be cropped + * \param [in] crop_pos_y y-coordinate, start of the input tensor to be cropped + * \param [in] crop_pos_z z-coordinate, start of the input tensor to be cropped + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCropFixed(RocalContext context, RocalTensor input, + unsigned crop_width, + unsigned crop_height, + unsigned crop_depth, bool is_output, - RocalFloatParam crop_area_factor = NULL, - RocalFloatParam crop_aspect_ratio = NULL, - RocalFloatParam crop_pos_x = NULL, - RocalFloatParam crop_pos_y = NULL, - int num_of_attempts = 20); - -/*! \brief Accepts U8 and RGB24 inputs and Ouptus Cropped Images, valid bounding boxes and labels - * \ingroup group_rocal_augmentations - * \param context context for the pipeline. - * \param input - * \param IOU_threshold - * \param num_of_attmpts - * \return - */ -extern "C" RocalImage ROCAL_API_CALL rocalSSDRandomCrop(RocalContext context, RocalImage input, - bool is_output, - RocalFloatParam threshold = NULL, - RocalFloatParam crop_area_factor = NULL, - RocalFloatParam crop_aspect_ratio = NULL, - RocalFloatParam crop_pos_x = NULL, - RocalFloatParam crop_pos_y = NULL, - int num_of_attempts = 20); - -#endif // MIVISIONX_ROCAL_API_AUGMENTATION_H + float crop_pox_x, + float crop_pos_y, + float crop_pos_z, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! 
\brief Crops images at the center with fixed coordinates. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] crop_height crop height of the tensor + * \param [in] crop_width crop width of the tensor + * \param [in] crop_depth crop depth of the tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalCropCenterFixed(RocalContext context, RocalTensor input, + unsigned crop_width, + unsigned crop_height, + unsigned crop_depth, + bool is_output, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Fused function which performs resize, crop and flip on images with fixed crop. + * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] dest_height output height + * \param [in] dest_width output width + * \param [in] crop_h crop height of the tensor + * \param [in] crop_w crop width of the tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] mirror controls horizontal flip of the tensor + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalResizeCropMirrorFixed(RocalContext context, RocalTensor input, + unsigned dest_width, unsigned dest_height, + bool is_output, + unsigned crop_h, + unsigned crop_w, + RocalIntParam mirror, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Fused function which performs resize, crop and flip on images. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] dest_height output height + * \param [in] dest_width output width + * \param [in] crop_height crop height of the tensor + * \param [in] crop_width crop width of the tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] mirror controls horizontal flip of the tensor + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalResizeCropMirror(RocalContext context, RocalTensor input, + unsigned dest_width, unsigned dest_height, + bool is_output, RocalFloatParam crop_height = NULL, + RocalFloatParam crop_width = NULL, RocalIntParam mirror = NULL, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Crops images randomly. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] num_of_attempts maximum number of attempts the function will make to find a valid crop + * \param [in] crop_area_factor specifies the proportion of the input image to be included in the cropped region + * \param [in] crop_aspect_ratio specifies the aspect ratio of the cropped region + * \param [in] crop_pos_x specifies a specific horizontal position for the crop + * \param [in] crop_pos_y specifies a specific vertical position for the crop + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalRandomCrop(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam crop_area_factor = NULL, + RocalFloatParam crop_aspect_ratio = NULL, + RocalFloatParam crop_pos_x = NULL, + RocalFloatParam crop_pos_y = NULL, + int num_of_attempts = 20, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +/*! \brief Crops images randomly used for SSD training. 
+ * \ingroup group_rocal_augmentations + * \param [in] context Rocal context + * \param [in] input Input Rocal tensor + * \param [in] is_output is the output tensor part of the graph output + * \param [in] threshold the threshold parameter for crop operation + * \param [in] crop_area_factor specifies the proportion of the input image to be included in the cropped region + * \param [in] crop_aspect_ratio specifies the aspect ratio of the cropped region + * \param [in] crop_pos_x specifies a specific horizontal position for the crop + * \param [in] crop_pos_y specifies a specific vertical position for the crop + * \param [in] num_of_attempts the maximum number of attempts the function will make to find a valid crop + * \param [in] output_layout the layout of the output tensor + * \param [in] output_datatype the data type of the output tensor + * \return RocalTensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalSSDRandomCrop(RocalContext context, RocalTensor input, + bool is_output, + RocalFloatParam threshold = NULL, + RocalFloatParam crop_area_factor = NULL, + RocalFloatParam crop_aspect_ratio = NULL, + RocalFloatParam crop_pos_x = NULL, + RocalFloatParam crop_pos_y = NULL, + int num_of_attempts = 20, + RocalTensorLayout output_layout = ROCAL_NONE, + RocalTensorOutputType output_datatype = ROCAL_UINT8); + +#endif // MIVISIONX_ROCAL_API_AUGMENTATION_H diff --git a/rocAL/include/api/rocal_api_data_loaders.h b/rocAL/include/api/rocal_api_data_loaders.h index 311227857..8d5295417 100644 --- a/rocAL/include/api/rocal_api_data_loaders.h +++ b/rocAL/include/api/rocal_api_data_loaders.h @@ -32,310 +32,241 @@ THE SOFTWARE. * \brief The AMD rocAL data loader functions. */ -/*! - * \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored. +/*! 
\brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel if the user wants. If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. 
+ * \param [in] is_output Determines if the user wants the loaded tensors to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. + * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegFileSource(RocalContext context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned internal_shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); +extern "C" RocalTensor ROCAL_API_CALL rocalJpegFileSource(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned internal_shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); -/*! - * \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a singe shard. only +/*! \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. 
It accepts external sharding information to load a single shard only. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded tensor to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not.
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegFileSourceSingleShard(RocalContext context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); +extern "C" RocalTensor ROCAL_API_CALL rocalJpegFileSourceSingleShard(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); -/*! - * \brief Creates JPEG image reader and decoder. Reads [Frames] sequences from a directory representing a collection of streams. +/*! \brief Creates JPEG image reader and decoder. Reads [Frames] sequences from a directory representing a collection of streams. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images in a sequence will be decoded to. 
- * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. - * \param sequence_length: The number of frames in a sequence. - * \param is_output Determines if the user wants the loaded sequences to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the sequences or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param step: Frame interval between each sequence. - * \param stride: Frame interval between frames in a sequence. - * \return Reference to the output image. + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images in a sequence will be decoded to. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. + * \param [in] sequence_length: The number of frames in a sequence. + * \param [in] is_output Determines if the user wants the loaded sequences to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the sequences or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. + * \param [in] step: Frame interval between each sequence. + * \param [in] stride: Frame interval between frames in a sequence. + * \return Reference to the output tensor. 
*/ -extern "C" RocalImage ROCAL_API_CALL rocalSequenceReader(RocalContext context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned internal_shard_count, - unsigned sequence_length, - bool is_output, - bool shuffle = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0); +extern "C" RocalTensor ROCAL_API_CALL rocalSequenceReader(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned internal_shard_count, + unsigned sequence_length, + bool is_output, + bool shuffle = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0); -/*! - * \brief Creates JPEG image reader and decoder. Reads [Frames] sequences from a directory representing a collection of streams. It accepts external sharding information to load a singe shard only. +/*! \brief Creates JPEG image reader and decoder. Reads [Frames] sequences from a directory representing a collection of streams. It accepts external sharding information to load a singe shard only. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images in a sequence will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param sequence_length: The number of frames in a sequence. - * \param is_output Determines if the user wants the loaded sequences to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param step: Frame interval between each sequence. - * \param stride: Frame interval between frames in a sequence. 
- * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images in a sequence will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] sequence_length: The number of frames in a sequence. + * \param [in] is_output Determines if the user wants the loaded sequences to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. + * \param [in] step: Frame interval between each sequence. + * \param [in] stride: Frame interval between frames in a sequence. + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalSequenceReaderSingleShard(RocalContext context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - unsigned sequence_length, - bool is_output, - bool shuffle = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0); +extern "C" RocalTensor ROCAL_API_CALL rocalSequenceReaderSingleShard(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + unsigned sequence_length, + bool is_output, + bool shuffle = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0); -/*! - * \brief Creates JPEG image reader and decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored. +/*! \brief JPEG image reader and decoder. 
It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param json_path Path to the COCO Json File - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] rocal_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] json_path Path to the COCO Json File + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCOCOFileSource(RocalContext context, - const char *source_path, - const char *json_path, - RocalImageColor color_format, - unsigned internal_shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); - -/*! - * \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored. +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCOCOFileSource(RocalContext context, + const char* source_path, + const char* json_path, + RocalImageColor color_format, + unsigned internal_shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); + +/*! \brief JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. If images are not Jpeg compressed they will be ignored. 
* \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param json_path Path to the COCO Json File - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1. - * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33. - * \param num_attempts Maximum number of attempts to generate crop. Default 10 - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \return Reference to the output image + * \param [in] rocal_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] json_path Path to the COCO Json File + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1. 
+ * \param [in] aspect_ratio Determines the aspect ratio of crop. Ranges from 0.75 to 1.33. + * \param [in] num_attempts Maximum number of attempts to generate crop. Default 10 + * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCOCOFileSourcePartial(RocalContext p_context, - const char *source_path, - const char *json_path, - RocalImageColor rocal_color_format, - unsigned internal_shard_count, - bool is_output, - std::vector &area_factor, - std::vector &aspect_ratio, - unsigned num_attempts, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0); +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCOCOFileSourcePartial(RocalContext p_context, + const char* source_path, + const char* json_path, + RocalImageColor rocal_color_format, + unsigned internal_shard_count, + bool is_output, + std::vector& area_factor, + std::vector& aspect_ratio, + unsigned num_attempts, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0); -/*! - * \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored.
- * \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param json_path Path to the COCO Json File - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1. - * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33. - * \return Reference to the output image - */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCOCOFileSourcePartialSingleShard(RocalContext p_context, - const char *source_path, - const char *json_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - std::vector &area_factor, - std::vector &aspect_ratio, - unsigned num_attempts, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0); -/*! - * \brief \param rocal_context Rocal context +/*! \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. 
If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param json_path Path to the COCO Json File - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] rocal_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] json_path Path to the COCO Json File + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1. + * \param [in] aspect_ratio Determines the aspect ration of crop. 
Ranges from 0.75 to 1.33. + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCOCOFileSourceSingleShard(RocalContext context, - const char *source_path, - const char *json_path, - RocalImageColor color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCOCOFileSourcePartialSingleShard(RocalContext p_context, + const char* source_path, + const char* json_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + std::vector& area_factor, + std::vector& aspect_ratio, + unsigned num_attempts, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0); -/*! - * \brief Creates JPEG image reader and decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored. +/*! \brief Creates JPEG image reader. It allocates the resources and objects required to read and decode COCO Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. 
- * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \return Reference to the output image + * \param [in] rocal_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] json_path Path to the COCO Json File + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffeLMDBRecordSource(RocalContext context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned internal_shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); - -/*! - * \brief Creates JPEG image reader and decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCOCOFileSourceSingleShard(RocalContext context, + const char* source_path, + const char* json_path, + RocalImageColor color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); + +/*! \brief Creates JPEG image reader and decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. If images are not Jpeg compressed they will be ignored. 
* \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffeLMDBRecordSourceSingleShard(RocalContext p_context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); - -/*! - * \brief Creates JPEG image reader and decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored. - * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. 
- * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image - */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSource(RocalContext context, - const char *source_path, +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffeLMDBRecordSource(RocalContext context, + const char* source_path, RocalImageColor rocal_color_format, unsigned internal_shard_count, bool is_output, @@ -345,25 +276,24 @@ extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSource(RocalContex unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); -/*! - * \brief Creates JPEG image reader and decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored on the Caffe2 LMDB Records. It accepts external sharding information to load a singe shard. only +/*! \brief Creates JPEG image reader and decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. * \ingroup group_rocal_data_loaders - * \param p_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. 
- * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] p_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loop through images or not.
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourceSingleShard(RocalContext p_context, - const char *source_path, +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffeLMDBRecordSourceSingleShard(RocalContext p_context, + const char* source_path, RocalImageColor rocal_color_format, unsigned shard_id, unsigned shard_count, @@ -374,175 +304,104 @@ extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourceSingleShard( unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); -/*! - * \brief Creates JPEG image reader and decoder for MXNet records. It allocates the resources and objects required to read and decode Jpeg images stored in MXNet Records. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored. - * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. 
- * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image - */ -extern "C" RocalImage ROCAL_API_CALL rocalMXNetRecordSource(RocalContext context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned internal_shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); - -/*! - * \brief Creates JPEG image reader and decoder for MXNet records. It allocates the resources and objects required to read and decode Jpeg images stored on the MXNet records. It accepts external sharding information to load a singe shard. only +/*! \brief Creates JPEG image reader and decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param p_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. 
- * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalMXNetRecordSourceSingleShard(RocalContext p_context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0, - RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); - -/*! - * \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored and Crops t +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSource(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned internal_shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); + +/*! \brief Creates JPEG image reader and decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored on the Caffe2 LMDB Records. It accepts external sharding information to load a singe shard. 
only * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param num_threads Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1. - * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33. - * \param num_attempts Maximum number of attempts to generate crop. Default 10 - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \return Reference to the output image + * \param [in] p_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalFusedJpegCrop(RocalContext context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned num_threads, - bool is_output, - std::vector &area_factor, - std::vector &aspect_ratio, - unsigned num_attempts, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0); - -/*! - * \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a singe shard. only +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourceSingleShard(RocalContext p_context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); + +/*! \brief Creates JPEG image reader and decoder for MXNet records. It allocates the resources and objects required to read and decode Jpeg images stored in MXNet Records. It has internal sharding capability to load/decode in parallel is user wants. If images are not Jpeg compressed they will be ignored. 
* \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1. - * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33. - * \param num_attempts Maximum number of attempts to generate crop. Default 10 - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \return + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalFusedJpegCropSingleShard(RocalContext context, - const char *source_path, - RocalImageColor color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - std::vector &area_factor, - std::vector &aspect_ratio, - unsigned num_attempts, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0); - -/*! - * \brief Creates TensorFlow records JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. - * If images are not Jpeg compressed they will be ignored. - * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location of the TF records on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. 
- * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image - */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegTFRecordSource(RocalContext context, - const char *source_path, +extern "C" RocalTensor ROCAL_API_CALL rocalMXNetRecordSource(RocalContext context, + const char* source_path, RocalImageColor rocal_color_format, unsigned internal_shard_count, bool is_output, - const char *user_key_for_encoded, - const char *user_key_for_filename, bool shuffle = false, bool loop = false, RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); -/*! - * \brief Creates TensorFlow records JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a singe shard. only +/*! \brief Creates JPEG image reader and decoder for MXNet records. It allocates the resources and objects required to read and decode Jpeg images stored on the MXNet records. It accepts external sharding information to load a singe shard. only * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location of the TF records on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. 
- * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \param rocal_decoder_type Determines the decoder_type, tjpeg or hwdec - * \return Reference to the output image + * \param [in] p_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegTFRecordSourceSingleShard(RocalContext context, - const char *source_path, +extern "C" RocalTensor ROCAL_API_CALL rocalMXNetRecordSourceSingleShard(RocalContext p_context, + const char* source_path, RocalImageColor rocal_color_format, unsigned shard_id, unsigned shard_count, @@ -553,290 +412,393 @@ extern "C" RocalImage ROCAL_API_CALL rocalJpegTFRecordSourceSingleShard(RocalCon unsigned max_width = 0, unsigned max_height = 0, RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); -/*! - * \brief Creates Raw image loader. It allocates the resources and objects required to load images stored on the file systems. +/*! \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel is user wants. If images are not Jpeg compressed they will be ignored and Crops t * \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. 
- * \param shuffle: to shuffle dataset - * \param loop: repeat data loading - * \param out_width The output_width of raw image - * \param out_height The output height of raw image - * \return + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] num_threads Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1. + * \param [in] aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33. + * \param [in] num_attempts Maximum number of attempts to generate crop. Default 10 + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalRawTFRecordSource(RocalContext p_context, - const char *source_path, - const char *user_key_for_raw, - const char *user_key_for_filename, - RocalImageColor rocal_color_format, - bool is_output, - bool shuffle = false, - bool loop = false, - unsigned out_width = 0, unsigned out_height = 0, - const char *record_name_prefix = ""); +extern "C" RocalTensor ROCAL_API_CALL rocalFusedJpegCrop(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned num_threads, + bool is_output, + std::vector& area_factor, + std::vector& aspect_ratio, + unsigned num_attempts, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0); -/*! - * \brief Creates Raw image loader. It allocates the resources and objects required to load images stored on the file systems. +/*! \brief Creates JPEG image reader and partial decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a singe shard. only * \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. 
- * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param shuffle: to shuffle dataset - * \param loop: repeat data loading - * \param out_width The output_width of raw image - * \param out_height The output height of raw image - * \param record_name_prefix : if nonempty reader will only read records with certain prefix - * \return + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] area_factor Determines how much area to be cropped. Ranges from 0.08 to 1. + * \param [in] aspect_ratio Determines the aspect ratio of crop. Ranges from 0.75 to 1.33. + * \param [in] num_attempts Maximum number of attempts to generate crop.
Default 10 + * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalRawTFRecordSourceSingleShard(RocalContext p_context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - bool shuffle = false, - bool loop = false, - unsigned out_width = 0, unsigned out_height = 0, - const char *record_name_prefix = ""); +extern "C" RocalTensor ROCAL_API_CALL rocalFusedJpegCropSingleShard(RocalContext context, + const char* source_path, + RocalImageColor color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + std::vector& area_factor, + std::vector& aspect_ratio, + unsigned num_attempts, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0); -/*! - * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. +/*! \brief Creates TensorFlow records JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It has internal sharding capability to load/decode in parallel if the user wants. If images are not Jpeg compressed they will be ignored. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk.
- * \param rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. - * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. - * \param sequence_length: The number of frames in a sequence. - * \param shuffle: to shuffle sequences. - * \param is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. - * \param loop: repeat data loading. - * \param step: Frame interval between each sequence. - * \param stride: Frame interval between frames in a sequence. - * \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. - * \return + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location of the TF records on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalVideoFileSource(RocalContext context, - const char *source_path, - RocalImageColor color_format, - RocalDecodeDevice rocal_decode_device, - unsigned internal_shard_count, - unsigned sequence_length, - bool is_output = false, - bool shuffle = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0, - bool file_list_frame_num = true); - -/*! - * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. It accepts external sharding information to load a singe shard only. +extern "C" RocalTensor ROCAL_API_CALL rocalJpegTFRecordSource(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned internal_shard_count, + bool is_output, + const char* user_key_for_encoded, + const char* user_key_for_filename, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); + +/*! \brief Creates TensorFlow records JPEG image reader and decoder. It allocates the resources and objects required to read and decode Jpeg images stored on the file systems. It accepts external sharding information to load a single shard only. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk.
- * source_path can be a video file, folder containing videos or a text file - * \param color_format The color format the frames will be decoded to. - * \param rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. - * \param shard_id Shard id for this loader. - * \param shard_count Total shard count. - * \param sequence_length: The number of frames in a sequence. - * \param shuffle: to shuffle sequences. - * \param is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. - * \param loop: repeat data loading. - * \param step: Frame interval between each sequence. - * \param stride: Frame interval between frames in a sequence. - * \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. - * \return + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location of the TF records on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to indefinitely loops through images or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \param [in] rocal_decoder_type Determines the decoder_type, tjpeg or hwdec + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalVideoFileSourceSingleShard(RocalContext context, - const char *source_path, - RocalImageColor color_format, - RocalDecodeDevice rocal_decode_device, - unsigned shard_id, - unsigned shard_count, - unsigned sequence_length, - bool shuffle = false, - bool is_output = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0, - bool file_list_frame_num = true); - -/*! - * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. Resizes the decoded frames to the dest width and height. +extern "C" RocalTensor ROCAL_API_CALL rocalJpegTFRecordSourceSingleShard(RocalContext context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0, + RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG); + +/*! \brief Creates Raw image loader. It allocates the resources and objects required to load images stored on the file systems. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk. - * source_path can be a video file, folder containing videos or a text file - * \param color_format The color format the frames will be decoded to. 
- * \param rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. - * \param internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. - * \param sequence_length: The number of frames in a sequence. - * \param dest_width The output width of frames. - * \param dest_height The output height of frames. - * \param shuffle: to shuffle sequences. - * \param is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. - * \param loop: repeat data loading. - * \param step: Frame interval between each sequence. - * \param stride: Frame interval between frames in a sequence. - * \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. - * \return + * \param [in] rocal_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. 
+ * \param [in] shuffle: to shuffle dataset + * \param [in] loop: repeat data loading + * \param [in] out_width The output_width of raw image + * \param [in] out_height The output height of raw image + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalVideoFileResize(RocalContext context, - const char *source_path, - RocalImageColor color_format, - RocalDecodeDevice rocal_decode_device, - unsigned internal_shard_count, - unsigned sequence_length, - unsigned dest_width, - unsigned dest_height, - bool shuffle = false, - bool is_output = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0, - bool file_list_frame_num = true, - RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT, - std::vector max_size = {}, - unsigned resize_shorter = 0, - unsigned resize_longer = 0, - RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); +extern "C" RocalTensor ROCAL_API_CALL rocalRawTFRecordSource(RocalContext p_context, + const char* source_path, + const char* user_key_for_raw, + const char* user_key_for_filename, + RocalImageColor rocal_color_format, + bool is_output, + bool shuffle = false, + bool loop = false, + unsigned out_width = 0, unsigned out_height = 0, + const char* record_name_prefix = ""); -/*! - * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. Resizes the decoded frames to the dest width and height. It accepts external sharding information to load a singe shard only. +/*! \brief Creates Raw image loader. It allocates the resources and objects required to load images stored on the file systems. * \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk. 
- * source_path can be a video file, folder containing videos or a text file - * \param color_format The color format the frames will be decoded to. - * \param rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. - * \param shard_id Shard id for this loader. - * \param shard_count Total shard count. - * \param sequence_length: The number of frames in a sequence. - * \param dest_width The output width of frames. - * \param dest_height The output height of frames. - * \param shuffle: to shuffle sequences. - * \param is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. - * \param loop: repeat data loading. - * \param step: Frame interval between each sequence. - * \param stride: Frame interval between frames in a sequence. - * \param file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. - * \return + * \param [in] rocal_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. 
+ * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] shuffle: to shuffle dataset + * \param [in] loop: repeat data loading + * \param [in] out_width The output_width of raw image + * \param [in] out_height The output height of raw image + * \param [in] record_name_prefix : if nonempty reader will only read records with certain prefix + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalVideoFileResizeSingleShard(RocalContext context, - const char *source_path, - RocalImageColor color_format, - RocalDecodeDevice rocal_decode_device, - unsigned shard_id, - unsigned shard_count, - unsigned sequence_length, - unsigned dest_width, - unsigned dest_height, - bool shuffle = false, - bool is_output = false, - bool loop = false, - unsigned step = 0, - unsigned stride = 0, - bool file_list_frame_num = true, - RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT, - std::vector max_size = {}, - unsigned resize_shorter = 0, - unsigned resize_longer = 0, - RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); +extern "C" RocalTensor ROCAL_API_CALL rocalRawTFRecordSourceSingleShard(RocalContext p_context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + unsigned out_width = 0, unsigned out_height = 0, + const char* record_name_prefix = ""); /*! - * \brief Creates CIFAR10 raw data reader and loader. It allocates the resources and objects required to read raw data stored on the file systems. + * \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. 
* \ingroup group_rocal_data_loaders - * \param context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param out_width ; output width - * \param out_height ; output_height - * \param filename_prefix ; if set loader will only load files with the given prefix name - * \return Reference to the output image + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk. source_path can be a video file, folder containing videos or a text file + * \param [in] color_format The color format the frames will be decoded to. + * \param [in] rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. + * \param [in] sequence_length: The number of frames in a sequence. + * \param [in] shuffle: to shuffle sequences. + * \param [in] is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. + * \param [in] loop: repeat data loading. + * \param [in] step: Frame interval between each sequence. + * \param [in] stride: Frame interval between frames in a sequence. + * \param [in] file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. 
+ * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalRawCIFAR10Source(RocalContext context, - const char *source_path, +extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileSource(RocalContext context, + const char* source_path, RocalImageColor color_format, - bool is_output, - unsigned out_width, unsigned out_height, const char *filename_prefix = "", - bool loop = false); - -/*! - * \brief + RocalDecodeDevice rocal_decode_device, + unsigned internal_shard_count, + unsigned sequence_length, + bool is_output = false, + bool shuffle = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0, + bool file_list_frame_num = true); + +/*! \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. It accepts external sharding information to load a singe shard only. * \ingroup group_rocal_data_loaders - * \param context - * \return + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk. source_path can be a video file, folder containing videos or a text file + * \param [in] color_format The color format the frames will be decoded to. + * \param [in] rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. + * \param [in] shard_id Shard id for this loader. + * \param [in] shard_count Total shard count. + * \param [in] sequence_length: The number of frames in a sequence. + * \param [in] shuffle: to shuffle sequences. + * \param [in] is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. + * \param [in] loop: repeat data loading. + * \param [in] step: Frame interval between each sequence. + * \param [in] stride: Frame interval between frames in a sequence. 
+ * \param [in] file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. + * \return Reference to the output tensor */ -extern "C" RocalStatus ROCAL_API_CALL rocalResetLoaders(RocalContext context); +extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileSourceSingleShard(RocalContext context, + const char* source_path, + RocalImageColor color_format, + RocalDecodeDevice rocal_decode_device, + unsigned shard_id, + unsigned shard_count, + unsigned sequence_length, + bool shuffle = false, + bool is_output = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0, + bool file_list_frame_num = true); + +/*! \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. Resizes the decoded frames to the dest width and height. + * \ingroup group_rocal_data_loaders + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk. source_path can be a video file, folder containing videos or a text file + * \param [in] color_format The color format the frames will be decoded to. + * \param [in] rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. + * \param [in] internal_shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. + * \param [in] sequence_length: The number of frames in a sequence. + * \param [in] dest_width The output width of frames. + * \param [in] dest_height The output height of frames. + * \param [in] shuffle: to shuffle sequences. + * \param [in] is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. + * \param [in] loop: repeat data loading. + * \param [in] step: Frame interval between each sequence. 
+ * \param [in] stride: Frame interval between frames in a sequence. + * \param [in] file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. + * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileResize(RocalContext context, + const char* source_path, + RocalImageColor color_format, + RocalDecodeDevice rocal_decode_device, + unsigned internal_shard_count, + unsigned sequence_length, + unsigned dest_width, + unsigned dest_height, + bool shuffle = false, + bool is_output = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0, + bool file_list_frame_num = true, + RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT, + std::vector max_size = {}, + unsigned resize_shorter = 0, + unsigned resize_longer = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); + +/*! \brief Creates a video reader and decoder as a source. It allocates the resources and objects required to read and decode mp4 videos stored on the file systems. Resizes the decoded frames to the dest width and height. It accepts external sharding information to load a single shard only. + * \ingroup group_rocal_data_loaders + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk. source_path can be a video file, folder containing videos or a text file + * \param [in] color_format The color format the frames will be decoded to. + * \param [in] rocal_decode_device Enables software or hardware decoding. Currently only software decoding is supported. + * \param [in] shard_id Shard id for this loader. + * \param [in] shard_count Total shard count. + * \param [in] sequence_length: The number of frames in a sequence. + * \param [in] dest_width The output width of frames. + * \param [in] dest_height The output height of frames. 
+ * \param [in] shuffle: to shuffle sequences. + * \param [in] is_output Determines if the user wants the loaded sequence of frames to be part of the output or not. + * \param [in] loop: repeat data loading. + * \param [in] step: Frame interval between each sequence. + * \param [in] stride: Frame interval between frames in a sequence. + * \param [in] file_list_frame_num: Determines if the user wants to read frame number or timestamps if a text file is passed in the source_path. + * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalVideoFileResizeSingleShard(RocalContext context, + const char* source_path, + RocalImageColor color_format, + RocalDecodeDevice rocal_decode_device, + unsigned shard_id, + unsigned shard_count, + unsigned sequence_length, + unsigned dest_width, + unsigned dest_height, + bool shuffle = false, + bool is_output = false, + bool loop = false, + unsigned step = 0, + unsigned stride = 0, + bool file_list_frame_num = true, + RocalResizeScalingMode scaling_mode = ROCAL_SCALING_MODE_DEFAULT, + std::vector max_size = {}, + unsigned resize_shorter = 0, + unsigned resize_longer = 0, + RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); + +/*! \brief Creates CIFAR10 raw data reader and loader. It allocates the resources and objects required to read raw data stored on the file systems. + * \ingroup group_rocal_data_loaders + * \param [in] context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. 
+ * \param [in] out_width output width + * \param [in] out_height output_height + * \param [in] filename_prefix if set loader will only load files with the given prefix name + * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalRawCIFAR10Source(RocalContext context, + const char* source_path, + RocalImageColor color_format, + bool is_output, + unsigned out_width, unsigned out_height, const char* filename_prefix = "", + bool loop = false); -/*! - * \brief Creates JPEG image reader and partial decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. +/*! \brief reset Loaders * \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param area_factor Determines how much area to be cropped. Ranges from from 0.08 - 1. - * \param aspect_ratio Determines the aspect ration of crop. Ranges from 0.75 to 1.33. - * \param num_attempts Maximum number of attempts to generate crop. Default 10 - * \param shuffle Determines if the user wants to shuffle the dataset or not. - * \param loop Determines if the user wants to indefinitely loops through images or not. 
- * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \return Reference to the output image + * \param [in] context Rocal Context + * \return Rocal status value */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffeLMDBRecordSourcePartialSingleShard(RocalContext p_context, - const char *source_path, - RocalImageColor rocal_color_format, - unsigned shard_id, - unsigned shard_count, - bool is_output, - std::vector &area_factor, - std::vector &aspect_ratio, - unsigned num_attempts, - bool shuffle = false, - bool loop = false, - RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, - unsigned max_width = 0, unsigned max_height = 0); +extern "C" RocalStatus ROCAL_API_CALL rocalResetLoaders(RocalContext context); -/*! - * \brief Creates JPEG image reader and partial decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe22 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. +/*! \brief Creates JPEG image reader and partial decoder for Caffe LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel is user wants. * \ingroup group_rocal_data_loaders - * \param rocal_context Rocal context - * \param source_path A NULL terminated char string pointing to the location on the disk - * \param rocal_color_format The color format the images will be decoded to. - * \param shard_id Shard id for this loader - * \param shard_count Total shard count - * \param is_output Determines if the user wants the loaded images to be part of the output or not. - * \param shuffle Determines if the user wants to shuffle the dataset or not. 
- * \param loop Determines if the user wants to indefinitely loops through images or not. - * \param decode_size_policy - * \param max_width The maximum width of the decoded images, larger or smaller will be resized to closest - * \param max_height The maximum height of the decoded images, larger or smaller will be resized to closest - * \return Reference to the output image + * \param [in] rocal_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] area_factor Determines how much area to be cropped. Ranges from 0.08 to 1. + * \param [in] aspect_ratio Determines the aspect ratio of crop. Ranges from 0.75 to 1.33. + * \param [in] num_attempts Maximum number of attempts to generate crop. Default 10 + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to loop through the images indefinitely or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \return Reference to the output tensor */ -extern "C" RocalImage ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourcePartialSingleShard(RocalContext p_context, - const char *source_path, +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffeLMDBRecordSourcePartialSingleShard(RocalContext p_context, + const char* source_path, RocalImageColor rocal_color_format, unsigned shard_id, unsigned shard_count, bool is_output, - std::vector &area_factor, - std::vector &aspect_ratio, + std::vector& area_factor, + std::vector& aspect_ratio, unsigned num_attempts, bool shuffle = false, bool loop = false, RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, unsigned max_width = 0, unsigned max_height = 0); -#endif // MIVISIONX_ROCAL_API_DATA_LOADERS_H +/*! \brief Creates JPEG image reader and partial decoder for Caffe2 LMDB records. It allocates the resources and objects required to read and decode Jpeg images stored in Caffe2 LMDB Records. It has internal sharding capability to load/decode in parallel if the user wants. + * \ingroup group_rocal_data_loaders + * \param [in] rocal_context Rocal context + * \param [in] source_path A NULL terminated char string pointing to the location on the disk + * \param [in] rocal_color_format The color format the images will be decoded to. + * \param [in] shard_id Shard id for this loader + * \param [in] shard_count Total shard count + * \param [in] is_output Determines if the user wants the loaded images to be part of the output or not. + * \param [in] shuffle Determines if the user wants to shuffle the dataset or not. + * \param [in] loop Determines if the user wants to loop through the images indefinitely or not. 
+ * \param [in] decode_size_policy + * \param [in] max_width The maximum width of the decoded images, larger or smaller will be resized to closest + * \param [in] max_height The maximum height of the decoded images, larger or smaller will be resized to closest + * \return Reference to the output tensor + */ +extern "C" RocalTensor ROCAL_API_CALL rocalJpegCaffe2LMDBRecordSourcePartialSingleShard(RocalContext p_context, + const char* source_path, + RocalImageColor rocal_color_format, + unsigned shard_id, + unsigned shard_count, + bool is_output, + std::vector& area_factor, + std::vector& aspect_ratio, + unsigned num_attempts, + bool shuffle = false, + bool loop = false, + RocalImageSizeEvaluationPolicy decode_size_policy = ROCAL_USE_MOST_FREQUENT_SIZE, + unsigned max_width = 0, unsigned max_height = 0); + +#endif // MIVISIONX_ROCAL_API_DATA_LOADERS_H diff --git a/rocAL/include/api/rocal_api_data_transfer.h b/rocAL/include/api/rocal_api_data_transfer.h index b31b8911a..0d60e0b94 100644 --- a/rocAL/include/api/rocal_api_data_transfer.h +++ b/rocAL/include/api/rocal_api_data_transfer.h @@ -33,59 +33,54 @@ THE SOFTWARE. */ /*! - * \brief TBD + * \brief copies data to output buffer * \ingroup group_rocal_data_transfer - * - * \param [in] context - * \return A \ref RocalStatus - A status code indicating the success or failure + * \param [in] context Rocal context + * \param [in] out_ptr pointer to output buffer + * \param [in] out_size size of output buffer + * \return Rocal status indicating success or failure */ extern "C" RocalStatus ROCAL_API_CALL rocalCopyToOutput(RocalContext context, unsigned char *out_ptr, size_t out_size); /*! 
- * \brief TBD - * \ingroup group_rocal_data_transfer - * - * \param [in] context - * \return A \ref RocalStatus - A status code indicating the success or failure - */ -extern "C" RocalStatus ROCAL_API_CALL rocalToTensor32(RocalContext rocal_context, float *out_ptr, - RocalTensorLayout tensor_format, float multiplier0, - float multiplier1, float multiplier2, float offset0, - float offset1, float offset2, - bool reverse_channels, RocalOutputMemType output_mem_type); - -/*! - * \brief TBD + * \brief converts data to a tensor * \ingroup group_rocal_data_transfer - * - * \param [in] context - * \return A \ref RocalStatus - A status code indicating the success or failure - */ -extern "C" RocalStatus ROCAL_API_CALL rocalToTensor16(RocalContext rocal_context, half *out_ptr, - RocalTensorLayout tensor_format, float multiplier0, - float multiplier1, float multiplier2, float offset0, - float offset1, float offset2, - bool reverse_channels, RocalOutputMemType output_mem_type); - -/*! - * \brief TBD - * \ingroup group_rocal_data_transfer - * - * \param [in] context - * \return A \ref RocalStatus - A status code indicating the success or failure + * \param [in] rocal_context Rocal context + * \param [in] out_ptr pointer to output buffer + * \param [in] tensor_format the layout of the tensor data + * \param [in] tensor_output_type the output type of the tensor data + * \param [in] multiplier0 the multiplier for channel 0 + * \param [in] multiplier1 the multiplier for channel 1 + * \param [in] multiplier2 the multiplier for channel 2 + * \param [in] offset0 the offset for channel 0 + * \param [in] offset1 the offset for channel 1 + * \param [in] offset2 the offset for channel 2 + * \param [in] reverse_channels flag to reverse the channel orders + * \param [in] output_mem_type the memory type of output tensor buffer + * \return Rocal status indicating success or failure */ extern "C" RocalStatus ROCAL_API_CALL rocalToTensor(RocalContext rocal_context, void *out_ptr, 
RocalTensorLayout tensor_format, RocalTensorOutputType tensor_output_type, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, bool reverse_channels, RocalOutputMemType output_mem_type); + /*! - * \brief TBD + * \brief Sets the output images in the RocalContext * \ingroup group_rocal_data_transfer - * - * \param [in] context - * \return A \ref RocalStatus - A status code indicating the success or failure + * \param [in] p_context Rocal context + * \param [in] num_of_outputs number of output images + * \param [in] output_images output images + */ +extern "C" void ROCAL_API_CALL rocalSetOutputs(RocalContext p_context, unsigned int num_of_outputs, std::vector &output_images); + + +/*! + * \brief gives the list of output tensors from rocal context + * \ingroup group_rocal_data_transfer + * \param [in] p_context Rocal Context + * \return A RocalTensorList containing the list of output tensors */ -extern "C" void ROCAL_API_CALL rocalSetOutputs(RocalContext p_context, unsigned int num_of_outputs, std::vector &output_images); +extern "C" RocalTensorList ROCAL_API_CALL rocalGetOutputTensors(RocalContext p_context); -#endif // MIVISIONX_ROCAL_API_DATA_TRANSFER_H +#endif // MIVISIONX_ROCAL_API_DATA_TRANSFER_H diff --git a/rocAL/include/api/rocal_api_info.h b/rocAL/include/api/rocal_api_info.h index e00d5e4f7..ccc0adea3 100644 --- a/rocAL/include/api/rocal_api_info.h +++ b/rocAL/include/api/rocal_api_info.h @@ -33,115 +33,100 @@ THE SOFTWARE. */ /*! - * \brief rocalGetOutputWidth + * \brief Retrieves the width of the output. * \ingroup group_rocal_info - * - * \param [in] context - * \return The width of the ROCAL's output image in pixels + * \param [in] rocal_context The RocalContext + * \return The width of the output. */ extern "C" int ROCAL_API_CALL rocalGetOutputWidth(RocalContext rocal_context); /*! - * \brief rocalGetOutputHeight + * \brief Retrieves the height of the output. 
* \ingroup group_rocal_info - * - * \param [in] context - * \return The height of the ROCAL's output image in pixels. It includes all images in the batch. + * \param [in] rocal_context The RocalContext + * \return The height of the output. */ extern "C" int ROCAL_API_CALL rocalGetOutputHeight(RocalContext rocal_context); /*! - * \brief rocalGetOutputColorFormat + * \brief Retrieves the color format of the output. * \ingroup group_rocal_info - * - * \param [in] context - * \return The color format of the ROCAL's output. It's equivalent of what's passed to the loaders as input color format. + * \param [in] rocal_context The RocalContext. + * \return The color format of the output. */ extern "C" int ROCAL_API_CALL rocalGetOutputColorFormat(RocalContext rocal_context); /*! - * \brief rocalGetRemainingImages + * \brief Retrieves the number of remaining images. * \ingroup group_rocal_info - * - * \param [in] context - * \return The number of images yet to be processed + * \param [in] rocal_context The RocalContext. + * \return The number of remaining images yet to be processed. */ + extern "C" size_t ROCAL_API_CALL rocalGetRemainingImages(RocalContext rocal_context); /*! - * \brief rocalGetImageWidth + * \brief Retrieves the width of the image. * \ingroup group_rocal_info - * - * \param [in] image - * \return Width of the graph output image - * \note Returned value valid only after rocalVerify is called + * \param [in] image The RocalTensor data. + * \return The width of the image. */ -extern "C" size_t ROCAL_API_CALL rocalGetImageWidth(RocalImage image); +extern "C" size_t ROCAL_API_CALL rocalGetImageWidth(RocalTensor image); /*! - * \brief rocalGetImageHeight + * \brief Retrieves the height of the image. * \ingroup group_rocal_info - * - * \param [in] image - * \return Height of the pipeline output image, includes all images in the batch - * \note Returned value valid only after rocalVerify is called + * \param [in] image The RocalTensor data. 
+ * \return The height of the image. */ -extern "C" size_t ROCAL_API_CALL rocalGetImageHeight(RocalImage image); +extern "C" size_t ROCAL_API_CALL rocalGetImageHeight(RocalTensor image); /*! - * \brief rocalGetImagePlanes + * \brief Retrieves the number of planes (channels) in the image. * \ingroup group_rocal_info - * - * \param [in] image - * \return Color format of the pipeline output image. - * \note Returned value valid only after rocalVerify is called + * \param [in] image The RocalTensor data. + * \return The number of planes (channels) in the image. */ -extern "C" size_t ROCAL_API_CALL rocalGetImagePlanes(RocalImage image); +extern "C" size_t ROCAL_API_CALL rocalGetImagePlanes(RocalTensor image); /*! - * \brief rocalIsEmpty + * \brief Checks if the RocalContext is empty. * \ingroup group_rocal_info - * - * \param [in] context - * \return 1 if all images have been processed, otherwise 0 - * \note Returned value valid only after rocalVerify is called + * \param [in] rocal_context The RocalContext + * \return return if RocalContext is empty or not. */ extern "C" size_t ROCAL_API_CALL rocalIsEmpty(RocalContext rocal_context); /*! - * \brief rocalGetAugmentationBranchCount + * \brief Retrieves the number of augmentation branches. * \ingroup group_rocal_info - * - * \param [in] context + * \param [in] rocal_context The RocalContext * \return Number of augmentation graph branches. Defined by number of calls to the augmentation API's with the is_output flag set to true. */ extern "C" size_t ROCAL_API_CALL rocalGetAugmentationBranchCount(RocalContext rocal_context); /*! - * \brief rocalGetStatus - * \ingroup group_rocal_info - * - * \param [in] context + * \brief Retrieves the status. + * \ingroup group_rocal_info + * \param [in] rocal_context The RocalContext from which to retrieve the status. * \return The status of tha last API call */ extern "C" RocalStatus ROCAL_API_CALL rocalGetStatus(RocalContext rocal_context); /*! 
- * \brief rocalGetErrorMessage + * \brief Retrieves the error message. * \ingroup group_rocal_info - * - * \param [in] context - * \return The last error message generated by call to rocal API + * \param [in] rocal_context The RocalContext + * \return A pointer to the error message string. */ -extern "C" const char *ROCAL_API_CALL rocalGetErrorMessage(RocalContext rocal_context); +extern "C" const char* ROCAL_API_CALL rocalGetErrorMessage(RocalContext rocal_context); /*! - * \brief rocalGetTimingInfo + * \brief Retrieves timing information. * \ingroup group_rocal_info - * - * \param [in] context + * \param [in] rocal_context The RocalContext * \return The timing info associated with recent execution. */ extern "C" TimingInfo ROCAL_API_CALL rocalGetTimingInfo(RocalContext rocal_context); -#endif // MIVISIONX_ROCAL_API_INFO_H +#endif // MIVISIONX_ROCAL_API_INFO_H diff --git a/rocAL/include/api/rocal_api_meta_data.h b/rocAL/include/api/rocal_api_meta_data.h index 4b7585d99..dfe961acd 100644 --- a/rocAL/include/api/rocal_api_meta_data.h +++ b/rocAL/include/api/rocal_api_meta_data.h @@ -32,269 +32,256 @@ THE SOFTWARE. * \brief The AMD rocAL meta data functions. */ -/*! - * \brief rocalCreateLabelReader +/*! \brief creates label reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the folder that contains the dataset or metadata file + * \param [in] rocal_context rocal context + * \param [in] source_path path to the folder that contains the dataset or metadata file * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateLabelReader(RocalContext rocal_context, const char *source_path); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateLabelReader(RocalContext rocal_context, const char* source_path); -/*! - * \brief rocalCreateVideoLabelReader +/*! 
\brief creates video label reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the folder that contains the dataset or metadata file - * \param sequence_length The number of frames in a sequence. - * \param frame_step Frame interval between each sequence. - * \param frame_stride Frame interval between frames in a sequence. - * \param file_list_frame_num True : when the inputs from text file is to be considered as frame numbers. - * False : when the inputs from text file is to considered as timestamps. + * \param [in] rocal_context rocal context + * \param [in] source_path path to the folder that contains the dataset or metadata file + * \param [in] sequence_length The number of frames in a sequence. + * \param [in] frame_step Frame interval between each sequence. + * \param [in] frame_stride Frame interval between frames in a sequence. + * \param [in] file_list_frame_num True : when the inputs from text file is to be considered as frame numbers. False : when the inputs from text file is to considered as timestamps. * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateVideoLabelReader(RocalContext rocal_context, const char *source_path, unsigned sequence_length, unsigned frame_step, unsigned frame_stride, bool file_list_frame_num = true); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateVideoLabelReader(RocalContext rocal_context, const char* source_path, unsigned sequence_length, unsigned frame_step, unsigned frame_stride, bool file_list_frame_num = true); -/*! - * \brief rocalCreateTFReader +/*! 
\brief create tf reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the coco json file + * \param [in] rocal_context rocal context + * \param [in] source_path path to the coco json file * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReader(RocalContext rocal_context, const char *source_path, bool is_output, - const char *user_key_for_label, const char *user_key_for_filename); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReader(RocalContext rocal_context, const char* source_path, bool is_output, + const char* user_key_for_label, const char* user_key_for_filename); -/*! - * \brief rocalCreateTFReaderDetection +/*! \brief create tf reader detection * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the coco json file + * \param [in] rocal_context + * \param [in] source_path path to the coco json file * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReaderDetection(RocalContext rocal_context, const char *source_path, bool is_output, - const char *user_key_for_label, const char *user_key_for_text, - const char *user_key_for_xmin, const char *user_key_for_ymin, const char *user_key_for_xmax, const char *user_key_for_ymax, - const char *user_key_for_filename); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReaderDetection(RocalContext rocal_context, const char* source_path, bool is_output, + const char* user_key_for_label, const char* user_key_for_text, + const char* user_key_for_xmin, const char* user_key_for_ymin, const char* user_key_for_xmax, const char* user_key_for_ymax, + const char* user_key_for_filename); -/*! - * \brief rocalCreateCOCOReader +/*! 
\brief create coco reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the coco json file + * \param [in] rocal_context rocal context + * \param [in] source_path path to the coco json file * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReader(RocalContext rocal_context, const char *source_path, bool is_output); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReader(RocalContext rocal_context, const char* source_path, bool is_output, bool mask = false, bool ltrb = true, bool is_box_encoder = false); -/*! - * \brief rocalCreateCOCOReaderKeyPoints +/*! \brief create coco reader key points * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the coco json file - * \param sigma sigma used for gaussian distribution (needed for HRNet Pose estimation) - * \param pose_output_width output image width (needed for HRNet Pose estimation) - * \param pose_output_width output image height (needed for HRNet Pose estimation) + * \param [in] rocal_context rocal context + * \param [in] source_path path to the coco json file + * \param [in] sigma sigma used for gaussian distribution (needed for HRNet Pose estimation) + * \param [in] pose_output_width output image width (needed for HRNet Pose estimation) + * \param [in] pose_output_height output image height (needed for HRNet Pose estimation) * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReaderKeyPoints(RocalContext rocal_context, const char *source_path, bool is_output, float sigma = 0.0, unsigned pose_output_width = 0, unsigned pose_output_height = 0); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReaderKeyPoints(RocalContext rocal_context, const char* source_path, bool is_output, float sigma = 0.0, unsigned 
pose_output_width = 0, unsigned pose_output_height = 0); -/*! - * \brief rocalCreateTextFileBasedLabelReader +/*! \brief create text file based label reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the file that contains the metadata file + * \param [in] rocal_context + * \param [in] source_path path to the file that contains the metadata file * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTextFileBasedLabelReader(RocalContext rocal_context, const char *source_path); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTextFileBasedLabelReader(RocalContext rocal_context, const char* source_path); -/*! - * \brief rocalCreateCaffeLMDBLabelReader +/*! \brief create caffe LMDB label reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the Caffe LMDB records for Classification + * \param [in] rocal_context + * \param [in] source_path path to the Caffe LMDB records for Classification * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffeLMDBLabelReader(RocalContext rocal_context, const char *source_path); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffeLMDBLabelReader(RocalContext rocal_context, const char* source_path); -/*! - * \brief rocalCreateCaffeLMDBReaderDetection +/*! 
\brief create caffe LMDB label reader for object detection * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the Caffe LMDB records for Object Detection + * \param [in] rocal_context rocal context + * \param [in] source_path path to the Caffe LMDB records for Object Detection * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffeLMDBReaderDetection(RocalContext rocal_context, const char *source_path); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffeLMDBReaderDetection(RocalContext rocal_context, const char* source_path); -/*! - * \brief rocalCreateCaffe2LMDBLabelReader +/*! \brief create caffe2 LMDB label reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the Caffe2LMDB records for Classification + * \param [in] rocal_context rocal context + * \param [in] source_path path to the Caffe2LMDB records for Classification * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffe2LMDBLabelReader(RocalContext rocal_context, const char *source_path, bool is_output); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffe2LMDBLabelReader(RocalContext rocal_context, const char* source_path, bool is_output); -/*! - * \brief rocalCreateCaffe2LMDBReaderDetection +/*! 
\brief create caffe2 LMDB label reader for object detection * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the Caffe2LMDB records for Object Detection - * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors + * \param [in] rocal_context rocal context + * \param [in] source_path path to the Caffe2LMDB records for Object Detection + * \return RocalMetaData object - can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffe2LMDBReaderDetection(RocalContext rocal_context, const char *source_path, bool is_output); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCaffe2LMDBReaderDetection(RocalContext rocal_context, const char* source_path, bool is_output); -/*! - * \brief rocalCreateMXNetReader +/*! \brief create MXNet reader * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the MXNet recordio files for Classification - * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors + * \param [in] rocal_context rocal context + * \param [in] source_path path to the MXNet recordio files for Classification + * \return RocalMetaData object - can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateMXNetReader(RocalContext rocal_context, const char *source_path, bool is_output); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateMXNetReader(RocalContext rocal_context, const char* source_path, bool is_output); -/*! - * \brief rocalGetImageName +/*! \brief get image name * \ingroup group_rocal_meta_data - * \param rocal_context - * \param buf user buffer provided to be filled with output image names for images in the output batch. 
+ * \param [in] rocal_context rocal context + * \param [out] buf user buffer provided to be filled with output image names for images in the output batch. */ -extern "C" void ROCAL_API_CALL rocalGetImageName(RocalContext rocal_context, char *buf); +extern "C" void ROCAL_API_CALL rocalGetImageName(RocalContext rocal_context, char* buf); -/*! - * \brief rocalGetImageNameLen +/*! \brief get image name lengths * \ingroup group_rocal_meta_data - * \param rocal_context - * \param buf userbuffer provided to be filled with the length of the image names in the output batch + * \param [in] rocal_context rocal context + * \param [out] buf userbuffer provided to be filled with the length of the image names in the output batch * \return The size of the buffer needs to be provided by user to get the image names of the output batch */ -extern "C" unsigned ROCAL_API_CALL rocalGetImageNameLen(RocalContext rocal_context, int *buf); +extern "C" unsigned ROCAL_API_CALL rocalGetImageNameLen(RocalContext rocal_context, int* buf); -/*! - * \brief rocalGetImageLabels +/*! \brief get image labels * \ingroup group_rocal_meta_data - * \param meta_data RocalMetaData object that contains info about the images and labels - * \param buf user's buffer that will be filled with labels. Its needs to be at least of size batch_size. + * \param [in] meta_data RocalMetaData object that contains info about the images and labels + * \param [out] buf user's buffer that will be filled with labels. Its needs to be at least of size batch_size. + * \return RocalTensorList of labels associated with image */ -extern "C" void ROCAL_API_CALL rocalGetImageLabels(RocalContext rocal_context, void *buf, RocalOutputMemType output_mem_type = RocalOutputMemType::ROCAL_MEMCPY_HOST); +extern "C" RocalTensorList ROCAL_API_CALL rocalGetImageLabels(RocalContext rocal_context); -/*! - * \brief rocalGetBoundingBoxCount +/*! 
\brief get bounding box count * \ingroup group_rocal_meta_data - * \param rocal_context - * \param buf The user's buffer that will be filled with number of object in the images. + * \param [in] rocal_context rocal context + * \param [out] buf The user's buffer that will be filled with number of object in the images. * \return The size of the buffer needs to be provided by user to get bounding box info for all images in the output batch. */ -extern "C" unsigned ROCAL_API_CALL rocalGetBoundingBoxCount(RocalContext rocal_context, int *buf); +extern "C" unsigned ROCAL_API_CALL rocalGetBoundingBoxCount(RocalContext rocal_context); -/*! - * \brief rocalGetBoundingBoxLabel +/*! \brief get mask count * \ingroup group_rocal_meta_data - * \param rocal_context - * \param buf The user's buffer that will be filled with bounding box label info for the images in the output batch. It needs to be of size returned by a call to the rocalGetBoundingBoxCount + * \param [in] rocal_context rocal context + * \param [out] buf the imageIdx in the output batch + * \return The size of the buffer needs to be provided by user to get mask box info associated with image_idx in the output batch. */ -extern "C" void ROCAL_API_CALL rocalGetBoundingBoxLabel(RocalContext rocal_context, int *buf); +extern "C" unsigned ROCAL_API_CALL rocalGetMaskCount(RocalContext p_context, int* buf); -/*! - * \brief rocalGetBoundingBoxCords +/*! \brief get mask coordinates * \ingroup group_rocal_meta_data - * \param rocal_context + * \param [in] rocal_context rocal context + * \param [out] bufcount The user's buffer that will be filled with polygon size for the mask info + * \return The tensorlist with the mask coordinates */ -extern "C" void ROCAL_API_CALL rocalGetBoundingBoxCords(RocalContext rocal_context, float *buf); +extern "C" RocalTensorList ROCAL_API_CALL rocalGetMaskCoordinates(RocalContext p_context, int* bufcount); -/*! - * \brief rocalGetImageSizes +/*! 
\brief get bounding box label * \ingroup group_rocal_meta_data - * \param rocal_context + * \param [in] rocal_context rocal context + * \param [out] buf The user's buffer that will be filled with bounding box label info for the images in the output batch. It needs to be of size returned by a call to the rocalGetBoundingBoxCount + * \return RocalTensorList of labels associated with bounding box coordinates */ -extern "C" void ROCAL_API_CALL rocalGetImageSizes(RocalContext rocal_context, int *buf); +extern "C" RocalTensorList ROCAL_API_CALL rocalGetBoundingBoxLabel(RocalContext rocal_context); -/*! - * \brief rocalCreateTextCifar10LabelReader +/*! \brief get bounding box coordinates * \ingroup group_rocal_meta_data - * \param rocal_context - * \param source_path path to the file that contains the metadata file - * \param filename_prefix: look only files with prefix ( needed for cifar10) - * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors + * \param [in] rocal_context rocal context + * \param [out] buf The user's buffer that will be filled with bounding box coords info for the images in the output batch. It needs to be of size returned by a call to the rocalGetBoundingBoxCords + * \return RocalTensorList of bounding box co-ordinates */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTextCifar10LabelReader(RocalContext rocal_context, const char *source_path, const char *file_prefix); +extern "C" RocalTensorList ROCAL_API_CALL rocalGetBoundingBoxCords(RocalContext rocal_context); -/*! - * \brief rocalGetOneHotImageLabels +/*! \brief get image sizes * \ingroup group_rocal_meta_data - * \param meta_data RocalMetaData object that contains info about the images and labels - * \param numOfClasses the number of classes for a image dataset - * \param buf user's buffer that will be filled with labels. Its needs to be at least of size batch_size. 
- * \param dest destination can be host=0 / device=1 + * \param [in] rocal_context rocal context + * \param [out] buf The user's buffer that will be filled with images sizes info for the images in the output batch */ -extern "C" void ROCAL_API_CALL rocalGetOneHotImageLabels(RocalContext rocal_context, void *buf, int numOfClasses, int dest); +extern "C" void ROCAL_API_CALL rocalGetImageSizes(RocalContext rocal_context, int* buf); -/*! - * \brief rocalRandomBBoxCrop +/*! \brief create text cifar10 label reader * \ingroup group_rocal_meta_data - * \param rocal_context - * */ + * \param [in] rocal_context rocal context + * \param [in] source_path path to the file that contains the metadata file + * \param [in] filename_prefix: look only files with prefix ( needed for cifar10) + * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors + */ +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTextCifar10LabelReader(RocalContext rocal_context, const char* source_path, const char* file_prefix); + +/*! \brief get one hot image labels + * \ingroup group_rocal_meta_data + * \param [in] meta_data RocalMetaData object that contains info about the images and labels + * \param [in] numOfClasses the number of classes for a image dataset + * \param [out] buf user's buffer that will be filled with labels. Its needs to be at least of size batch_size. + * \param [in] dest destination can be host=0 / device=1 + */ +extern "C" void ROCAL_API_CALL rocalGetOneHotImageLabels(RocalContext rocal_context, void* buf, int numOfClasses, int dest); + extern "C" void ROCAL_API_CALL rocalRandomBBoxCrop(RocalContext p_context, bool all_boxes_overlap, bool no_crop, RocalFloatParam aspect_ratio = NULL, bool has_shape = false, int crop_width = 0, int crop_height = 0, int num_attempts = 1, RocalFloatParam scaling = NULL, int total_num_attempts = 0, int64_t seed = 0); -/*! - * \brief rocalGetSequenceStartFrameNumber +/*! 
\brief get sequence starting frame number * \ingroup group_rocal_meta_data - * \param rocal_context - * \param buf The user's buffer that will be filled with starting frame numbers of the output batch sequences. + * \param [in] rocal_context rocal context + * \param [out] buf The user's buffer that will be filled with starting frame numbers of the output batch sequences. */ -extern "C" void ROCAL_API_CALL rocalGetSequenceStartFrameNumber(RocalContext rocal_context, unsigned int *buf); +extern "C" void ROCAL_API_CALL rocalGetSequenceStartFrameNumber(RocalContext rocal_context, unsigned int* buf); -/*! - * \brief rocalGetSequenceFrameTimestamps +/*! \brief get sequence time stamps * \ingroup group_rocal_meta_data - * \param rocal_context - * \param buf The user's buffer that will be filled with frame timestamps of each of the frames in output batch sequences. + * \param [in] rocal_context rocal context + * \param [out] buf The user's buffer that will be filled with frame timestamps of each of the frames in output batch sequences. */ -extern "C" void ROCAL_API_CALL rocalGetSequenceFrameTimestamps(RocalContext rocal_context, float *buf); +extern "C" void ROCAL_API_CALL rocalGetSequenceFrameTimestamps(RocalContext rocal_context, float* buf); -/*! - * \brief rocalBoxEncoder +/*! \brief rocal box encoder * \ingroup group_rocal_meta_data - * \param anchors Anchors to be used for encoding, as the array of floats is in the ltrb format. - * \param criteria Threshold IoU for matching bounding boxes with anchors. - * The value needs to be between 0 and 1. - * \param offset Returns normalized offsets ((encoded_bboxes*scale - anchors*scale) - mean) / stds in EncodedBBoxes that use std and the mean and scale arguments - * \param means [x y w h] mean values for normalization. - * \param stds [x y w h] standard deviations for offset normalization. - * \param scale Rescales the box and anchor values before the offset is calculated (for example, to return to the absolute values). 
+ * \param [in] anchors Anchors to be used for encoding, as the array of floats is in the ltrb format. + * \param [in] criteria Threshold IoU for matching bounding boxes with anchors. The value needs to be between 0 and 1. + * \param [in] offset Returns normalized offsets ((encoded_bboxes*scale - anchors*scale) - mean) / stds in EncodedBBoxes that use std and the mean and scale arguments + * \param [in] means [x y w h] mean values for normalization. + * \param [in] stds [x y w h] standard deviations for offset normalization. + * \param [in] scale Rescales the box and anchor values before the offset is calculated (for example, to return to the absolute values). */ -extern "C" void ROCAL_API_CALL rocalBoxEncoder(RocalContext p_context, std::vector &anchors, float criteria, - std::vector &means, std::vector &stds, bool offset = false, float scale = 1.0); +extern "C" void ROCAL_API_CALL rocalBoxEncoder(RocalContext p_context, std::vector& anchors, float criteria, + std::vector& means, std::vector& stds, bool offset = false, float scale = 1.0); -/*! - * \brief rocalCopyEncodedBoxesAndLables +/*! \brief copy encoded boxes and labels * \ingroup group_rocal_meta_data - * \param boxes_buf user's buffer that will be filled with encoded bounding boxes . Its needs to be at least of size batch_size. - * \param labels_buf user's buffer that will be filled with encoded labels . Its needs to be at least of size batch_size. + * \param [in] p_context rocal context + * \param [out] boxes_buf user's buffer that will be filled with encoded bounding boxes . Its needs to be at least of size batch_size. + * \param [out] labels_buf user's buffer that will be filled with encoded labels . Its needs to be at least of size batch_size. */ -extern "C" void ROCAL_API_CALL rocalCopyEncodedBoxesAndLables(RocalContext p_context, float *boxes_buf, int *labels_buf); +extern "C" void ROCAL_API_CALL rocalCopyEncodedBoxesAndLables(RocalContext p_context, float* boxes_buf, int* labels_buf); -/*! 
- * \brief rocalGetEncodedBoxesAndLables +/*! \brief get encoded boxes and labels * \ingroup group_rocal_meta_data * \param boxes_buf ptr to user's buffer that will be filled with encoded bounding boxes . Its needs to be at least of size batch_size. * \param labels_buf user's buffer that will be filled with encoded labels . Its needs to be at least of size batch_size. */ -extern "C" void ROCAL_API_CALL rocalGetEncodedBoxesAndLables(RocalContext p_context, float **boxes_buf_ptr, int **labels_buf_ptr, int num_encoded_boxes); +extern "C" RocalMetaData ROCAL_API_CALL rocalGetEncodedBoxesAndLables(RocalContext p_context, int num_encoded_boxes); -/*! - * \brief rocalGetImageId +/*! \brief get image id * \ingroup group_rocal_meta_data - * \param rocal_context + * \param rocal_context rocal context * \param buf The user's buffer that will be filled with image id info for the images in the output batch. */ -extern "C" void ROCAL_API_CALL rocalGetImageId(RocalContext p_context, int *buf); +extern "C" void ROCAL_API_CALL rocalGetImageId(RocalContext p_context, int* buf); -/*! - * \brief rocalGetJointsDataPtr +/*! 
\brief get joints data pointer * \ingroup group_rocal_meta_data - * \param rocal_context - * \param joints_data The user's RocalJointsData pointer that will be pointed to JointsDataBatch pointer + * \param [in] rocal_context rocal context + * \param [out] joints_data The user's RocalJointsData pointer that will be pointed to JointsDataBatch pointer */ -extern "C" void ROCAL_API_CALL rocalGetJointsDataPtr(RocalContext p_context, RocalJointsData **joints_data); +extern "C" void ROCAL_API_CALL rocalGetJointsDataPtr(RocalContext p_context, RocalJointsData** joints_data); -#endif // MIVISIONX_ROCAL_API_META_DATA_H +#endif // MIVISIONX_ROCAL_API_META_DATA_H diff --git a/rocAL/include/api/rocal_api_parameters.h b/rocAL/include/api/rocal_api_parameters.h index bc2e5907f..d79abc49b 100644 --- a/rocAL/include/api/rocal_api_parameters.h +++ b/rocAL/include/api/rocal_api_parameters.h @@ -32,163 +32,132 @@ THE SOFTWARE. * \brief The AMD rocAL Parameters. */ -/*! - * \brief rocalSetSeed +/*! \brief set seed for random number generation * \ingroup group_rocal_parameters - * - * \param seed + * \param [in] seed seed for the random number generation */ extern "C" void ROCAL_API_CALL rocalSetSeed(unsigned seed); -/*! - * \brief rocalGetSeed +/*! \brief gets the seed value * \ingroup group_rocal_parameters - * - * \return + * \return seed value */ extern "C" unsigned ROCAL_API_CALL rocalGetSeed(); -/*! - * \brief rocalCreateIntUniformRand +/*! \brief Creates a new uniform random integer parameter within a specified range. * \ingroup group_rocal_parameters - * - * \param start - * \param end - * \return + * \param start start value of the integer range + * \param end end value of the integer range + * \return RocalIntParam representing the uniform random integer parameter. */ extern "C" RocalIntParam ROCAL_API_CALL rocalCreateIntUniformRand(int start, int end); -/*! - * \brief rocalUpdateIntUniformRand +/*! \brief updates uniform random integer parameter within a specified range. 
* \ingroup group_rocal_parameters - * - * \param start - * \param end - * \param input_obj - * \return + * \param start start value of the integer range + * \param end end value of the integer range + * \param updating_obj RocalIntParam to be updated. + * \return rocal status value */ extern "C" RocalStatus ROCAL_API_CALL rocalUpdateIntUniformRand(int start, int end, RocalIntParam updating_obj); -/*! - * \brief rocalGetIntValue +/*! \brief gets the value of a RocalIntParam. * \ingroup group_rocal_parameters - * - * \param obj - * \return + * \param [in] obj The RocalIntParam from which to retrieve the value. + * \return integer value of the RocalIntParam. */ extern "C" int ROCAL_API_CALL rocalGetIntValue(RocalIntParam obj); -/*! - * \brief rocalGetFloatValue +/*! \brief gets the value of a RocalFloatParam. * \ingroup group_rocal_parameters - * - * \param obj - * \return + * \param [in] obj The RocalFloatParam from which to retrieve the value. + * \return float value of the RocalFloatParam. */ extern "C" float ROCAL_API_CALL rocalGetFloatValue(RocalFloatParam obj); -/*! - * \brief rocalCreateFloatUniformRand +/*! \brief Creates a new uniform random float parameter within a specified range. * \ingroup group_rocal_parameters - * - * \param start - * \param end - * \return + * \param start start value of the float range + * \param end end value of the float range + * \return RocalFloatParam representing the uniform random float parameter. */ extern "C" RocalFloatParam ROCAL_API_CALL rocalCreateFloatUniformRand(float start, float end); -/*! - * \brief rocalCreateFloatParameter +/*! \brief Creates a new float parameter with a specified value. * \ingroup group_rocal_parameters - * - * \param val - * \return + * \param [in] val value to create float param + * \return A new RocalFloatParam representing the float parameter. */ extern "C" RocalFloatParam ROCAL_API_CALL rocalCreateFloatParameter(float val); -/*! - * \brief rocalCreateIntParameter +/*! 
\brief Creates a new int parameter with a specified value. * \ingroup group_rocal_parameters - * - * \param val - * \return + * \param [in] val value to create integer param + * \return A new RocalIntParam representing the integer parameter. */ extern "C" RocalIntParam ROCAL_API_CALL rocalCreateIntParameter(int val); -/*! - * \brief rocalUpdateFloatParameter +/*! \brief Updates a float parameter with a new value. * \ingroup group_rocal_parameters - * - * \param new_val - * \param input_obj - * \return + * \param[in] new_val The new value to update the float parameter. + * \param[in] input_obj The RocalFloatParam to be updated. + * \return RocalStatus value. */ extern "C" RocalStatus ROCAL_API_CALL rocalUpdateFloatParameter(float new_val, RocalFloatParam input_obj); -/*! - * \brief rocalUpdateIntParameter +/*! \brief Updates an integer parameter with a new value. * \ingroup group_rocal_parameters - * - * \param new_val - * \param input_obj - * \return + * \param[in] new_val The new value to update the integer parameter. + * \param[in] input_obj The RocalIntParam to be updated. + * \return RocalStatus value. */ extern "C" RocalStatus ROCAL_API_CALL rocalUpdateIntParameter(int new_val, RocalIntParam input_obj); -/*! - * \brief rocalUpdateFloatUniformRand +/*! \brief updates uniform random float parameter within a specified range. * \ingroup group_rocal_parameters - * - * \param start - * \param end - * \param input_obj - * \return + * \param start start value of the float range + * \param end end value of the float range + * \param updating_obj RocalFloatParam to be updated. + * \return rocal status value */ extern "C" RocalStatus ROCAL_API_CALL rocalUpdateFloatUniformRand(float start, float end, RocalFloatParam updating_obj); -/*! - * \brief rocalCreateIntRand +/*! 
\brief Sets the parameters for a new or existing RocalIntRandGen object * \ingroup group_rocal_parameters - * - * \param values - * \param frequencies - * \param size - * \return + * \param [in] values random int values + * \param [in] frequencies frequencies of the values + * \param size size of the array + * \return random int parameter */ extern "C" RocalIntParam ROCAL_API_CALL rocalCreateIntRand(const int *values, const double *frequencies, unsigned size); -/*! - * \brief rocalUpdateIntRand +/*! \brief update the int random value * \ingroup group_rocal_parameters - * - * \param values - * \param frequencies - * \param size - * \param updating_obj - * \return + * \param [in] values random int values + * \param [in] frequencies frequencies of the values + * \param [in] size size of the array + * \param [in] updating_obj Rocal int Param to update + * \return rocal status value */ extern "C" RocalStatus ROCAL_API_CALL rocalUpdateIntRand(const int *values, const double *frequencies, unsigned size, RocalIntParam updating_obj); -/*! - * \brief Sets the parameters for a new or existing RocalFloatRandGen object +/*! \brief Sets the parameters for a new or existing RocalFloatRandGen object * \ingroup group_rocal_parameters - * \param values - * \param frequencies - * \param size - * \return + * \param [in] values random float values + * \param [in] frequencies frequencies of the values + * \param size size of the array + * \return random float parameter */ extern "C" RocalFloatParam ROCAL_API_CALL rocalCreateFloatRand(const float *values, const double *frequencies, unsigned size); -/*! - * \brief rocalUpdateFloatRand +/*! 
\brief update the float random value * \ingroup group_rocal_parameters - * - * \param values - * \param frequencies - * \param size - * \param updating_obj - * \return + * \param [in] values random float values + * \param [in] frequencies frequencies of the values + * \param [in] size size of the array + * \param [in] updating_obj Rocal Float Param to update + * \return rocal status value */ extern "C" RocalStatus ROCAL_API_CALL rocalUpdateFloatRand(const float *values, const double *frequencies, unsigned size, RocalFloatParam updating_obj); -#endif // MIVISIONX_ROCAL_API_PARAMETERS_H +#endif // MIVISIONX_ROCAL_API_PARAMETERS_H diff --git a/rocAL/include/api/rocal_api_tensor.h b/rocAL/include/api/rocal_api_tensor.h new file mode 100644 index 000000000..04bf1cbf7 --- /dev/null +++ b/rocAL/include/api/rocal_api_tensor.h @@ -0,0 +1,70 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef MIVISIONX_ROCAL_API_TENSOR_H +#define MIVISIONX_ROCAL_API_TENSOR_H +#include "rocal_api_types.h" + +/*! + * \file + * \brief The AMD rocAL Library - Tensor + * + * \defgroup group_rocal_tensor API: AMD rocAL - Tensor API + * \brief The AMD rocAL Tensor. + */ + +/*! + * \brief class representing rocal tensor + */ +class rocalTensor { + public: + virtual ~rocalTensor() = default; + virtual void* buffer() = 0; + virtual unsigned copy_data(void* user_buffer, RocalOutputMemType external_mem_type = ROCAL_MEMCPY_HOST) = 0; + virtual unsigned num_of_dims() = 0; + virtual unsigned batch_size() = 0; + virtual std::vector dims() = 0; + virtual std::vector strides() = 0; + virtual RocalTensorLayout layout() = 0; + virtual RocalTensorBackend backend() = 0; + virtual RocalTensorOutputType data_type() = 0; + virtual size_t data_size() = 0; + virtual RocalROICordsType roi_type() = 0; + virtual RocalROICords* get_roi() = 0; + virtual std::vector shape() = 0; +}; + +/*! + * \brief class representing rocal tensor list + */ +class rocalTensorList { + public: + virtual uint64_t size() = 0; + virtual rocalTensor* at(size_t index) = 0; + // isDenseTensor +}; + +typedef rocalTensor* RocalTensor; +typedef rocalTensorList* RocalTensorList; +typedef std::vector RocalMetaData; + +#endif // MIVISIONX_ROCAL_API_TENSOR_H diff --git a/rocAL/include/api/rocal_api_types.h b/rocAL/include/api/rocal_api_types.h index 13af3671c..0cc6adf8b 100644 --- a/rocAL/include/api/rocal_api_types.h +++ b/rocAL/include/api/rocal_api_types.h @@ -50,23 +50,17 @@ using half_float::half; /*! \brief typedef void* Float Param * \ingroup group_rocal_types */ -typedef void *RocalFloatParam; +typedef void* RocalFloatParam; + /*! \brief typedef void* rocAL Int Param * \ingroup group_rocal_types */ -typedef void *RocalIntParam; +typedef void* RocalIntParam; + /*! \brief typedef void* rocAL Context * \ingroup group_rocal_types */ -typedef void *RocalContext; -/*! 
\brief typedef void* rocAL Image - * \ingroup group_rocal_types - */ -typedef void *RocalImage; -/*! \brief typedef void* rocAL Meta Data - * \ingroup group_rocal_types - */ -typedef void *RocalMetaData; +typedef void* RocalContext; /*! \brief typedef std::vectors * \ingroup group_rocal_types @@ -82,19 +76,18 @@ typedef std::vector>> JointsBatch, JointsVisibili /*! \brief Timing Info struct * \ingroup group_rocal_types */ -struct TimingInfo -{ +struct TimingInfo { long long unsigned load_time; long long unsigned decode_time; long long unsigned process_time; long long unsigned transfer_time; }; +// HRNet training expects meta data (joints_data) in below format, so added here as a type for exposing to user /*! \brief rocAL Joints Data struct - HRNet training expects meta data (joints_data) in below format, so added here as a type for exposing to user * \ingroup group_rocal_types */ -struct RocalJointsData -{ +struct RocalJointsData { ImageIDBatch image_id_batch; AnnotationIDBatch annotation_id_batch; ImagePathBatch image_path_batch; @@ -109,8 +102,7 @@ struct RocalJointsData /*! \brief rocAL Status enum * \ingroup group_rocal_types */ -enum RocalStatus -{ +enum RocalStatus { /*! \brief AMD ROCAL_OK */ ROCAL_OK = 0, @@ -131,8 +123,7 @@ enum RocalStatus /*! \brief rocAL Image Color enum * \ingroup group_rocal_types */ -enum RocalImageColor -{ +enum RocalImageColor { /*! \brief AMD ROCAL_COLOR_RGB24 */ ROCAL_COLOR_RGB24 = 0, @@ -150,8 +141,7 @@ enum RocalImageColor /*! \brief rocAL Process Mode enum * \ingroup group_rocal_types */ -enum RocalProcessMode -{ +enum RocalProcessMode { /*! \brief AMD ROCAL_PROCESS_GPU */ ROCAL_PROCESS_GPU = 0, @@ -163,8 +153,7 @@ enum RocalProcessMode /*! \brief rocAL Flip Axis enum * \ingroup group_rocal_types */ -enum RocalFlipAxis -{ +enum RocalFlipAxis { /*! \brief AMD ROCAL_FLIP_HORIZONTAL */ ROCAL_FLIP_HORIZONTAL = 0, @@ -176,8 +165,7 @@ enum RocalFlipAxis /*! 
\brief rocAL Image Size Evaluation Policy enum * \ingroup group_rocal_types */ -enum RocalImageSizeEvaluationPolicy -{ +enum RocalImageSizeEvaluationPolicy { /*! \brief AMD ROCAL_USE_MAX_SIZE */ ROCAL_USE_MAX_SIZE = 0, @@ -189,17 +177,16 @@ enum RocalImageSizeEvaluationPolicy ROCAL_USE_MOST_FREQUENT_SIZE = 2, /*! \brief Use the given size only if the actual decoded size is greater than the given size */ - ROCAL_USE_USER_GIVEN_SIZE_RESTRICTED = 3, + ROCAL_USE_USER_GIVEN_SIZE_RESTRICTED = 3, // use the given size only if the actual decoded size is greater than the given size /*! \brief Use max size if the actual decoded size is greater than max */ - ROCAL_USE_MAX_SIZE_RESTRICTED = 4, + ROCAL_USE_MAX_SIZE_RESTRICTED = 4, // use max size if the actual decoded size is greater than max }; /*! \brief rocAL Decode Device enum * \ingroup group_rocal_types */ -enum RocalDecodeDevice -{ +enum RocalDecodeDevice { /*! \brief AMD ROCAL_HW_DECODE */ ROCAL_HW_DECODE = 0, @@ -211,37 +198,46 @@ enum RocalDecodeDevice /*! \brief rocAL Tensor Layout enum * \ingroup group_rocal_types */ -enum RocalTensorLayout -{ +enum RocalTensorLayout { /*! \brief AMD ROCAL_NHWC */ ROCAL_NHWC = 0, /*! \brief AMD ROCAL_NCHW */ - ROCAL_NCHW = 1 + ROCAL_NCHW = 1, + /*! \brief AMD ROCAL_NFHWC + */ + ROCAL_NFHWC = 2, + /*! \brief AMD ROCAL_NFCHW + */ + ROCAL_NFCHW = 3, + /*! \brief AMD ROCAL_NONE + */ + ROCAL_NONE = 4 // Layout for generic tensors (Non-Image or Non-Video) }; /*! \brief rocAL Tensor Output Type enum * \ingroup group_rocal_types */ -enum RocalTensorOutputType -{ +enum RocalTensorOutputType { /*! \brief AMD ROCAL_FP32 */ ROCAL_FP32 = 0, /*! \brief AMD ROCAL_FP16 */ ROCAL_FP16 = 1, - /*! \brief AMD ROCAL_U8 + /*! \brief AMD ROCAL_UINT8 + */ + ROCAL_UINT8 = 2, + /*! \brief AMD ROCAL_INT8 */ - ROCAL_U8 = 2, + ROCAL_INT8 = 3 }; /*! \brief rocAL Decoder Type enum * \ingroup group_rocal_types */ -enum RocalDecoderType -{ +enum RocalDecoderType { /*! 
\brief AMD ROCAL_DECODER_TJPEG */ ROCAL_DECODER_TJPEG = 0, @@ -259,11 +255,7 @@ enum RocalDecoderType ROCAL_DECODER_VIDEO_FFMPEG_HW = 4 }; -/*! \brief rocAL Output Mem Type enum - * \ingroup group_rocal_types - */ -enum RocalOutputMemType -{ +enum RocalOutputMemType { /*! \brief AMD ROCAL_MEMCPY_HOST */ ROCAL_MEMCPY_HOST = 0, @@ -275,24 +267,24 @@ enum RocalOutputMemType ROCAL_MEMCPY_PINNED = 2 }; +// rocal external memcpy flags /*! \brief AMD rocAL external memcpy flags - force copy to user provided host memory * \ingroup group_rocal_types */ -#define ROCAL_MEMCPY_TO_HOST 1 +#define ROCAL_MEMCPY_TO_HOST 1 // force copy to user provided host memory /*! \brief AMD rocAL external memcpy flags - force copy to user provided device memory (gpu) * \ingroup group_rocal_types */ -#define ROCAL_MEMCPY_TO_DEVICE 2 +#define ROCAL_MEMCPY_TO_DEVICE 2 // force copy to user provided device memory (gpu) /*! \brief AMD rocAL external memcpy flags - for future use * \ingroup group_rocal_types */ -#define ROCAL_MEMCPY_IS_PINNED 4 +#define ROCAL_MEMCPY_IS_PINNED 4 // for future use /*! \brief rocAL Resize Scaling Mode enum * \ingroup group_rocal_types */ -enum RocalResizeScalingMode -{ +enum RocalResizeScalingMode { /*! \brief scales wrt specified size, if only resize width/height is provided the other dimension is scaled according to aspect ratio */ ROCAL_SCALING_MODE_DEFAULT = 0, @@ -304,7 +296,10 @@ enum RocalResizeScalingMode ROCAL_SCALING_MODE_NOT_SMALLER = 2, /*! \brief scales wrt to aspect ratio, so that resize width/height does not exceed specified size */ - ROCAL_SCALING_MODE_NOT_LARGER = 3 + ROCAL_SCALING_MODE_NOT_LARGER = 3, + /*! \brief scales wrt to aspect ratio, so that resize width/height does not exceed specified min and max size + */ + ROCAL_SCALING_MODE_MIN_MAX = 4 }; /*! \brief rocAL Resize Interpolation Type enum @@ -332,4 +327,34 @@ enum RocalResizeInterpolationType ROCAL_TRIANGULAR_INTERPOLATION = 5 }; -#endif // MIVISIONX_ROCAL_API_TYPES_H +/*! 
\brief Tensor Backend + * \ingroup group_rocal_types + */ +enum RocalTensorBackend { + /*! \brief ROCAL_CPU + */ + ROCAL_CPU = 0, + /*! \brief ROCAL_GPU + */ + ROCAL_GPU = 1 +}; + +/*! \brief Tensor ROI type + * \ingroup group_rocal_types + */ +enum class RocalROICordsType { + /*! \brief ROCAL_LTRB + */ + ROCAL_LTRB = 0, + /*! \brief ROCAL_XYWH + */ + ROCAL_XYWH = 1 +}; + +/*! \brief RocalROICords struct + * \ingroup group_rocal_types + */ +typedef struct { + unsigned x1, y1, x2, y2; +} RocalROICords; +#endif // MIVISIONX_ROCAL_API_TYPES_H diff --git a/rocAL/include/augmentations/color_augmentations/node_blend.h b/rocAL/include/augmentations/color_augmentations/node_blend.h index cee6ac60f..701697e4f 100644 --- a/rocAL/include/augmentations/color_augmentations/node_blend.h +++ b/rocAL/include/augmentations/color_augmentations/node_blend.h @@ -25,19 +25,19 @@ THE SOFTWARE. #include "parameter_factory.h" #include "parameter_vx.h" -class BlendNode : public Node -{ -public: - explicit BlendNode(const std::vector &inputs, const std::vector &outputs); +class BlendNode : public Node { + public: + explicit BlendNode(const std::vector &inputs, const std::vector &outputs); BlendNode() = delete; void init(float ratio); - void init(FloatParam* ratio); + void init(FloatParam *ratio); -protected: + protected: void update_node() override; void create_node() override; -private: + + private: ParameterVX _ratio; - constexpr static float RATIO_RANGE [2] = {0.1, 0.9}; + constexpr static float RATIO_RANGE[2] = {0.1, 0.9}; }; \ No newline at end of file diff --git a/rocAL/include/augmentations/color_augmentations/node_blur.h b/rocAL/include/augmentations/color_augmentations/node_blur.h index 341c109ff..3fae47e22 100644 --- a/rocAL/include/augmentations/color_augmentations/node_blur.h +++ b/rocAL/include/augmentations/color_augmentations/node_blur.h @@ -21,24 +21,23 @@ THE SOFTWARE. 
*/ #pragma once +#include "graph.h" #include "node.h" #include "parameter_factory.h" #include "parameter_vx.h" -#include "graph.h" -class BlurNode : public Node -{ -public: - BlurNode(const std::vector &inputs, const std::vector &outputs); +class BlurNode : public Node { + public: + BlurNode(const std::vector &inputs, const std::vector &outputs); BlurNode() = delete; - void init(int sdev); - void init(IntParam *sdev); + void init(int kernel_size); + void init(IntParam *kernel_size_param); -protected: + protected: void update_node() override; void create_node() override; -private: - ParameterVX _sdev; - constexpr static int SDEV_RANGE [2] = {3, 9}; + private: + ParameterVX _kernel_size; + constexpr static int KERNEL_SIZE_RANGE[2] = {3, 9}; }; diff --git a/rocAL/include/augmentations/color_augmentations/node_brightness.h b/rocAL/include/augmentations/color_augmentations/node_brightness.h index b672c81eb..21369651c 100644 --- a/rocAL/include/augmentations/color_augmentations/node_brightness.h +++ b/rocAL/include/augmentations/color_augmentations/node_brightness.h @@ -21,27 +21,26 @@ THE SOFTWARE. 
*/ #pragma once +#include "graph.h" #include "node.h" #include "parameter_factory.h" #include "parameter_vx.h" -#include "graph.h" -class BrightnessNode : public Node -{ -public: - BrightnessNode(const std::vector &inputs, const std::vector &outputs); +class BrightnessNode : public Node { + public: + BrightnessNode(const std::vector &inputs, const std::vector &outputs); BrightnessNode() = delete; - void init( float alpha, float beta); - void init( FloatParam* alpha_param, FloatParam* beta_param); + void init(float alpha, float beta); + void init(FloatParam *alpha_param, FloatParam *beta_param); -protected: - void create_node() override ; + protected: + void create_node() override; void update_node() override; -private: + private: ParameterVX _alpha; ParameterVX _beta; - constexpr static float ALPHA_RANGE [2] = {0.1, 1.95}; - constexpr static float BETA_RANGE [2] = {0, 25}; + constexpr static float ALPHA_RANGE[2] = {0.1, 1.95}; + constexpr static float BETA_RANGE[2] = {0, 25}; }; \ No newline at end of file diff --git a/rocAL/include/augmentations/color_augmentations/node_color_temperature.h b/rocAL/include/augmentations/color_augmentations/node_color_temperature.h index ab1bd172f..82b23d3ad 100644 --- a/rocAL/include/augmentations/color_augmentations/node_color_temperature.h +++ b/rocAL/include/augmentations/color_augmentations/node_color_temperature.h @@ -21,24 +21,24 @@ THE SOFTWARE. 
*/ #pragma once +#include "graph.h" #include "node.h" #include "parameter_factory.h" #include "parameter_vx.h" -#include "graph.h" -class ColorTemperatureNode : public Node -{ -public: - ColorTemperatureNode(const std::vector &inputs, const std::vector &outputs); +class ColorTemperatureNode : public Node { + public: + ColorTemperatureNode(const std::vector &inputs, const std::vector &outputs); ColorTemperatureNode() = delete; void init(int adjustment); void init(IntParam *adjustment); -protected: - void create_node() override ; + protected: + void create_node() override; void update_node() override; -private: + + private: ParameterVX _adj_value_param; - constexpr static int ADJUSTMENT_RANGE [2] = {-99, 99}; + constexpr static int ADJUSTMENT_RANGE[2] = {-99, 99}; }; \ No newline at end of file diff --git a/rocAL/include/augmentations/color_augmentations/node_color_twist.h b/rocAL/include/augmentations/color_augmentations/node_color_twist.h index eddbeb4f6..28a44af43 100644 --- a/rocAL/include/augmentations/color_augmentations/node_color_twist.h +++ b/rocAL/include/augmentations/color_augmentations/node_color_twist.h @@ -24,28 +24,25 @@ THE SOFTWARE. 
#include "node.h" #include "parameter_factory.h" #include "parameter_vx.h" -#include "graph.h" -class ColorTwistBatchNode : public Node -{ -public: - ColorTwistBatchNode(const std::vector &inputs, const std::vector &outputs); - ColorTwistBatchNode() = delete; +class ColorTwistNode : public Node { + public: + ColorTwistNode(const std::vector &inputs, const std::vector &outputs); + ColorTwistNode() = delete; void init(float alpha, float beta, float hue, float sat); - void init(FloatParam *alpha, FloatParam *beta, FloatParam *hue, FloatParam *sat); + void init(FloatParam *alpha_param, FloatParam *beta_param, FloatParam *hue_param, FloatParam *sat_param); -protected: + protected: void create_node() override; void update_node() override; -private: + private: ParameterVX _alpha; ParameterVX _beta; ParameterVX _hue; ParameterVX _sat; - - constexpr static float ALPHA_RANGE [2] = {0.1, 1.95}; - constexpr static float BETA_RANGE [2] = {0.1, 25.0}; - constexpr static float HUE_RANGE [2] = {5.0, 170.0}; - constexpr static float SAT_RANGE [2] = {0.1, 0.4}; -}; \ No newline at end of file + constexpr static float ALPHA_RANGE[2] = {0.1, 1.95}; + constexpr static float BETA_RANGE[2] = {0.1, 25.0}; + constexpr static float HUE_RANGE[2] = {5.0, 170.0}; + constexpr static float SAT_RANGE[2] = {0.1, 0.4}; +}; diff --git a/rocAL/include/augmentations/color_augmentations/node_contrast.h b/rocAL/include/augmentations/color_augmentations/node_contrast.h index 075673c90..6567769a8 100644 --- a/rocAL/include/augmentations/color_augmentations/node_contrast.h +++ b/rocAL/include/augmentations/color_augmentations/node_contrast.h @@ -22,25 +22,24 @@ THE SOFTWARE. 
#pragma once #include + +#include "graph.h" #include "node.h" #include "parameter_vx.h" -#include "graph.h" -class RocalContrastNode : public Node -{ -public: - RocalContrastNode(const std::vector &inputs, const std::vector &outputs); - RocalContrastNode() = delete; - void init(int min, int max); - void init(IntParam *min, IntParam * max); +class ContrastNode : public Node { + public: + ContrastNode(const std::vector &inputs, const std::vector &outputs); + ContrastNode() = delete; + void init(float contrast_factor, float contrast_center); + void init(FloatParam *contrast_factor_param, FloatParam *contrast_center_param); -protected: - void create_node() override ; + protected: + void create_node() override; void update_node() override; -private: - ParameterVX _min; - ParameterVX _max; - constexpr static int CONTRAST_MIN_RANGE [2] = {0, 30}; - constexpr static int CONTRAST_MAX_RANGE [2] = {60, 90}; -}; \ No newline at end of file + private: + ParameterVX _factor, _center; + constexpr static float CONTRAST_FACTOR_RANGE[2] = {0.1, 1.95}; + constexpr static float CONTRAST_CENTER_RANGE[2] = {60, 90}; +}; diff --git a/rocAL/include/augmentations/color_augmentations/node_exposure.h b/rocAL/include/augmentations/color_augmentations/node_exposure.h index 4f1cb95f4..19690d236 100644 --- a/rocAL/include/augmentations/color_augmentations/node_exposure.h +++ b/rocAL/include/augmentations/color_augmentations/node_exposure.h @@ -21,23 +21,23 @@ THE SOFTWARE. 
*/ #pragma once +#include "graph.h" #include "node.h" #include "parameter_factory.h" #include "parameter_vx.h" -#include "graph.h" -class ExposureNode : public Node -{ -public: - ExposureNode(const std::vector &inputs, const std::vector &outputs); +class ExposureNode : public Node { + public: + ExposureNode(const std::vector &inputs, const std::vector &outputs); ExposureNode() = delete; - void init(float shift); - void init(FloatParam *shift); -protected: + void init(float exposure_factor); + void init(FloatParam *exposure_factor_param); + + protected: void create_node() override; void update_node() override; -private: - ParameterVX _shift; - vx_array _width_array ,_height_array; - constexpr static float SHIFT_RANGE [2] = {0.15, 0.95}; -}; \ No newline at end of file + + private: + ParameterVX _exposure_factor; + constexpr static float EXPOSURE_FACTOR_RANGE[2] = {0.15, 0.95}; +}; diff --git a/rocAL/include/augmentations/color_augmentations/node_gamma.h b/rocAL/include/augmentations/color_augmentations/node_gamma.h index b113fd96b..f561ff4ee 100644 --- a/rocAL/include/augmentations/color_augmentations/node_gamma.h +++ b/rocAL/include/augmentations/color_augmentations/node_gamma.h @@ -25,19 +25,18 @@ THE SOFTWARE. 
#include "parameter_factory.h" #include "parameter_vx.h" - -class GammaNode : public Node -{ -public: - GammaNode(const std::vector &inputs, const std::vector &outputs); +class GammaNode : public Node { + public: + GammaNode(const std::vector &inputs, const std::vector &outputs); GammaNode() = delete; - void init(float shift); - void init(FloatParam *shift); + void init(float gamma); + void init(FloatParam *gamma_param); -protected: + protected: void update_node() override; void create_node() override; -private: - ParameterVX _shift; - constexpr static float SHIFT_RANGE [2] = {0.3, 7.00}; + + private: + ParameterVX _gamma; + constexpr static float GAMMA_RANGE[2] = {0.3, 7.00}; }; diff --git a/rocAL/include/augmentations/color_augmentations/node_hue.h b/rocAL/include/augmentations/color_augmentations/node_hue.h index 79f1639bd..535d5cfef 100644 --- a/rocAL/include/augmentations/color_augmentations/node_hue.h +++ b/rocAL/include/augmentations/color_augmentations/node_hue.h @@ -25,18 +25,18 @@ THE SOFTWARE. #include "parameter_factory.h" #include "parameter_vx.h" - -class HueNode : public Node -{ -public: - HueNode(const std::vector &inputs, const std::vector &outputs); +class HueNode : public Node { + public: + HueNode(const std::vector &inputs, const std::vector &outputs); HueNode() = delete; void init(float hue); void init(FloatParam *hue); -protected: + + protected: void create_node() override; void update_node() override; -private: + + private: ParameterVX _hue; - constexpr static float HUE_RANGE [2] = {-359.0, 359.0}; + constexpr static float HUE_RANGE[2] = {-359.0, 359.0}; }; diff --git a/rocAL/include/augmentations/color_augmentations/node_saturation.h b/rocAL/include/augmentations/color_augmentations/node_saturation.h index a503c4c85..6ec1d2c79 100644 --- a/rocAL/include/augmentations/color_augmentations/node_saturation.h +++ b/rocAL/include/augmentations/color_augmentations/node_saturation.h @@ -25,18 +25,18 @@ THE SOFTWARE. 
#include "parameter_factory.h" #include "parameter_vx.h" - -class SatNode : public Node -{ -public: - SatNode(const std::vector &inputs, const std::vector &outputs); - SatNode() = delete; +class SaturationNode : public Node { + public: + SaturationNode(const std::vector &inputs, const std::vector &outputs); + SaturationNode() = delete; void init(float sat); void init(FloatParam *sat); -protected: + + protected: void create_node() override; void update_node() override; -private: - ParameterVX _sat; // For saturation - constexpr static float SAT_RANGE [2] = {-0.5, 0.5}; + + private: + ParameterVX _saturation; + constexpr static float SAT_RANGE[2] = {-0.5, 0.5}; }; diff --git a/rocAL/include/augmentations/color_augmentations/node_vignette.h b/rocAL/include/augmentations/color_augmentations/node_vignette.h index 9af231b09..8ef42ac1f 100644 --- a/rocAL/include/augmentations/color_augmentations/node_vignette.h +++ b/rocAL/include/augmentations/color_augmentations/node_vignette.h @@ -21,22 +21,23 @@ THE SOFTWARE. 
*/ #pragma once +#include "graph.h" #include "node.h" #include "parameter_factory.h" #include "parameter_vx.h" -#include "graph.h" -class VignetteNode : public Node -{ -public: - VignetteNode(const std::vector &inputs, const std::vector &outputs); - VignetteNode () = delete; +class VignetteNode : public Node { + public: + VignetteNode(const std::vector &inputs, const std::vector &outputs); + VignetteNode() = delete; void init(float sdev); void init(FloatParam *sdev); -protected: + + protected: void create_node() override; void update_node() override; -private: + + private: ParameterVX _sdev; - constexpr static float SDEV_RANGE [2] = {40 , 60}; + constexpr static float SDEV_RANGE[2] = {40, 60}; }; diff --git a/rocAL/include/augmentations/effects_augmentations/node_fog.h b/rocAL/include/augmentations/effects_augmentations/node_fog.h index ea0309dc8..035bb6701 100644 --- a/rocAL/include/augmentations/effects_augmentations/node_fog.h +++ b/rocAL/include/augmentations/effects_augmentations/node_fog.h @@ -24,19 +24,19 @@ THE SOFTWARE. 
#include "node.h" #include "parameter_factory.h" #include "parameter_vx.h" -class FogNode : public Node -{ -public: - FogNode(const std::vector &inputs, const std::vector &outputs); + +class FogNode : public Node { + public: + FogNode(const std::vector &inputs, const std::vector &outputs); FogNode() = delete; void init(float fog_param); void init(FloatParam *fog_param); -protected: + + protected: void create_node() override; void update_node() override; -private: + + private: ParameterVX _fog_param; - constexpr static float FOG_VALUE_RANGE [2] = {0.2, 0.8}; + constexpr static float FOG_VALUE_RANGE[2] = {0.2, 0.8}; }; - - diff --git a/rocAL/include/augmentations/effects_augmentations/node_jitter.h b/rocAL/include/augmentations/effects_augmentations/node_jitter.h index 2ddc58645..12706d2a2 100644 --- a/rocAL/include/augmentations/effects_augmentations/node_jitter.h +++ b/rocAL/include/augmentations/effects_augmentations/node_jitter.h @@ -25,18 +25,19 @@ THE SOFTWARE. #include "parameter_factory.h" #include "parameter_vx.h" - -class JitterNode : public Node -{ -public: - JitterNode(const std::vector &inputs, const std::vector &outputs); +class JitterNode : public Node { + public: + JitterNode(const std::vector &inputs, const std::vector &outputs); JitterNode() = delete; - void init(int kernel_size); - void init(IntParam *kernel_size); -protected: + void init(int kernel_size, int seed); + void init(IntParam *kernel_size, int seed); + + protected: void create_node() override; void update_node() override; -private: + + private: ParameterVX _kernel_size; - constexpr static int KERNEL_SIZE [2] = {2, 5}; + int _seed; + constexpr static int KERNEL_SIZE[2] = {2, 5}; }; diff --git a/rocAL/include/augmentations/effects_augmentations/node_pixelate.h b/rocAL/include/augmentations/effects_augmentations/node_pixelate.h index ab3b965dc..48547b9d3 100644 --- a/rocAL/include/augmentations/effects_augmentations/node_pixelate.h +++ 
b/rocAL/include/augmentations/effects_augmentations/node_pixelate.h @@ -22,18 +22,18 @@ THE SOFTWARE. #pragma once +#include "graph.h" #include "node.h" #include "parameter_factory.h" -#include "graph.h" - -class PixelateNode : public Node -{ -public: - PixelateNode(const std::vector &inputs, const std::vector &outputs); +class PixelateNode : public Node { + public: + PixelateNode(const std::vector &inputs, const std::vector &outputs); PixelateNode() = delete; -protected: + + protected: void create_node() override; void update_node() override; -private: + + private: }; diff --git a/rocAL/include/augmentations/effects_augmentations/node_rain.h b/rocAL/include/augmentations/effects_augmentations/node_rain.h index 9b0cf80aa..2092ede01 100644 --- a/rocAL/include/augmentations/effects_augmentations/node_rain.h +++ b/rocAL/include/augmentations/effects_augmentations/node_rain.h @@ -26,23 +26,24 @@ THE SOFTWARE. #include "parameter_factory.h" #include "parameter_vx.h" -class RainNode : public Node -{ -public: - RainNode(const std::vector &inputs, const std::vector &outputs); +class RainNode : public Node { + public: + RainNode(const std::vector &inputs, const std::vector &outputs); RainNode() = delete; void init(float rain_value, int rain_width, int rain_height, float rain_transparency); - void init(FloatParam *rain_value, IntParam *rain_width, IntParam *rain_height, FloatParam *rain_transparency); -protected: + void init(FloatParam *rain_value, IntParam *rain_width, IntParam *rain_height, FloatParam *rain_transparency); + + protected: void create_node() override; void update_node() override; -private: + + private: ParameterVX _rain_value; ParameterVX _rain_width; ParameterVX _rain_height; ParameterVX _rain_transparency; - constexpr static float RAIN_VALUE_RANGE [2] = {0.15, 0.95}; - constexpr static int RAIN_WIDTH_RANGE [2] = {1, 2}; - constexpr static int RAIN_HEIGHT_RANGE [2] = {15, 17}; - constexpr static float RAIN_TRANSPARENCY_RANGE [2] = {0.2, 0.3}; + constexpr 
static float RAIN_VALUE_RANGE[2] = {0.15, 0.95}; + constexpr static int RAIN_WIDTH_RANGE[2] = {1, 2}; + constexpr static int RAIN_HEIGHT_RANGE[2] = {15, 17}; + constexpr static float RAIN_TRANSPARENCY_RANGE[2] = {0.2, 0.3}; }; \ No newline at end of file diff --git a/rocAL/include/augmentations/effects_augmentations/node_snow.h b/rocAL/include/augmentations/effects_augmentations/node_snow.h index ba0687f04..6b21cff2c 100644 --- a/rocAL/include/augmentations/effects_augmentations/node_snow.h +++ b/rocAL/include/augmentations/effects_augmentations/node_snow.h @@ -26,17 +26,18 @@ THE SOFTWARE. #include "parameter_factory.h" #include "parameter_vx.h" -class SnowNode : public Node -{ -public: - SnowNode(const std::vector &inputs, const std::vector &outputs); +class SnowNode : public Node { + public: + SnowNode(const std::vector &inputs, const std::vector &outputs); SnowNode() = delete; - void init(float shift); - void init(FloatParam *shift); -protected: + void init(float snow_value); + void init(FloatParam *snow_value_param); + + protected: void create_node() override; void update_node() override; -private: - ParameterVX _shift; - constexpr static float SNOW_VALUE_RANGE [2] = {0.1, 0.8}; + + private: + ParameterVX _snow_value; + constexpr static float SNOW_VALUE_RANGE[2] = {0.1, 0.8}; }; diff --git a/rocAL/include/augmentations/effects_augmentations/node_snp_noise.h b/rocAL/include/augmentations/effects_augmentations/node_snp_noise.h index 0599ea611..e268a0621 100644 --- a/rocAL/include/augmentations/effects_augmentations/node_snp_noise.h +++ b/rocAL/include/augmentations/effects_augmentations/node_snp_noise.h @@ -22,23 +22,27 @@ THE SOFTWARE. 
#pragma once +#include "graph.h" #include "node.h" #include "parameter_factory.h" #include "parameter_vx.h" -#include "graph.h" -class SnPNoiseNode : public Node -{ -public: - SnPNoiseNode(const std::vector &inputs, const std::vector &outputs); +class SnPNoiseNode : public Node { + public: + SnPNoiseNode(const std::vector &inputs, const std::vector &outputs); SnPNoiseNode() = delete; - void init(float sdev); - void init(FloatParam *sdev); -protected: + void init(float noise_prob, float salt_prob, float salt_value, float pepper_value, int seed); + void init(FloatParam *noise_prob_param, FloatParam *salt_prob_param, FloatParam *salt_value_param, FloatParam *pepper_value_param, int seed); + + protected: void create_node() override; void update_node() override; -private: - ParameterVX _sdev; - constexpr static float SDEV_RANGE [2] = {0.1, 0.15}; -}; + private: + ParameterVX _noise_prob, _salt_prob, _salt_value, _pepper_value; + constexpr static float NOISE_PROB_RANGE[2] = {0.1, 1}; + constexpr static float SALT_PROB_RANGE[2] = {0.1, 1}; + constexpr static float SALT_RANGE[2] = {0.1, 1}; + constexpr static float PEPPER_RANGE[2] = {0, 0.5}; + int _seed; +}; diff --git a/rocAL/include/augmentations/geometry_augmentations/node_crop.h b/rocAL/include/augmentations/geometry_augmentations/node_crop.h index 66ffd4139..a09abe60d 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_crop.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_crop.h @@ -22,28 +22,29 @@ THE SOFTWARE. 
#pragma once #include "node.h" -#include "parameter_factory.h" #include "parameter_crop_factory.h" +#include "parameter_factory.h" #include "parameter_rocal_crop.h" -class CropNode : public Node -{ -public: - CropNode(const std::vector &inputs, const std::vector &outputs); +class CropNode : public Node { + public: + CropNode(const std::vector &inputs, const std::vector &outputs); CropNode() = delete; + ~CropNode(); void init(unsigned int crop_h, unsigned int crop_w, float x_drift, float y_drift); void init(unsigned int crop_h, unsigned int crop_w); - void init( FloatParam *crop_h_factor, FloatParam *crop_w_factor, FloatParam * x_drift, FloatParam * y_drift); - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } + void init(FloatParam *crop_h_factor, FloatParam *crop_w_factor, FloatParam *x_drift, FloatParam *y_drift); + unsigned int get_dst_width() { return _outputs[0]->info().max_shape()[0]; } + unsigned int get_dst_height() { return _outputs[0]->info().max_shape()[1]; } std::shared_ptr get_crop_param() { return _crop_param; } -protected: - void create_node() override ; + + protected: + void create_node() override; void update_node() override; -private: + void create_crop_tensor(); + void *_crop_coordinates = nullptr; + vx_tensor _crop_tensor = nullptr; - size_t _dest_width; - size_t _dest_height; + private: std::shared_ptr _crop_param; }; - diff --git a/rocAL/include/augmentations/geometry_augmentations/node_crop_mirror_normalize.h b/rocAL/include/augmentations/geometry_augmentations/node_crop_mirror_normalize.h index 870232d53..654fa2a4f 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_crop_mirror_normalize.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_crop_mirror_normalize.h @@ -21,30 +21,28 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" +#include "node_crop.h" #include "parameter_crop_factory.h" +#include "parameter_factory.h" #include "parameter_vx.h" -class CropMirrorNormalizeNode : public Node -{ -public: - CropMirrorNormalizeNode(const std::vector &inputs, - const std::vector &outputs); + +class CropMirrorNormalizeNode : public CropNode { + public: + CropMirrorNormalizeNode(const std::vector &inputs, + const std::vector &outputs); CropMirrorNormalizeNode() = delete; - void init(int crop_h, int crop_w, float start_x, float start_y, float mean, float std_dev, IntParam *mirror); - vx_array return_mirror(){ return _mirror.default_array(); } + void init(int crop_h, int crop_w, float start_x, float start_y, std::vector &mean, std::vector &std_dev, IntParam *mirror); + vx_array return_mirror() { return _mirror.default_array(); } std::shared_ptr return_crop_param() { return _crop_param; } - vx_array get_src_width() { return _src_roi_width; } - vx_array get_src_height() { return _src_roi_height; } -protected: - void create_node() override ; + + protected: + void create_node() override; void update_node() override; -private: + + private: std::shared_ptr _crop_param; - std::vector _mean_vx, _std_dev_vx; - vx_array _mean_array, _std_dev_array; - float _mean; - float _std_dev; + vx_array _multiplier_vx_array, _offset_vx_array; + std::vector _mean, _std_dev; ParameterVX _mirror; - constexpr static int MIRROR_RANGE [2] = {0, 1}; + constexpr static int MIRROR_RANGE[2] = {0, 1}; }; \ No newline at end of file diff --git a/rocAL/include/augmentations/geometry_augmentations/node_crop_resize.h b/rocAL/include/augmentations/geometry_augmentations/node_crop_resize.h index f5ce3399a..79cb5b7f7 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_crop_resize.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_crop_resize.h @@ -21,30 +21,25 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" +#include "node_crop.h" #include "parameter_crop_factory.h" +#include "parameter_factory.h" -class CropResizeNode : public Node -{ -public: - CropResizeNode(const std::vector &inputs, const std::vector &outputs); +class CropResizeNode : public CropNode { + public: + CropResizeNode(const std::vector &inputs, const std::vector &outputs); CropResizeNode() = delete; void init(float area, float aspect_ratio, float x_center_drift, float y_center_drift); - void init(FloatParam* area, FloatParam *aspect_ratio, FloatParam * x_drift_factor, FloatParam * y_drift_factor); - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } + void init(FloatParam *area, FloatParam *aspect_ratio, FloatParam *x_drift_factor, FloatParam *y_drift_factor); + unsigned int get_dst_width() { return _outputs[0]->info().max_shape()[0]; } + unsigned int get_dst_height() { return _outputs[0]->info().max_shape()[1]; } std::shared_ptr get_crop_param() { return _crop_param; } -protected: + + protected: void create_node() override; void update_node() override; -private: - size_t _dest_width; - size_t _dest_height; + private: std::shared_ptr _crop_param; - vx_array _dst_roi_width ,_dst_roi_height; + vx_array _dst_roi_width, _dst_roi_height; }; - - - diff --git a/rocAL/include/augmentations/geometry_augmentations/node_fisheye.h b/rocAL/include/augmentations/geometry_augmentations/node_fisheye.h index 31e6c5173..5069a8027 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_fisheye.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_fisheye.h @@ -22,18 +22,17 @@ THE SOFTWARE. 
#pragma once -#include "node.h" #include "graph.h" +#include "node.h" - -class FisheyeNode : public Node -{ -public: - FisheyeNode(const std::vector &inputs, const std::vector &outputs); +class FisheyeNode : public Node { + public: + FisheyeNode(const std::vector &inputs, const std::vector &outputs); FisheyeNode() = delete; -protected: + protected: void create_node() override; void update_node() override; -private: + + private: }; diff --git a/rocAL/include/augmentations/geometry_augmentations/node_flip.h b/rocAL/include/augmentations/geometry_augmentations/node_flip.h index d46d8a33d..c2168adac 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_flip.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_flip.h @@ -22,26 +22,24 @@ THE SOFTWARE. #pragma once #include "node.h" -#include "parameter_vx.h" #include "parameter_factory.h" +#include "parameter_vx.h" -class FlipNode : public Node -{ -public: - FlipNode(const std::vector &inputs, const std::vector &outputs); +class FlipNode : public Node { + public: + FlipNode(const std::vector &inputs, const std::vector &outputs); FlipNode() = delete; - void init(int flip_axis); - void init(IntParam *flip_axis); - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } - vx_array get_src_width() { return _src_roi_width; } - vx_array get_src_height() { return _src_roi_height; } - vx_array get_flip_axis() { return _flip_axis.default_array(); } -protected: + void init(int h_flag, int v_flag); + void init(IntParam *h_flag_param, IntParam *v_flag_param); + vx_array get_horizontal_flip() { return _horizontal.default_array(); } + vx_array get_vertical_flip() { return _vertical.default_array(); } + + protected: void create_node() override; void update_node() override; -private: - int _axis; - ParameterVX _flip_axis; - constexpr static int FLIP_SIZE [2] = {0, 2}; -}; \ No newline at end of file + + private: + 
ParameterVX _horizontal, _vertical; + constexpr static int HORIZONTAL_RANGE[2] = {0, 1}; + constexpr static int VERTICAL_RANGE[2] = {0, 1}; +}; diff --git a/rocAL/include/augmentations/geometry_augmentations/node_lens_correction.h b/rocAL/include/augmentations/geometry_augmentations/node_lens_correction.h index 4e5cfb3e6..9a222a511 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_lens_correction.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_lens_correction.h @@ -25,21 +25,20 @@ THE SOFTWARE. #include "parameter_factory.h" #include "parameter_vx.h" - - -class LensCorrectionNode : public Node -{ -public: - LensCorrectionNode(const std::vector &inputs, const std::vector &outputs); +class LensCorrectionNode : public Node { + public: + LensCorrectionNode(const std::vector &inputs, const std::vector &outputs); LensCorrectionNode() = delete; void init(float strength, float zoom); void init(FloatParam *strength, FloatParam *zoom); -protected: + + protected: void create_node() override; void update_node() override; -private: + + private: ParameterVX _strength; ParameterVX _zoom; - constexpr static float STRENGTH_RANGE [2] = {0.05, 3.0}; - constexpr static float ZOOM_RANGE [2] = {1.0, 1.3}; + constexpr static float STRENGTH_RANGE[2] = {0.05, 3.0}; + constexpr static float ZOOM_RANGE[2] = {1.0, 1.3}; }; diff --git a/rocAL/include/augmentations/geometry_augmentations/node_random_crop.h b/rocAL/include/augmentations/geometry_augmentations/node_random_crop.h index 17eac5ff5..d22948b44 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_random_crop.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_random_crop.h @@ -21,30 +21,25 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_factory.h" +#include "node_crop.h" #include "parameter_crop_factory.h" +#include "parameter_factory.h" -class RandomCropNode : public Node -{ -public: - RandomCropNode(const std::vector &inputs, const std::vector &outputs); +class RandomCropNode : public CropNode { + public: + RandomCropNode(const std::vector &inputs, const std::vector &outputs); RandomCropNode() = delete; - void init(float area, float aspect_ratio, float x_drift, float y_drift); void init(FloatParam *crop_area_factor, FloatParam *crop_aspect_ratio, FloatParam *x_drift, FloatParam *y_drift, int num_of_attempts); - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } + unsigned int get_dst_width() { return _outputs[0]->info().max_shape()[0]; } + unsigned int get_dst_height() { return _outputs[0]->info().max_shape()[1]; } std::shared_ptr get_crop_param() { return _crop_param; } - int get_num_of_attempts(){return _num_of_attempts;} + int get_num_of_attempts() { return _num_of_attempts; } -protected: + protected: void create_node() override; void update_node() override; -private: - size_t _dest_width; - size_t _dest_height; - int _num_of_attempts = 20; + private: + int _num_of_attempts = 20; std::shared_ptr _crop_param; }; - diff --git a/rocAL/include/augmentations/geometry_augmentations/node_resize.h b/rocAL/include/augmentations/geometry_augmentations/node_resize.h index f62e92c08..43de6a2e2 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_resize.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_resize.h @@ -26,12 +26,8 @@ THE SOFTWARE. 
class ResizeNode : public Node { public: - ResizeNode(const std::vector &inputs, const std::vector &outputs); + ResizeNode(const std::vector &inputs, const std::vector &outputs); ResizeNode() = delete; - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } - vx_array get_src_width() { return _src_roi_width; } - vx_array get_src_height() { return _src_roi_height; } void init(unsigned dest_width, unsigned dest_height, RocalResizeScalingMode scaling_mode, const std::vector& max_size, RocalResizeInterpolationType interpolation_type); void adjust_out_roi_size(); diff --git a/rocAL/include/augmentations/geometry_augmentations/node_resize_crop_mirror.h b/rocAL/include/augmentations/geometry_augmentations/node_resize_crop_mirror.h index 464d042b4..2d425bbc9 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_resize_crop_mirror.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_resize_crop_mirror.h @@ -21,31 +21,34 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "parameter_vx.h" -#include "parameter_factory.h" +#include "node_crop.h" #include "parameter_crop_factory.h" +#include "parameter_factory.h" +#include "parameter_vx.h" +#include "rocal_api_types.h" -class CropParam; - -class ResizeCropMirrorNode : public Node -{ -public: - ResizeCropMirrorNode(const std::vector &inputs, const std::vector &outputs); +class ResizeCropMirrorNode : public CropNode { + public: + ResizeCropMirrorNode(const std::vector &inputs, const std::vector &outputs); ResizeCropMirrorNode() = delete; - void init(unsigned int crop_h, unsigned int crop_w, IntParam *mirror); - void init( FloatParam *crop_h_factor, FloatParam *crop_w_factor, IntParam *mirror); - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } + void init(unsigned int crop_h, unsigned int crop_w, IntParam *mirror, + RocalResizeInterpolationType interpolation_type = RocalResizeInterpolationType::ROCAL_LINEAR_INTERPOLATION); + void init(FloatParam *crop_h_factor, FloatParam *crop_w_factor, IntParam *mirror, + RocalResizeInterpolationType interpolation_type = RocalResizeInterpolationType::ROCAL_LINEAR_INTERPOLATION); + unsigned int get_dst_width() { return _outputs[0]->info().max_shape()[0]; } + unsigned int get_dst_height() { return _outputs[0]->info().max_shape()[1]; } std::shared_ptr get_crop_param() { return _crop_param; } vx_array get_mirror() { return _mirror.default_array(); } -protected: + void adjust_out_roi_size(); + + protected: void create_node() override; void update_node() override; -private: + + private: std::shared_ptr _crop_param; - vx_array _dst_roi_width ,_dst_roi_height; + vx_array _dst_roi_width, _dst_roi_height; ParameterVX _mirror; - constexpr static int MIRROR_RANGE [2] = {0, 1}; + constexpr static int MIRROR_RANGE[2] = {0, 1}; + int _interpolation_type; }; - diff --git 
a/rocAL/include/augmentations/geometry_augmentations/node_resize_mirror_normalize.h b/rocAL/include/augmentations/geometry_augmentations/node_resize_mirror_normalize.h index bafd5f6d5..a1205676c 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_resize_mirror_normalize.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_resize_mirror_normalize.h @@ -25,26 +25,27 @@ THE SOFTWARE. #include "parameter_factory.h" #include "parameter_vx.h" -class ResizeMirrorNormalizeNode : public Node -{ -public: - ResizeMirrorNormalizeNode(const std::vector &inputs, const std::vector &outputs); +class ResizeMirrorNormalizeNode : public Node { + public: + ResizeMirrorNormalizeNode(const std::vector &inputs, const std::vector &outputs); ResizeMirrorNormalizeNode() = delete; - void init(std::vector& mean, std::vector& std_dev, IntParam *mirror); - vx_array get_dst_width() { return _dst_roi_width; } - vx_array get_dst_height() { return _dst_roi_height;} - vx_array get_src_width() { return _src_roi_width; } - vx_array get_src_height() { return _src_roi_height; } - vx_array return_mirror(){ return _mirror.default_array(); } -protected: + void init(unsigned dest_width, unsigned dest_height, RocalResizeScalingMode scaling_mode, std::vector max_size, + RocalResizeInterpolationType interpolation_type, std::vector &mean, std::vector &std_dev, IntParam *mirror); + void adjust_out_roi_size(); + vx_array get_mirror() { return _mirror.default_array(); } + + protected: void create_node() override; void update_node() override; -private: - vx_array _dst_roi_width, _dst_roi_height; - std::vector _dest_width_val, _dest_height_val; - vx_array _mean_array, _std_dev_array; - std::vector _mean; - std::vector _std_dev; + + private: + vx_array _mean_vx_array, _std_dev_vx_array, _mirror_vx_array, _dst_roi_width, _dst_roi_height; + std::vector _mean, _std_dev; + int _interpolation_type; ParameterVX _mirror; constexpr static int _mirror_range[2] = {0, 1}; + RocalResizeScalingMode 
_scaling_mode; + unsigned _src_width, _src_height, _dst_width, _dst_height, _out_width, _out_height; + unsigned _max_width = 0, _max_height = 0; + std::vector _dst_roi_width_vec, _dst_roi_height_vec; }; diff --git a/rocAL/include/augmentations/geometry_augmentations/node_rotate.h b/rocAL/include/augmentations/geometry_augmentations/node_rotate.h index f2abab09b..a9041a5af 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_rotate.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_rotate.h @@ -21,30 +21,28 @@ THE SOFTWARE. */ #pragma once +#include "graph.h" #include "node.h" #include "parameter_factory.h" #include "parameter_vx.h" -#include "graph.h" +#include "rocal_api_types.h" -class RotateNode : public Node -{ -public: - RotateNode(const std::vector &inputs, const std::vector &outputs); +class RotateNode : public Node { + public: + RotateNode(const std::vector &inputs, const std::vector &outputs); RotateNode() = delete; - void init(float angle); - void init(FloatParam *angle); - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } - vx_array get_src_width() { return _src_roi_width; } - vx_array get_src_height() { return _src_roi_height; } + void init(float angle, RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); + void init(FloatParam *angle_param, RocalResizeInterpolationType interpolation_type = ROCAL_LINEAR_INTERPOLATION); + unsigned int get_dst_width() { return _outputs[0]->info().max_shape()[0]; } + unsigned int get_dst_height() { return _outputs[0]->info().max_shape()[1]; } vx_array get_angle() { return _angle.default_array(); } -protected: + protected: void create_node() override; void update_node() override; -private: - ParameterVX _angle; - vx_array _dst_roi_width,_dst_roi_height; - constexpr static float ROTATE_ANGLE_RANGE [2] = {0, 180}; -}; \ No newline at end of file + private: + ParameterVX 
_angle; + int _interpolation_type; + constexpr static float ROTATE_ANGLE_RANGE[2] = {0, 180}; +}; diff --git a/rocAL/include/augmentations/geometry_augmentations/node_warp_affine.h b/rocAL/include/augmentations/geometry_augmentations/node_warp_affine.h index 476cbacd2..c1bfa6d89 100644 --- a/rocAL/include/augmentations/geometry_augmentations/node_warp_affine.h +++ b/rocAL/include/augmentations/geometry_augmentations/node_warp_affine.h @@ -21,34 +21,35 @@ THE SOFTWARE. */ #pragma once +#include "graph.h" #include "node.h" #include "parameter_factory.h" #include "parameter_vx.h" -#include "graph.h" -class WarpAffineNode : public Node -{ -public: - WarpAffineNode(const std::vector &inputs, const std::vector &outputs); +class WarpAffineNode : public Node { + public: + WarpAffineNode(const std::vector &inputs, const std::vector &outputs); WarpAffineNode() = delete; - void init(float x0, float x1, float y0, float y1, float o0, float o1); - void init(FloatParam* x0, FloatParam* x1, FloatParam* y0, FloatParam* y1, FloatParam* o0, FloatParam* o1); -protected: + void init(float x0, float x1, float y0, float y1, float o0, float o1, RocalResizeInterpolationType interpolation_type); + void init(FloatParam *x0, FloatParam *x1, FloatParam *y0, FloatParam *y1, + FloatParam *o0, FloatParam *o1, RocalResizeInterpolationType interpolation_type); + + protected: void create_node() override; void update_node() override; -private: + + private: ParameterVX _x0; ParameterVX _x1; ParameterVX _y0; ParameterVX _y1; ParameterVX _o0; ParameterVX _o1; - std::vector _affine; - vx_array _dst_roi_width,_dst_roi_height; vx_array _affine_array; - constexpr static float COEFFICIENT_RANGE_0 [2] = {-0.35, 0.35}; - constexpr static float COEFFICIENT_RANGE_1 [2] = {0.65, 1.35}; - constexpr static float COEFFICIENT_RANGE_OFFSET [2] = {-10.0, 10.0}; + constexpr static float COEFFICIENT_RANGE_0[2] = {-0.35, 0.35}; + constexpr static float COEFFICIENT_RANGE_1[2] = {0.65, 1.35}; + constexpr static float 
COEFFICIENT_RANGE_OFFSET[2] = {-10.0, 10.0}; void update_affine_array(); + int _interpolation_type; }; diff --git a/rocAL/include/augmentations/node_copy.h b/rocAL/include/augmentations/node_copy.h index 2cd00d1bf..27879b9ab 100644 --- a/rocAL/include/augmentations/node_copy.h +++ b/rocAL/include/augmentations/node_copy.h @@ -21,16 +21,15 @@ THE SOFTWARE. */ #pragma once -#include "node.h" #include "graph.h" +#include "node.h" -class CopyNode : public Node -{ -public: - CopyNode(const std::vector &inputs, const std::vector &outputs); +class CopyNode : public Node { + public: + CopyNode(const std::vector &inputs, const std::vector &outputs); CopyNode() = delete; -protected: + protected: void create_node() override; - void update_node() override {}; + void update_node() override{}; }; diff --git a/rocAL/include/augmentations/node_nop.h b/rocAL/include/augmentations/node_nop.h index 3876a2572..6c4dce974 100644 --- a/rocAL/include/augmentations/node_nop.h +++ b/rocAL/include/augmentations/node_nop.h @@ -21,15 +21,15 @@ THE SOFTWARE. */ #pragma once -#include "node.h" #include "graph.h" +#include "node.h" -class NopNode : public Node -{ -public: - NopNode(const std::vector &inputs, const std::vector &outputs); +class NopNode : public Node { + public: + NopNode(const std::vector &inputs, const std::vector &outputs); NopNode() = delete; -protected: + + protected: void create_node() override; void update_node() override; }; diff --git a/rocAL/include/augmentations/node_sequence_rearrange.h b/rocAL/include/augmentations/node_sequence_rearrange.h index 247b7728d..d971c1ca8 100644 --- a/rocAL/include/augmentations/node_sequence_rearrange.h +++ b/rocAL/include/augmentations/node_sequence_rearrange.h @@ -21,22 +21,21 @@ THE SOFTWARE. 
*/ #pragma once +#include "graph.h" #include "node.h" #include "parameter_factory.h" #include "parameter_vx.h" -#include "graph.h" -class SequenceRearrangeNode : public Node -{ -public: - SequenceRearrangeNode(const std::vector &inputs, const std::vector &outputs); +class SequenceRearrangeNode : public Node { + public: + SequenceRearrangeNode(const std::vector &inputs, const std::vector &outputs); SequenceRearrangeNode() = delete; - void init(unsigned int* new_order, unsigned int new_sequence_length, unsigned int sequence_length, unsigned int sequence_count); -protected: + void init(std::vector &new_order); + + protected: void create_node() override; void update_node() override; -private: + + private: std::vector _new_order; - unsigned int _new_sequence_length, _sequence_length, _sequence_count; - vx_array _sequence_array; }; diff --git a/rocAL/include/augmentations/node_ssd_random_crop.h b/rocAL/include/augmentations/node_ssd_random_crop.h index 63a5bd042..31c3dce49 100644 --- a/rocAL/include/augmentations/node_ssd_random_crop.h +++ b/rocAL/include/augmentations/node_ssd_random_crop.h @@ -20,76 +20,70 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #pragma once -#include "node.h" -#include "parameter_factory.h" +#include "node_crop.h" #include "parameter_crop_factory.h" - +#include "parameter_factory.h" // todo:: move this to common header -template +template class SeededRNG { - /* - * @param batch_size How many RNGs to store - * @param state_size How many seed are used to initialize one RNG. Used to lower probablity of - * collisions between seeds used to initialize RNGs in different operators. 
- */ -public: - SeededRNG (int batch_size = 128) { - std::random_device source; - _batch_size = batch_size; - std::size_t _random_data_size = state_size * batch_size ; - std::vector random_data(_random_data_size); - std::generate(random_data.begin(), random_data.end(), std::ref(source)); - _rngs.reserve(batch_size); - for (int i=0; i < (int)(_batch_size*state_size); i += state_size) { - std::seed_seq seeds(std::begin(random_data) + i, std::begin(random_data)+ i +state_size); - _rngs.emplace_back(T(seeds)); - } - } + /* + * @param batch_size How many RNGs to store + * @param state_size How many seed are used to initialize one RNG. Used to lower probablity of + * collisions between seeds used to initialize RNGs in different operators. + */ + public: + SeededRNG(int batch_size = 128) { + std::random_device source; + _batch_size = batch_size; + std::size_t _random_data_size = state_size * batch_size; + std::vector random_data(_random_data_size); + std::generate(random_data.begin(), random_data.end(), std::ref(source)); + _rngs.reserve(batch_size); + for (int i = 0; i < (int)(_batch_size * state_size); i += state_size) { + std::seed_seq seeds(std::begin(random_data) + i, std::begin(random_data) + i + state_size); + _rngs.emplace_back(T(seeds)); + } + } - /** - * Returns engine corresponding to given sample ID - */ - T &operator[](int sample) noexcept { - return _rngs[sample % _batch_size]; - } + /** + * Returns engine corresponding to given sample ID + */ + T &operator[](int sample) noexcept { + return _rngs[sample % _batch_size]; + } -private: + private: std::vector _rngs; int _batch_size; }; -class SSDRandomCropNode : public Node -{ -public: - SSDRandomCropNode(const std::vector &inputs, const std::vector &outputs); +class SSDRandomCropNode : public CropNode { + public: + SSDRandomCropNode(const std::vector &inputs, const std::vector &outputs); SSDRandomCropNode() = delete; void init(FloatParam *crop_area_factor, FloatParam *crop_aspect_ratio, FloatParam *x_drift, 
FloatParam *y_drift, int num_of_attempts); - unsigned int get_dst_width() { return _outputs[0]->info().width(); } - unsigned int get_dst_height() { return _outputs[0]->info().height_single(); } + unsigned int get_dst_width() { return _outputs[0]->info().max_shape()[0]; } + unsigned int get_dst_height() { return _outputs[0]->info().max_shape()[1]; } std::shared_ptr get_crop_param() { return _crop_param; } - float get_threshold(){return _threshold;} - std::vector> get_iou_range(){return _iou_range;} - bool is_entire_iou(){return _entire_iou;} + float get_threshold() { return _threshold; } + std::vector> get_iou_range() { return _iou_range; } + bool is_entire_iou() { return _entire_iou; } void set_meta_data_batch() {} -protected: + protected: void create_node() override; void update_node() override; -private: + private: std::shared_ptr _meta_crop_param; - vx_array _crop_width, _crop_height, _x1, _y1, _x2, _y2; - std::vector _crop_width_val, _crop_height_val, _x1_val, _y1_val, _x2_val, _y2_val; - // unsigned int _dst_width, _dst_height; - std::vector in_width, in_height; + std::vector _x1_val, _y1_val, _crop_width_val, _crop_height_val; size_t _dest_width; size_t _dest_height; - float _threshold = 0.05; - std::vector> _iou_range; + float _threshold = 0.05; + std::vector> _iou_range; int _num_of_attempts = 20; bool _entire_iou = false; std::shared_ptr _crop_param; - SeededRNG _rngs; // setting the state_size to 4 for 4 random parameters. - -}; \ No newline at end of file + SeededRNG _rngs; // setting the state_size to 4 for 4 random parameters. +}; diff --git a/rocAL/include/decoders/image/decoder.h b/rocAL/include/decoders/image/decoder.h index aea8f6c86..ba9692930 100644 --- a/rocAL/include/decoders/image/decoder.h +++ b/rocAL/include/decoders/image/decoder.h @@ -28,23 +28,22 @@ THE SOFTWARE. 
#include "parameter_factory.h" #include "parameter_random_crop_decoder.h" -enum class DecoderType -{ - TURBO_JPEG = 0,//!< Can only decode - FUSED_TURBO_JPEG = 1, //!< FOR PARTIAL DECODING - OPENCV_DEC = 2, //!< for back_up decoding - HW_JPEG_DEC = 3, - SKIP_DECODE = 4, //!< For skipping decoding in case of uncompressed data from reader - OVX_FFMPEG,//!< Uses FFMPEG to decode video streams, can decode up to 4 video streams simultaneously +enum class DecoderType { + TURBO_JPEG = 0, //!< Can only decode + FUSED_TURBO_JPEG = 1, //!< FOR PARTIAL DECODING + OPENCV_DEC = 2, //!< for back_up decoding + HW_JPEG_DEC = 3, + SKIP_DECODE = 4, //!< For skipping decoding in case of uncompressed data from reader + OVX_FFMPEG = 5, //!< Uses FFMPEG to decode video streams, can decode up to 4 video streams simultaneously + FFMPEG_SOFTWARE_DECODE = 6, + FFMPEG_HARDWARE_DECODE = 7, }; - -class DecoderConfig -{ -public: +class DecoderConfig { + public: DecoderConfig() {} - explicit DecoderConfig(DecoderType type):_type(type){} - virtual DecoderType type() {return _type; }; + explicit DecoderConfig(DecoderType type) : _type(type) {} + virtual DecoderType type() { return _type; }; DecoderType _type = DecoderType::TURBO_JPEG; void set_random_area(std::vector &random_area) { _random_area = std::move(random_area); } void set_random_aspect_ratio(std::vector &random_aspect_ratio) { _random_aspect_ratio = std::move(random_aspect_ratio); } @@ -54,17 +53,15 @@ class DecoderConfig unsigned get_num_attempts() { return _num_attempts; } void set_seed(int seed) { _seed = seed; } int get_seed() { return _seed; } -private: + + private: std::vector _random_area, _random_aspect_ratio; unsigned _num_attempts = 10; - int _seed = std::time(0); //seed for decoder random crop + int _seed = std::time(0); // seed for decoder random crop }; - -class Decoder -{ -public: - +class Decoder { + public: enum class Status { OK = 0, HEADER_DECODE_FAILED, @@ -86,11 +83,11 @@ class Decoder \param height pointer to the 
user's buffer to write the height of the compressed image to \param color_comps pointer to the user's buffer to write the number of color components of the compressed image to */ - virtual Status decode_info(unsigned char* input_buffer, - size_t input_size, - int* width, - int* height, - int* color_comps) = 0; + virtual Status decode_info(unsigned char *input_buffer, + size_t input_size, + int *width, + int *height, + int *color_comps) = 0; // TODO: Extend the decode API if needed, color format and order can be passed to the function //! Decodes the actual image data @@ -111,7 +108,7 @@ class Decoder virtual ~Decoder() = default; virtual void initialize(int device_id) = 0; virtual bool is_partial_decoder() = 0; - virtual void set_bbox_coords(std::vector bbox_coords) = 0; - virtual std::vector get_bbox_coords() = 0; + virtual void set_bbox_coords(std::vector bbox_coords) = 0; + virtual std::vector get_bbox_coords() = 0; virtual void set_crop_window(CropWindow &crop_window) = 0; }; diff --git a/rocAL/include/decoders/image/decoder_factory.h b/rocAL/include/decoders/image/decoder_factory.h index 978145c6c..e541c8ed2 100644 --- a/rocAL/include/decoders/image/decoder_factory.h +++ b/rocAL/include/decoders/image/decoder_factory.h @@ -22,5 +22,6 @@ THE SOFTWARE. #pragma once #include + #include "decoder.h" std::shared_ptr create_decoder(DecoderConfig config); \ No newline at end of file diff --git a/rocAL/include/decoders/image/fused_crop_decoder.h b/rocAL/include/decoders/image/fused_crop_decoder.h index 247681191..718919b90 100644 --- a/rocAL/include/decoders/image/fused_crop_decoder.h +++ b/rocAL/include/decoders/image/fused_crop_decoder.h @@ -24,7 +24,7 @@ THE SOFTWARE. #include "decoder.h" #include class FusedCropTJDecoder : public Decoder { -public: + public: //! Default constructor FusedCropTJDecoder(); //! 
Decodes the header of the Jpeg compressed data and returns basic info about the compressed image @@ -35,7 +35,7 @@ class FusedCropTJDecoder : public Decoder { \param height pointer to the user's buffer to write the height of the compressed image to \param color_comps pointer to the user's buffer to write the number of color components of the compressed image to */ - Status decode_info(unsigned char* input_buffer, size_t input_size, int* width, int* height, int* color_comps) override; + Status decode_info(unsigned char *input_buffer, size_t input_size, int *width, int *height, int *color_comps) override; //! Decodes the actual image data /*! @@ -51,38 +51,36 @@ class FusedCropTJDecoder : public Decoder { size_t max_decoded_width, size_t max_decoded_height, size_t original_image_width, size_t original_image_height, size_t &actual_decoded_width, size_t &actual_decoded_height, - Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size=false) override; - + Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size = false) override; ~FusedCropTJDecoder() override; - void initialize(int device_id) override {}; + void initialize(int device_id) override{}; bool is_partial_decoder() override { return _is_partial_decoder; } - void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord; } - std::vector get_bbox_coords() override { return _bbox_coord; } + void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord; } + std::vector get_bbox_coords() override { return _bbox_coord; } void set_crop_window(CropWindow &crop_window) override { _crop_window = crop_window; } -private: + private: tjhandle m_jpegDecompressor; - const static unsigned SCALING_FACTORS_COUNT = 16; + const static unsigned SCALING_FACTORS_COUNT = 16; const tjscalingfactor SCALING_FACTORS[SCALING_FACTORS_COUNT] = { - { 2, 1 }, - { 15, 8 }, - { 7, 4 }, - { 13, 8 }, - { 3, 2 }, - { 11, 8 }, - { 
5, 4 }, - { 9, 8 }, - { 1, 1 }, - { 7, 8 }, - { 3, 4 }, - { 5, 8 }, - { 1, 2 }, - { 3, 8 }, - { 1, 4 }, - { 1, 8 } - }; + {2, 1}, + {15, 8}, + {7, 4}, + {13, 8}, + {3, 2}, + {11, 8}, + {5, 4}, + {9, 8}, + {1, 1}, + {7, 8}, + {3, 4}, + {5, 8}, + {1, 2}, + {3, 8}, + {1, 4}, + {1, 8}}; bool _is_partial_decoder = true; - std::vector _bbox_coord; + std::vector _bbox_coord; CropWindow _crop_window; }; diff --git a/rocAL/include/decoders/image/hw_jpeg_decoder.h b/rocAL/include/decoders/image/hw_jpeg_decoder.h index 090f2813c..a9775d69f 100644 --- a/rocAL/include/decoders/image/hw_jpeg_decoder.h +++ b/rocAL/include/decoders/image/hw_jpeg_decoder.h @@ -24,22 +24,21 @@ THE SOFTWARE. #include "decoder.h" #ifdef ROCAL_VIDEO -extern "C" -{ + +extern "C" { #include #include -#include -#include -#include #include +#include #include +#include +#include } - class HWJpegDecoder : public Decoder { -public: + public: //! Default constructor - HWJpegDecoder() {}; + HWJpegDecoder(){}; //! Decodes the header of the Jpeg compressed data and returns basic info about the compressed image /*! \param input_buffer User provided buffer containig the encoded image @@ -48,7 +47,7 @@ class HWJpegDecoder : public Decoder { \param height pointer to the user's buffer to write the height of the compressed image to \param color_comps pointer to the user's buffer to write the number of color components of the compressed image to */ - Status decode_info(unsigned char* input_buffer, size_t input_size, int* width, int* height, int* color_comps) override; + Status decode_info(unsigned char *input_buffer, size_t input_size, int *width, int *height, int *color_comps) override; //! Decodes the actual image data /*! 
@@ -64,16 +63,16 @@ class HWJpegDecoder : public Decoder { size_t max_decoded_width, size_t max_decoded_height, size_t original_image_width, size_t original_image_height, size_t &actual_decoded_width, size_t &actual_decoded_height, - Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size=false) override; + Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size = false) override; ~HWJpegDecoder() override; - void initialize(int device_id=0); + void initialize(int device_id = 0); bool is_partial_decoder() override { return _is_partial_decoder; } - void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord;} - void set_crop_window(CropWindow &crop_window) override { _crop_window = crop_window;} - std::vector get_bbox_coords() override { return _bbox_coord;} + void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord; } + void set_crop_window(CropWindow &crop_window) override { _crop_window = crop_window; } + std::vector get_bbox_coords() override { return _bbox_coord; } -private: + private: void release(); const char *_src_filename = NULL; AVHWDeviceType _hw_type = AV_HWDEVICE_TYPE_NONE; @@ -88,7 +87,7 @@ class HWJpegDecoder : public Decoder { size_t _codec_width, _codec_height; bool _is_partial_decoder = false; - std::vector _bbox_coord; + std::vector _bbox_coord; CropWindow _crop_window; }; diff --git a/rocAL/include/decoders/image/open_cv_decoder.h b/rocAL/include/decoders/image/open_cv_decoder.h index 74ff8f986..90c37e682 100644 --- a/rocAL/include/decoders/image/open_cv_decoder.h +++ b/rocAL/include/decoders/image/open_cv_decoder.h @@ -33,22 +33,22 @@ using namespace cv; #endif class CVDecoder : public Decoder { -public: + public: //! Default constructor CVDecoder(); //! Decodes the header of the Jpeg compressed data and returns basic info about the compressed image /*! 
\param input_buffer User provided buffer containig the encoded image \param input_size Size of the compressed data provided in the input_buffer - \param width pointer to the user's buffer to write the width of the compressed image to - \param height pointer to the user's buffer to write the height of the compressed image to - \param color_comps pointer to the user's buffer to write the number of color components of the compressed image to + \param width pointer to the user's buffer to write the width of the compressed image to + \param height pointer to the user's buffer to write the height of the compressed image to + \param color_comps pointer to the user's buffer to write the number of color components of the compressed image to */ - Status decode_info(unsigned char* input_buffer, size_t input_size, int* width, int* height, int* color_comps) override; - + Status decode_info(unsigned char *input_buffer, size_t input_size, int *width, int *height, int *color_comps) override; + //! Decodes the actual image data //! Decodes the actual image data - /*! + /*! \param input_buffer User provided buffer containig the encoded image \param output_buffer User provided buffer used to write the decoded image into \param input_size Size of the compressed data provided in the input_buffer @@ -58,25 +58,25 @@ class CVDecoder : public Decoder { \param original_image_height The actual height of the compressed image. 
decoded height will be equal to this if this is smaller than max_decoded_height */ Status decode(unsigned char *input_buffer, size_t input_size, unsigned char *output_buffer, - size_t max_decoded_width, size_t max_decoded_height, - size_t original_image_width, size_t original_image_height, - size_t &actual_decoded_width, size_t &actual_decoded_height, - Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size=false) override; + size_t max_decoded_width, size_t max_decoded_height, + size_t original_image_width, size_t original_image_height, + size_t &actual_decoded_width, size_t &actual_decoded_height, + Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size = false) override; bool is_partial_decoder() override { return _is_partial_decoder; } - void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord; } + void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord; } void set_crop_window(CropWindow &crop_window) override { _crop_window = crop_window; } - std::vector get_bbox_coords() override { return _bbox_coord; } - //virtual Status decode(unsigned char* input_buffer, size_t input_size, unsigned char* output_buffer,int desired_width, int desired_height, ColorFormat desired_color); - void initialize(int device_id) override {}; + std::vector get_bbox_coords() override { return _bbox_coord; } + // virtual Status decode(unsigned char* input_buffer, size_t input_size, unsigned char* output_buffer,int desired_width, int desired_height, ColorFormat desired_color); + void initialize(int device_id) override{}; ~CVDecoder() override; -private: - //cv::Mat m_mat_compressed; - cv::Mat m_mat_scaled; - cv::Mat m_mat_orig; - bool _is_partial_decoder = false; - std::vector _bbox_coord; - CropWindow _crop_window; + private: + // cv::Mat m_mat_compressed; + cv::Mat m_mat_scaled; + cv::Mat m_mat_orig; + bool _is_partial_decoder = false; + std::vector 
_bbox_coord; + CropWindow _crop_window; }; #endif diff --git a/rocAL/include/decoders/image/turbo_jpeg_decoder.h b/rocAL/include/decoders/image/turbo_jpeg_decoder.h index c5df6d460..ce4dba600 100644 --- a/rocAL/include/decoders/image/turbo_jpeg_decoder.h +++ b/rocAL/include/decoders/image/turbo_jpeg_decoder.h @@ -26,7 +26,7 @@ THE SOFTWARE. #include class TJDecoder : public Decoder { -public: + public: //! Default constructor TJDecoder(); //! Decodes the header of the Jpeg compressed data and returns basic info about the compressed image @@ -37,7 +37,7 @@ class TJDecoder : public Decoder { \param height pointer to the user's buffer to write the height of the compressed image to \param color_comps pointer to the user's buffer to write the number of color components of the compressed image to */ - Status decode_info(unsigned char* input_buffer, size_t input_size, int* width, int* height, int* color_comps) override; + Status decode_info(unsigned char *input_buffer, size_t input_size, int *width, int *height, int *color_comps) override; //! Decodes the actual image data /*! 
@@ -53,37 +53,37 @@ class TJDecoder : public Decoder { size_t max_decoded_width, size_t max_decoded_height, size_t original_image_width, size_t original_image_height, size_t &actual_decoded_width, size_t &actual_decoded_height, - Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size=false) override; + Decoder::ColorFormat desired_decoded_color_format, DecoderConfig config, bool keep_original_size = false) override; ~TJDecoder() override; - void initialize(int device_id) override {}; + void initialize(int device_id) override{}; bool is_partial_decoder() override { return _is_partial_decoder; } - void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord; } + void set_bbox_coords(std::vector bbox_coord) override { _bbox_coord = bbox_coord; } void set_crop_window(CropWindow &crop_window) override { _crop_window = crop_window; } - std::vector get_bbox_coords() override { return _bbox_coord; } -private: + std::vector get_bbox_coords() override { return _bbox_coord; } + + private: tjhandle m_jpegDecompressor; - const static unsigned SCALING_FACTORS_COUNT = 16; + const static unsigned SCALING_FACTORS_COUNT = 16; const tjscalingfactor SCALING_FACTORS[SCALING_FACTORS_COUNT] = { - { 2, 1 }, - { 15, 8 }, - { 7, 4 }, - { 13, 8 }, - { 3, 2 }, - { 11, 8 }, - { 5, 4 }, - { 9, 8 }, - { 1, 1 }, - { 7, 8 }, - { 3, 4 }, - { 5, 8 }, - { 1, 2 }, - { 3, 8 }, - { 1, 4 }, - { 1, 8 } - }; + {2, 1}, + {15, 8}, + {7, 4}, + {13, 8}, + {3, 2}, + {11, 8}, + {5, 4}, + {9, 8}, + {1, 1}, + {7, 8}, + {3, 4}, + {5, 8}, + {1, 2}, + {3, 8}, + {1, 4}, + {1, 8}}; bool _is_partial_decoder = false; - std::vector _bbox_coord; + std::vector _bbox_coord; const static unsigned _max_scaling_factor = 8; CropWindow _crop_window; }; diff --git a/rocAL/include/decoders/video/ffmpeg_video_decoder.h b/rocAL/include/decoders/video/ffmpeg_video_decoder.h index 3a46d06c8..e66ee9f7b 100644 --- a/rocAL/include/decoders/video/ffmpeg_video_decoder.h +++ 
b/rocAL/include/decoders/video/ffmpeg_video_decoder.h @@ -25,9 +25,8 @@ THE SOFTWARE. #include "video_decoder.h" #ifdef ROCAL_VIDEO -class FFmpegVideoDecoder : public VideoDecoder -{ -public: +class FFmpegVideoDecoder : public VideoDecoder { + public: //! Default constructor FFmpegVideoDecoder(); VideoDecoder::Status Initialize(const char *src_filename) override; @@ -35,7 +34,8 @@ class FFmpegVideoDecoder : public VideoDecoder int seek_frame(AVRational avg_frame_rate, AVRational time_base, unsigned frame_number) override; void release() override; ~FFmpegVideoDecoder() override; -private: + + private: const char *_src_filename = NULL; AVFormatContext *_fmt_ctx = NULL; AVCodecContext *_video_dec_ctx = NULL; diff --git a/rocAL/include/decoders/video/hardware_video_decoder.h b/rocAL/include/decoders/video/hardware_video_decoder.h index fe23224a5..dad64e02b 100644 --- a/rocAL/include/decoders/video/hardware_video_decoder.h +++ b/rocAL/include/decoders/video/hardware_video_decoder.h @@ -25,9 +25,8 @@ THE SOFTWARE. #include "video_decoder.h" #ifdef ROCAL_VIDEO -class HardWareVideoDecoder : public VideoDecoder -{ -public: +class HardWareVideoDecoder : public VideoDecoder { + public: //! Default constructor HardWareVideoDecoder(); VideoDecoder::Status Initialize(const char *src_filename) override; @@ -35,7 +34,8 @@ class HardWareVideoDecoder : public VideoDecoder int seek_frame(AVRational avg_frame_rate, AVRational time_base, unsigned frame_number) override; void release() override; ~HardWareVideoDecoder() override; -private: + + private: const char *_src_filename = NULL; AVFormatContext *_fmt_ctx = NULL; AVCodecContext *_video_dec_ctx = NULL; diff --git a/rocAL/include/decoders/video/video_decoder.h b/rocAL/include/decoders/video/video_decoder.h index e6549140f..7fe288503 100644 --- a/rocAL/include/decoders/video/video_decoder.h +++ b/rocAL/include/decoders/video/video_decoder.h @@ -26,45 +26,27 @@ THE SOFTWARE. 
#include #include #ifdef ROCAL_VIDEO -extern "C" -{ -#include -#include -#include -#include + +extern "C" { #include +#include +#include #include -#include -#include #include -#include -#include #include +#include +#include +#include +#include +#include } #endif #include "parameter_factory.h" -enum class VideoDecoderType -{ - FFMPEG_SOFTWARE_DECODE = 0, - FFMPEG_HARDWARE_DECODE = 1, -}; - -class VideoDecoderConfig -{ -public: - VideoDecoderConfig() {} - explicit VideoDecoderConfig(VideoDecoderType type) : _type(type) {} - virtual VideoDecoderType type() { return _type; }; - VideoDecoderType _type = VideoDecoderType::FFMPEG_SOFTWARE_DECODE; -}; - #ifdef ROCAL_VIDEO -class VideoDecoder -{ -public: - enum class Status - { +class VideoDecoder { + public: + enum class Status { OK = 0, HEADER_DECODE_FAILED, CONTENT_DECODE_FAILED, @@ -72,8 +54,7 @@ class VideoDecoder FAILED, NO_MEMORY }; - enum class ColorFormat - { + enum class ColorFormat { GRAY = 0, RGB, BGR diff --git a/rocAL/include/decoders/video/video_decoder_factory.h b/rocAL/include/decoders/video/video_decoder_factory.h index 0b108e465..07001c16a 100644 --- a/rocAL/include/decoders/video/video_decoder_factory.h +++ b/rocAL/include/decoders/video/video_decoder_factory.h @@ -23,7 +23,8 @@ THE SOFTWARE. #pragma once #include #include "video_decoder.h" +#include "decoder.h" #ifdef ROCAL_VIDEO -std::shared_ptr create_video_decoder(VideoDecoderConfig config); +std::shared_ptr create_video_decoder(DecoderConfig config); #endif diff --git a/rocAL/include/device/device_code.h b/rocAL/include/device/device_code.h index 28e9f308f..550c731c2 100644 --- a/rocAL/include/device/device_code.h +++ b/rocAL/include/device/device_code.h @@ -22,20 +22,18 @@ THE SOFTWARE. 
#pragma once -#include #include - - +#include class DeviceCode { -public: - explicit DeviceCode(const std::string& source_code, const std::string& program_name, const std::vector& kernel_list ): - m_source_code(source_code), m_prog_name(program_name), m_kernel_list(kernel_list) {} + public: + explicit DeviceCode(const std::string& source_code, const std::string& program_name, const std::vector& kernel_list) : m_source_code(source_code), m_prog_name(program_name), m_kernel_list(kernel_list) {} const std::string& getSourceCode() const { return m_source_code; } const std::string& getName() const { return m_prog_name; } const std::vector& getKernelList() const { return m_kernel_list; } -private: + + private: const std::string m_source_code; const std::string m_prog_name; - const std::vector m_kernel_list; + const std::vector m_kernel_list; }; diff --git a/rocAL/include/device/device_data_transfer_code.h b/rocAL/include/device/device_data_transfer_code.h index a752c6a1c..0d081397c 100644 --- a/rocAL/include/device/device_data_transfer_code.h +++ b/rocAL/include/device/device_data_transfer_code.h @@ -26,50 +26,48 @@ THE SOFTWARE. 
const static std::string data_transfer_program_name = "utility"; -const static std::vector data_transfer_kernel_names = {"copyInt8ToNHWC","copyInt8ToNCHW"}; +const static std::vector data_transfer_kernel_names = {"copyInt8ToNHWC", "copyInt8ToNCHW"}; const static std::string data_transfer_source = -"__kernel void copyInt8ToNHWC(__global const unsigned char* in, __global float* out, unsigned out_offset, unsigned w, unsigned h, unsigned c, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, unsigned reverse_channels) {" -" if(c > 3 || c < 1) return;" -" int i = get_global_id(0);" -" unsigned channel_size = h*w;" -" unsigned size = channel_size*c;" -" if(i >= size) return;" -" unsigned channel_idx = i % c;" -" unsigned pixel_idx = i % channel_size;" -" float out_val = 0;" -" float multiplier[3] = {multiplier0, multiplier1, multiplier2};" -" float offset[3] = {offset0, offset1, offset2};" -" if(reverse_channels) {" -" out_val = multiplier[c-channel_idx-1]*((float)(in[c*pixel_idx+c-channel_idx-1]))+offset[c-channel_idx-1]; " -" } else {" -" out_val = multiplier[channel_idx]*((float)(in[c*pixel_idx+channel_idx]))+offset[channel_idx]; " -" }" -" out [out_offset + c*pixel_idx + channel_idx] = out_val;}" -"" -"" -"__kernel void copyInt8ToNCHW(__global const unsigned char* in, __global float* out, unsigned out_offset, unsigned w, unsigned h, unsigned c, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, unsigned reverse_channels) {" -" if(c > 3 || c < 1) return;" -" int i = get_global_id(0);" -" unsigned channel_size = h*w;" -" unsigned size = channel_size*c;" -" if(i >= size) return; " -" unsigned channel_idx = i % c;" -" unsigned pixel_idx = i % channel_size;" -" float out_val = 0;" -" float multiplier[3] = {multiplier0, multiplier1, multiplier2};" -" float offset[3] = {offset0, offset1, offset2};" -" if(reverse_channels) {" -" out_val = 
multiplier[c-channel_idx-1]*((float)(in[c*pixel_idx+c-channel_idx-1]))+offset[c-channel_idx-1]; " -" } else {" -" out_val = multiplier[channel_idx]*((float)(in[c*pixel_idx+channel_idx]))+offset[channel_idx]; " -" }" -" out [out_offset + channel_idx*channel_size + pixel_idx] = out_val;}"; + "__kernel void copyInt8ToNHWC(__global const unsigned char* in, __global float* out, unsigned out_offset, unsigned w, unsigned h, unsigned c, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, unsigned reverse_channels) {" + " if(c > 3 || c < 1) return;" + " int i = get_global_id(0);" + " unsigned channel_size = h*w;" + " unsigned size = channel_size*c;" + " if(i >= size) return;" + " unsigned channel_idx = i % c;" + " unsigned pixel_idx = i % channel_size;" + " float out_val = 0;" + " float multiplier[3] = {multiplier0, multiplier1, multiplier2};" + " float offset[3] = {offset0, offset1, offset2};" + " if(reverse_channels) {" + " out_val = multiplier[c-channel_idx-1]*((float)(in[c*pixel_idx+c-channel_idx-1]))+offset[c-channel_idx-1]; " + " } else {" + " out_val = multiplier[channel_idx]*((float)(in[c*pixel_idx+channel_idx]))+offset[channel_idx]; " + " }" + " out [out_offset + c*pixel_idx + channel_idx] = out_val;}" + "" + "" + "__kernel void copyInt8ToNCHW(__global const unsigned char* in, __global float* out, unsigned out_offset, unsigned w, unsigned h, unsigned c, float multiplier0, float multiplier1, float multiplier2, float offset0, float offset1, float offset2, unsigned reverse_channels) {" + " if(c > 3 || c < 1) return;" + " int i = get_global_id(0);" + " unsigned channel_size = h*w;" + " unsigned size = channel_size*c;" + " if(i >= size) return; " + " unsigned channel_idx = i % c;" + " unsigned pixel_idx = i % channel_size;" + " float out_val = 0;" + " float multiplier[3] = {multiplier0, multiplier1, multiplier2};" + " float offset[3] = {offset0, offset1, offset2};" + " if(reverse_channels) {" + " out_val = 
multiplier[c-channel_idx-1]*((float)(in[c*pixel_idx+c-channel_idx-1]))+offset[c-channel_idx-1]; " + " } else {" + " out_val = multiplier[channel_idx]*((float)(in[c*pixel_idx+channel_idx]))+offset[channel_idx]; " + " }" + " out [out_offset + channel_idx*channel_size + pixel_idx] = out_val;}"; class OCLUtility : public DeviceCode { - public: - OCLUtility(): DeviceCode(data_transfer_source, data_transfer_program_name, data_transfer_kernel_names){} + public: + OCLUtility() : DeviceCode(data_transfer_source, data_transfer_program_name, data_transfer_kernel_names) {} // TODO : delete other implicit constructors }; - - diff --git a/rocAL/include/device/device_manager.h b/rocAL/include/device/device_manager.h index 357d87d12..bc64d5f78 100644 --- a/rocAL/include/device/device_manager.h +++ b/rocAL/include/device/device_manager.h @@ -35,25 +35,28 @@ struct DeviceResources { cl_context context; cl_device_id device_id; cl_command_queue cmd_queue; - DeviceResources() { cmd_queue = nullptr; context = nullptr; device_id = nullptr; } + DeviceResources() { + cmd_queue = nullptr; + context = nullptr; + device_id = nullptr; + } }; - class CLProgram { -public: - CLProgram(const DeviceResources* ocl, const DeviceCode& ocl_code): m_ocl(ocl), m_code(ocl_code) {} + public: + CLProgram(const DeviceResources* ocl, const DeviceCode& ocl_code) : m_ocl(ocl), m_code(ocl_code) {} ~CLProgram(); - cl_int runKernel(const std::string& kernel_name, const std::vector& args, const std::vector& argSize, const std::vector& globalWorkSize, const std::vector& localWorkSize); + cl_int runKernel(const std::string& kernel_name, const std::vector& args, const std::vector& argSize, const std::vector& globalWorkSize, const std::vector& localWorkSize); cl_int buildAll(); - const cl_kernel& operator[](const std::string& kernel_name) const ; + const cl_kernel& operator[](const std::string& kernel_name) const; std::string getProgramName(); -private: + private: const DeviceResources* m_ocl; const DeviceCode& m_code; 
@@ -61,17 +64,15 @@ class CLProgram { cl_program m_prog; std::map m_kernels; - }; - class DeviceManager { -public: + public: DeviceManager(){}; cl_int initialize(); - DeviceResources *resources(); + DeviceResources* resources(); const CLProgram& operator[](const std::string& prog_name); @@ -79,8 +80,7 @@ class DeviceManager { ~DeviceManager(); -private: - + private: DeviceResources _resources; std::map m_programs; diff --git a/rocAL/include/device/device_manager_hip.h b/rocAL/include/device/device_manager_hip.h index 6402598b0..2f9adce8c 100644 --- a/rocAL/include/device/device_manager_hip.h +++ b/rocAL/include/device/device_manager_hip.h @@ -33,11 +33,14 @@ struct DeviceResourcesHip { hipStream_t hip_stream; int device_id; hipDeviceProp_t dev_prop; - DeviceResourcesHip() { hip_stream = nullptr; device_id = -1;} + DeviceResourcesHip() { + hip_stream = nullptr; + device_id = -1; + } }; class DeviceManagerHip { -public: + public: DeviceManagerHip(){}; hipError_t initialize(); @@ -48,10 +51,8 @@ class DeviceManagerHip { ~DeviceManagerHip(); -private: - + private: DeviceResourcesHip _resources; - }; using pRocalHip = std::shared_ptr; diff --git a/rocAL/include/device/ocl_setup.h b/rocAL/include/device/ocl_setup.h index b8ff2516e..db1e7bc56 100644 --- a/rocAL/include/device/ocl_setup.h +++ b/rocAL/include/device/ocl_setup.h @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once +#pragma once #if ENABLE_OPENCL #include diff --git a/rocAL/include/loaders/circular_buffer.h b/rocAL/include/loaders/circular_buffer.h index 0d97e2159..ac4fafe13 100644 --- a/rocAL/include/loaders/circular_buffer.h +++ b/rocAL/include/loaders/circular_buffer.h @@ -21,17 +21,17 @@ THE SOFTWARE. 
*/ #pragma once -#include #include +#include #if ENABLE_OPENCL - #include +#include #endif #include + +#include "commons.h" #include "device_manager.h" #include "device_manager_hip.h" -#include "commons.h" -struct decoded_image_info -{ +struct decoded_image_info { std::vector _image_names; std::vector _roi_width; std::vector _roi_height; @@ -39,46 +39,44 @@ struct decoded_image_info std::vector _original_height; }; -struct crop_image_info -{ - //Batch of Image Crop Coordinates in "xywh" format +struct crop_image_info { + // Batch of Image Crop Coordinates in "xywh" format std::vector> _crop_image_coords; }; -class CircularBuffer -{ -public: +class CircularBuffer { + public: CircularBuffer(void* devres); ~CircularBuffer(); void init(RocalMemType output_mem_type, size_t output_mem_size, size_t buff_depth); - void release(); // release resources - void sync();// Syncs device buffers with host - void unblock_reader();// Unblocks the thread currently waiting on a call to get_read_buffer - void unblock_writer();// Unblocks the thread currently waiting on get_write_buffer - void push();// The latest write goes through, effectively adds one element to the buffer - void pop();// The oldest write will be erased and overwritten in upcoming writes + void release(); // release resources + void sync(); // Syncs device buffers with host + void unblock_reader(); // Unblocks the thread currently waiting on a call to get_read_buffer + void unblock_writer(); // Unblocks the thread currently waiting on get_write_buffer + void push(); // The latest write goes through, effectively adds one element to the buffer + void pop(); // The oldest write will be erased and overwritten in upcoming writes void set_image_info(const decoded_image_info& info) { _last_image_info = info; } void set_crop_image_info(const crop_image_info& info) { _last_crop_image_info = info; } decoded_image_info& get_image_info(); crop_image_info& get_cropped_image_info(); bool random_bbox_crop_flag = false; void* 
get_read_buffer_dev(); - unsigned char* get_read_buffer_host();// blocks the caller if the buffer is empty - unsigned char* get_write_buffer(); // blocks the caller if the buffer is full - size_t level();// Returns the number of elements stored - void reset();// sets the buffer level to 0 - void block_if_empty();// blocks the caller if the buffer is empty - void block_if_full();// blocks the caller if the buffer is full + unsigned char* get_read_buffer_host(); // blocks the caller if the buffer is empty + unsigned char* get_write_buffer(); // blocks the caller if the buffer is full + size_t level(); // Returns the number of elements stored + void reset(); // sets the buffer level to 0 + void block_if_empty(); // blocks the caller if the buffer is empty + void block_if_full(); // blocks the caller if the buffer is full -private: + private: void increment_read_ptr(); void increment_write_ptr(); bool full(); bool empty(); size_t _buff_depth; decoded_image_info _last_image_info; - std::queue _circ_image_info;//!< Stores the loaded images names, decoded_width and decoded_height(data is stored in the _circ_buff) - crop_image_info _last_crop_image_info; // for Random BBox crop coordinates - std::queue _circ_crop_image_info;//!< Stores the crop coordinates of the images for random bbox crop (data is stored in the _circ_buff) + std::queue _circ_image_info; //!< Stores the loaded images names, decoded_width and decoded_height(data is stored in the _circ_buff) + crop_image_info _last_crop_image_info; // for Random BBox crop coordinates + std::queue _circ_crop_image_info; //!< Stores the crop coordinates of the images for random bbox crop (data is stored in the _circ_buff) std::mutex _names_buff_lock; /* * Pinned memory allocated on the host used for fast host to device memory transactions, @@ -92,7 +90,7 @@ class CircularBuffer cl_context _cl_context = nullptr; cl_device_id _device_id = nullptr; #endif - std::vector _dev_buffer;// Actual memory allocated on the device (in the 
case of GPU affinity) + std::vector _dev_buffer; // Actual memory allocated on the device (in the case of GPU affinity) std::vector _host_buffer_ptrs; std::vector> _actual_host_buffers; std::condition_variable _wait_for_load; diff --git a/rocAL/include/loaders/image/cifar10_data_loader.h b/rocAL/include/loaders/image/cifar10_data_loader.h index 83fad27aa..eb20035c3 100644 --- a/rocAL/include/loaders/image/cifar10_data_loader.h +++ b/rocAL/include/loaders/image/cifar10_data_loader.h @@ -22,19 +22,19 @@ THE SOFTWARE. #pragma once #include + +#include "cifar10_data_reader.h" #include "image_loader.h" #include "reader_factory.h" #include "timing_debug.h" -#include "cifar10_data_reader.h" -class CIFAR10DataLoader : public LoaderModule -{ -public: +class CIFAR10DataLoader : public LoaderModule { + public: explicit CIFAR10DataLoader(void *dev_resources); ~CIFAR10DataLoader() override; LoaderModuleStatus load_next() override; - void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size=true) override; - void set_output_image (Image* output_image) override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = true) override; + void set_output(Tensor *output_tensor) override; void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override; size_t remaining_count() override; void reset() override; @@ -43,10 +43,12 @@ class CIFAR10DataLoader : public LoaderModule decoded_image_info get_decode_image_info() override; crop_image_info get_crop_image_info() override; Timing timing() override; - void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; + void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; void shut_down() override; + std::vector> &get_batch_random_bbox_crop_coords(); + void set_batch_random_bbox_crop_coords(std::vector> batch_crop_coords); -private: + 
private: void increment_loader_idx(); bool is_out_of_data(); void de_init(); @@ -55,7 +57,7 @@ class CIFAR10DataLoader : public LoaderModule LoaderModuleStatus load_routine(); std::shared_ptr _reader; void *_dev_resources; - decoded_image_info _raw_img_info; // image info to store the names. In this case the ID of image is stored in _roi_width field + decoded_image_info _raw_img_info; // image info to store the names. In this case the ID of image is stored in _roi_width field decoded_image_info _output_decoded_img_info; bool _initialized = false; RocalMemType _mem_type; @@ -75,9 +77,12 @@ class CIFAR10DataLoader : public LoaderModule void fast_forward_through_empty_loaders(); bool _is_initialized; bool _stopped = false; - bool _loop;// _randombboxcrop_meta_data_reader = nullptr; -}; \ No newline at end of file + std::vector> _bbox_coords, _crop_coords_batch; + crop_image_info _crop_image_info; + crop_image_info _output_cropped_image_info; +}; diff --git a/rocAL/include/loaders/image/image_loader.h b/rocAL/include/loaders/image/image_loader.h index 91a1d2b9d..5fc41a02c 100644 --- a/rocAL/include/loaders/image/image_loader.h +++ b/rocAL/include/loaders/image/image_loader.h @@ -25,23 +25,24 @@ THE SOFTWARE. 
#include #include #include -#include "commons.h" + #include "circular_buffer.h" +#include "commons.h" #include "image_read_and_decode.h" #include "meta_data_reader.h" // // ImageLoader runs an internal thread for loading an decoding of images asynchronously // it uses a circular buffer to store decoded frames and images for the user class ImageLoader : public LoaderModule { -public: - explicit ImageLoader(void *dev_resources); + public: + explicit ImageLoader(void* dev_resources); ~ImageLoader() override; LoaderModuleStatus load_next() override; - void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size=false) override; - void set_output_image (Image* output_image) override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; + void set_output(Tensor* output_tensor) override; void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override; - size_t remaining_count() override; // returns number of remaining items to be loaded - void reset() override; // Resets the loader to load from the beginning of the media + size_t remaining_count() override; // returns number of remaining items to be loaded + void reset() override; // Resets the loader to load from the beginning of the media Timing timing() override; void start_loading() override; LoaderModuleStatus set_cpu_affinity(cpu_set_t cpu_mask); @@ -50,9 +51,10 @@ class ImageLoader : public LoaderModule { std::vector get_id() override; decoded_image_info get_decode_image_info() override; crop_image_info get_crop_image_info() override; - void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; + void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; void shut_down() override; -private: + + private: bool is_out_of_data(); void de_init(); void stop_internal_thread(); @@ -61,11 +63,11 @@ class 
ImageLoader : public LoaderModule { LoaderModuleStatus load_routine(); std::shared_ptr _randombboxcrop_meta_data_reader = nullptr; - Image* _output_image; - std::vector _output_names;//!< image name/ids that are stores in the _output_image + Tensor* _output_tensor; + std::vector _output_names; //!< image name/ids that are stores in the _output_image size_t _output_mem_size; - MetaDataBatch* _meta_data = nullptr;//!< The output of the meta_data_graph, - std::vector> _bbox_coords; + MetaDataBatch* _meta_data = nullptr; //!< The output of the meta_data_graph, + std::vector> _bbox_coords; bool _internal_thread_running; size_t _batch_size; std::thread _load_thread; @@ -78,11 +80,11 @@ class ImageLoader : public LoaderModule { TimingDBG _swap_handle_time; bool _is_initialized; bool _stopped = false; - bool _loop;// + #include "image_loader.h" // // ImageLoaderSharded Can be used to run load and decode in multiple shards, each shard by a single loader instance, // It improves load and decode performance since each loader loads the images in parallel using an internal thread // -class ImageLoaderSharded : public LoaderModule -{ -public: +class ImageLoaderSharded : public LoaderModule { + public: explicit ImageLoaderSharded(void *dev_resources); ~ImageLoaderSharded() override; LoaderModuleStatus load_next() override; - void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size=false) override; - void set_output_image (Image* output_image) override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; + void set_output(Tensor *output_tensor) override; void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override; size_t remaining_count() override; void reset() override; @@ -45,7 +45,8 @@ class ImageLoaderSharded : public LoaderModule Timing timing() override; void 
set_prefetch_queue_depth(size_t prefetch_queue_depth) override; void shut_down() override; -private: + + private: void increment_loader_idx(); void *_dev_resources; bool _initialized = false; @@ -55,6 +56,6 @@ class ImageLoaderSharded : public LoaderModule void fast_forward_through_empty_loaders(); size_t _prefetch_queue_depth; - Image *_output_image; + Tensor *_output_tensor; std::shared_ptr _randombboxcrop_meta_data_reader = nullptr; }; \ No newline at end of file diff --git a/rocAL/include/loaders/image/image_read_and_decode.h b/rocAL/include/loaders/image/image_read_and_decode.h index 3286e4a95..740978e87 100644 --- a/rocAL/include/loaders/image/image_read_and_decode.h +++ b/rocAL/include/loaders/image/image_read_and_decode.h @@ -22,36 +22,37 @@ THE SOFTWARE. #pragma once #include -#include + #include +#include + #include "commons.h" -#include "turbo_jpeg_decoder.h" -#include "reader_factory.h" -#include "timing_debug.h" #include "loader_module.h" #include "parameter_random_crop_decoder.h" +#include "reader_factory.h" +#include "timing_debug.h" +#include "turbo_jpeg_decoder.h" /** * Compute the scaled value of dimension using the given scaling * factor. This macro performs the integer equivalent of ceil(dimension * * scalingFactor). 
*/ -#define TJSCALED(dimension, scalingFactor) \ - ((dimension * scalingFactor.num + scalingFactor.denom - 1) / \ - scalingFactor.denom) +#define TJSCALED(dimension, scalingFactor) \ + ((dimension * scalingFactor.num + scalingFactor.denom - 1) / \ + scalingFactor.denom) -class ImageReadAndDecode -{ -public: +class ImageReadAndDecode { + public: ImageReadAndDecode(); ~ImageReadAndDecode(); size_t count(); void reset(); - void create(ReaderConfig reader_config, DecoderConfig decoder_config, int batch_size, int device_id=0); - void set_bbox_vector(std::vector> bbox_coords) { _bbox_coords = bbox_coords;}; + void create(ReaderConfig reader_config, DecoderConfig decoder_config, int batch_size, int device_id = 0); + void set_bbox_vector(std::vector> bbox_coords) { _bbox_coords = bbox_coords; }; void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader); - std::vector> get_batch_random_bbox_crop_coords(); - void set_batch_random_bbox_crop_coords(std::vector> batch_crop_coords); + std::vector> &get_batch_random_bbox_crop_coords(); + void set_batch_random_bbox_crop_coords(std::vector> batch_crop_coords); //! Loads a decompressed batch of images into the buffer indicated by buff /// \param buff User's buffer provided to be filled with decoded image samples @@ -62,40 +63,39 @@ class ImageReadAndDecode /// \param roi_height is set by the load() function tp the width of the region that decoded image is located.It's less than max_height and is either equal to the original image height if original image height is smaller than max_height or downscaled if necessary to fit the max_height criterion. 
/// \param output_color_format defines what color format user expects decoder to decode images into if capable of doing so supported is LoaderModuleStatus load( - unsigned char* buff, - std::vector& names, - const size_t max_decoded_width, - const size_t max_decoded_height, - std::vector &roi_width, - std::vector &roi_height, - std::vector &actual_width, - std::vector &actual_height, - RocalColorFormat output_color_format, - bool decoder_keep_original=false); + unsigned char *buff, + std::vector &names, + const size_t max_decoded_width, + const size_t max_decoded_height, + std::vector &roi_width, + std::vector &roi_height, + std::vector &actual_width, + std::vector &actual_height, + RocalColorFormat output_color_format, + bool decoder_keep_original = false); //! returns timing info or other status information Timing timing(); -private: + private: std::vector> _decoder; std::shared_ptr _reader; std::vector> _compressed_buff; std::vector _actual_read_size; std::vector _image_names; std::vector _compressed_image_size; - std::vector _decompressed_buff_ptrs; + std::vector _decompressed_buff_ptrs; std::vector _actual_decoded_width; std::vector _actual_decoded_height; std::vector _original_width; std::vector _original_height; - static const size_t MAX_COMPRESSED_SIZE = 1*1024*1024; // 1 Meg + static const size_t MAX_COMPRESSED_SIZE = 1 * 1024 * 1024; // 1 Meg TimingDBG _file_load_time, _decode_time; size_t _batch_size, _shard_count, _num_threads; DecoderConfig _decoder_config; bool decoder_keep_original; - std::vector> _bbox_coords, _crop_coords_batch; + std::vector> _bbox_coords, _crop_coords_batch; std::shared_ptr _randombboxcrop_meta_data_reader = nullptr; pCropCord _CropCord; RocalRandomCropDecParam *_random_crop_dec_param = nullptr; }; - diff --git a/rocAL/include/loaders/image/node_cifar10_loader.h b/rocAL/include/loaders/image/node_cifar10_loader.h index ffe701c0c..169ad32a4 100644 --- a/rocAL/include/loaders/image/node_cifar10_loader.h +++ 
b/rocAL/include/loaders/image/node_cifar10_loader.h @@ -21,17 +21,16 @@ THE SOFTWARE. */ #pragma once -#include "node.h" #include "cifar10_data_loader.h" #include "graph.h" +#include "node.h" -class Cifar10LoaderNode: public Node -{ -public: +class Cifar10LoaderNode : public Node { + public: /// \param device_resources shard count from user /// internal_shard_count number of loader/decoders are created and each shard is loaded and decoded using separate and independent resources increasing the parallelism and performance. - Cifar10LoaderNode(Image *output, void *device_resources); + Cifar10LoaderNode(Tensor *output, void *device_resources); ~Cifar10LoaderNode() override; Cifar10LoaderNode() = delete; /// @@ -40,12 +39,14 @@ class Cifar10LoaderNode: public Node /// \param load_batch_count Defines the quantum count of the images to be loaded. It's usually equal to the user's batch size. /// The loader will repeat images if necessary to be able to have images in multiples of the load_batch_count, /// for example if there are 10 images in the dataset and load_batch_count is 3, the loader repeats 2 images as if there are 12 images available. 
- void init( const std::string &source_path, const std::string &json_path, StorageType storage_type, bool loop, size_t load_batch_count, RocalMemType mem_type, const std::string &file_prefix); + void init(const std::string &source_path, const std::string &json_path, StorageType storage_type, bool loop, size_t load_batch_count, RocalMemType mem_type, const std::string &file_prefix); std::shared_ptr get_loader_module(); -protected: - void create_node() override {}; - void update_node() override {}; -private: + + protected: + void create_node() override{}; + void update_node() override{}; + + private: std::shared_ptr _loader_module = nullptr; }; \ No newline at end of file diff --git a/rocAL/include/loaders/image/node_fused_jpeg_crop.h b/rocAL/include/loaders/image/node_fused_jpeg_crop.h index fa4558882..4bd799fc9 100644 --- a/rocAL/include/loaders/image/node_fused_jpeg_crop.h +++ b/rocAL/include/loaders/image/node_fused_jpeg_crop.h @@ -21,18 +21,17 @@ THE SOFTWARE. */ #pragma once -#include "node.h" -#include "image_loader_sharded.h" #include "graph.h" +#include "image_loader_sharded.h" +#include "node.h" #include "parameter_factory.h" -class FusedJpegCropNode: public Node -{ -public: +class FusedJpegCropNode : public Node { + public: /// \param device_resources shard count from user /// internal_shard_count number of loader/decoders are created and each shard is loaded and decoded using separate and independent resources increasing the parallelism and performance. 
- FusedJpegCropNode(Image *output, void *device_resources_hip); + FusedJpegCropNode(Tensor *output, void *device_resources_hip); ~FusedJpegCropNode() override; FusedJpegCropNode() = delete; /// @@ -46,10 +45,12 @@ class FusedJpegCropNode: public Node unsigned num_attempts, std::vector &random_area, std::vector &random_aspect_ratio); std::shared_ptr get_loader_module(); -protected: - void create_node() override {}; - void update_node() override {}; -private: + + protected: + void create_node() override{}; + void update_node() override{}; + + private: std::shared_ptr _loader_module = nullptr; std::vector _random_area, _random_aspect_ratio; unsigned _num_attempts; diff --git a/rocAL/include/loaders/image/node_fused_jpeg_crop_single_shard.h b/rocAL/include/loaders/image/node_fused_jpeg_crop_single_shard.h index cd4f23e75..bf5209282 100644 --- a/rocAL/include/loaders/image/node_fused_jpeg_crop_single_shard.h +++ b/rocAL/include/loaders/image/node_fused_jpeg_crop_single_shard.h @@ -21,15 +21,14 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "image_loader_sharded.h" #include "graph.h" +#include "image_loader_sharded.h" +#include "node.h" #include "parameter_factory.h" -class FusedJpegCropSingleShardNode: public Node -{ -public: - FusedJpegCropSingleShardNode(Image *output, void *device_resources); +class FusedJpegCropSingleShardNode : public Node { + public: + FusedJpegCropSingleShardNode(Tensor *output, void *device_resources); ~FusedJpegCropSingleShardNode() override; /// \param user_shard_count shard count from user @@ -43,10 +42,12 @@ class FusedJpegCropSingleShardNode: public Node unsigned num_attempts, std::vector &random_area, std::vector &random_aspect_ratio); std::shared_ptr get_loader_module(); -protected: - void create_node() override {}; - void update_node() override {}; -private: + + protected: + void create_node() override{}; + void update_node() override{}; + + private: std::shared_ptr _loader_module = nullptr; std::vector _random_area, _random_aspect_ratio; unsigned _num_attempts; diff --git a/rocAL/include/loaders/image/node_image_loader.h b/rocAL/include/loaders/image/node_image_loader.h index 84afd1673..ff18d205b 100644 --- a/rocAL/include/loaders/image/node_image_loader.h +++ b/rocAL/include/loaders/image/node_image_loader.h @@ -21,17 +21,16 @@ THE SOFTWARE. */ #pragma once -#include "node.h" -#include "image_loader_sharded.h" #include "graph.h" +#include "image_loader_sharded.h" +#include "node.h" -class ImageLoaderNode : public Node -{ -public: +class ImageLoaderNode : public Node { + public: /// \param device_resources shard count from user /// internal_shard_count number of loader/decoders are created and each shard is loaded and decoded using separate and independent resources increasing the parallelism and performance. 
- ImageLoaderNode(Image *output, void *device_resources); + ImageLoaderNode(Tensor *output, void *device_resources); ~ImageLoaderNode() override; ImageLoaderNode() = delete; /// @@ -44,9 +43,11 @@ class ImageLoaderNode : public Node size_t load_batch_count, RocalMemType mem_type, std::shared_ptr meta_data_reader, bool decoder_keep_orig = false, const char *prefix = "", unsigned sequence_length = 0, unsigned step = 0, unsigned stride = 0); std::shared_ptr get_loader_module(); -protected: + + protected: void create_node() override{}; void update_node() override{}; -private: + + private: std::shared_ptr _loader_module = nullptr; }; \ No newline at end of file diff --git a/rocAL/include/loaders/image/node_image_loader_single_shard.h b/rocAL/include/loaders/image/node_image_loader_single_shard.h index 61cb941ae..8a3b717f8 100644 --- a/rocAL/include/loaders/image/node_image_loader_single_shard.h +++ b/rocAL/include/loaders/image/node_image_loader_single_shard.h @@ -21,14 +21,13 @@ THE SOFTWARE. 
*/ #pragma once -#include "node.h" -#include "image_loader_sharded.h" #include "graph.h" +#include "image_loader_sharded.h" +#include "node.h" -class ImageLoaderSingleShardNode : public Node -{ -public: - ImageLoaderSingleShardNode(Image *output, void *device_resources); +class ImageLoaderSingleShardNode : public Node { + public: + ImageLoaderSingleShardNode(Tensor *output, void *device_resources); ~ImageLoaderSingleShardNode() override; /// \param user_shard_count shard count from user @@ -42,9 +41,11 @@ class ImageLoaderSingleShardNode : public Node const std::map feature_key_map = std::map(), unsigned sequence_length = 0, unsigned step = 0, unsigned stride = 0); std::shared_ptr get_loader_module(); -protected: + + protected: void create_node() override{}; void update_node() override{}; -private: + + private: std::shared_ptr _loader_module = nullptr; }; \ No newline at end of file diff --git a/rocAL/include/loaders/image_source_evaluator.h b/rocAL/include/loaders/image_source_evaluator.h index 48a7a42e8..7f1cdd7b0 100644 --- a/rocAL/include/loaders/image_source_evaluator.h +++ b/rocAL/include/loaders/image_source_evaluator.h @@ -21,53 +21,50 @@ THE SOFTWARE. 
*/ #pragma once -#include #include -#include "turbo_jpeg_decoder.h" +#include + +#include "loader_module.h" #include "reader_factory.h" #include "timing_debug.h" -#include "loader_module.h" -enum class ImageSourceEvaluatorStatus -{ +#include "turbo_jpeg_decoder.h" +enum class ImageSourceEvaluatorStatus { OK = 0, - UNSUPPORTED_DECODER_TYPE, + UNSUPPORTED_DECODER_TYPE, UNSUPPORTED_STORAGE_TYPE, }; -enum class MaxSizeEvaluationPolicy -{ +enum class MaxSizeEvaluationPolicy { MAXIMUM_FOUND_SIZE, MOST_FREQUENT_SIZE }; -class ImageSourceEvaluator -{ -public: +class ImageSourceEvaluator { + public: ImageSourceEvaluatorStatus create(ReaderConfig reader_cfg, DecoderConfig decoder_cfg); void find_max_dimension(); void set_size_evaluation_policy(MaxSizeEvaluationPolicy arg); size_t max_width(); size_t max_height(); -private: - class FindMaxSize - { - public: + private: + class FindMaxSize { + public: void set_policy(MaxSizeEvaluationPolicy arg) { _policy = arg; } void process_sample(unsigned val); unsigned get_max() { return _max; }; - private: + + private: MaxSizeEvaluationPolicy _policy = MaxSizeEvaluationPolicy::MOST_FREQUENT_SIZE; - std::map _hist; + std::map _hist; unsigned _max = 0; unsigned _max_count = 0; - }; - FindMaxSize _width_max; + }; + FindMaxSize _width_max; FindMaxSize _height_max; DecoderConfig _decoder_cfg_cv; std::shared_ptr _decoder; std::shared_ptr _reader; std::shared_ptr _meta_data_reader; std::vector _header_buff; - static const size_t COMPRESSED_SIZE = 1024 * 1024; // 1 MB + static const size_t COMPRESSED_SIZE = 1024 * 1024; // 1 MB }; - diff --git a/rocAL/include/loaders/image/loader_module.h b/rocAL/include/loaders/loader_module.h similarity index 68% rename from rocAL/include/loaders/image/loader_module.h rename to rocAL/include/loaders/loader_module.h index e0ad2c451..e962d6012 100644 --- a/rocAL/include/loaders/image/loader_module.h +++ b/rocAL/include/loaders/loader_module.h @@ -20,18 +20,18 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once +#pragma once #include + #include "image_reader.h" -#include "decoder.h" -#include "commons.h" -#include "image.h" #include "circular_buffer.h" -#include "meta_data_reader.h" +#include "commons.h" +#include "decoder.h" #include "meta_data_graph.h" +#include "meta_data_reader.h" +#include "tensor.h" -enum class LoaderModuleStatus -{ +enum class LoaderModuleStatus { OK = 0, DEVICE_BUFFER_SWAP_FAILED, HOST_BUFFER_SWAP_FAILED, @@ -42,24 +42,25 @@ enum class LoaderModuleStatus }; /*! \class LoaderModule The interface defining the API and requirements of loader modules*/ -class LoaderModule -{ -public: +class LoaderModule { + public: virtual void initialize(ReaderConfig reader_config, DecoderConfig decoder_config, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size) = 0; - virtual void set_output_image(Image* output_image) = 0; - virtual LoaderModuleStatus load_next() = 0; // Loads the next image data into the Image's buffer set by calling into the set_output_image - virtual void reset() = 0; // Resets the loader to load from the beginning of the media - virtual size_t remaining_count() = 0; // Returns the number of available images to be loaded - virtual ~LoaderModule()= default; - virtual Timing timing() = 0;// Returns timing info - virtual std::vector get_id() = 0; // returns the id of the last batch of images/frames loaded - virtual void start_loading() = 0; // starts internal loading thread + virtual void set_output(Tensor* output_tensor) = 0; + virtual LoaderModuleStatus load_next() = 0; // Loads the next image data into the Image's buffer set by calling into the set_output + virtual void reset() = 0; // Resets the loader to load from the beginning of the media + virtual size_t remaining_count() = 0; // Returns the number of available images to be loaded + virtual ~LoaderModule() = default; + virtual Timing timing() = 0; // Returns timing info + virtual std::vector get_id() = 0; // returns the id 
of the last batch of images/frames loaded + virtual void start_loading() = 0; // starts internal loading thread virtual decoded_image_info get_decode_image_info() = 0; virtual crop_image_info get_crop_image_info() = 0; virtual void set_prefetch_queue_depth(size_t prefetch_queue_depth) = 0; // introduce meta data reader virtual void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) = 0; virtual void shut_down() = 0; + virtual std::vector get_sequence_start_frame_number() { return {}; } + virtual std::vector> get_sequence_frame_timestamps() { return {}; } }; using pLoaderModule = std::shared_ptr; \ No newline at end of file diff --git a/rocAL/include/loaders/video/node_video_loader.h b/rocAL/include/loaders/video/node_video_loader.h index 41625cb2d..754696c96 100644 --- a/rocAL/include/loaders/video/node_video_loader.h +++ b/rocAL/include/loaders/video/node_video_loader.h @@ -21,16 +21,16 @@ THE SOFTWARE. */ #pragma once +#include + +#include "graph.h" #include "node.h" #include "video_loader_sharded.h" -#include "graph.h" -#include #ifdef ROCAL_VIDEO -class VideoLoaderNode : public Node -{ -public: - VideoLoaderNode(Image *output, void * device_resources); +class VideoLoaderNode : public Node { + public: + VideoLoaderNode(Tensor *output, void *device_resources); ~VideoLoaderNode() override; VideoLoaderNode() = delete; /// @@ -39,13 +39,15 @@ class VideoLoaderNode : public Node /// \param load_batch_count Defines the quantum count of the sequences to be loaded. It's usually equal to the user's batch size. /// The loader will repeat sequences if necessary to be able to have sequences in multiples of the load_batch_count, /// for example if there are 10 sequences in the dataset and load_batch_count is 3, the loader repeats 2 sequences as if there are 12 sequences available. 
- void init(unsigned internal_shard_count, const std::string &source_path, VideoStorageType storage_type, VideoDecoderType decoder_type, DecodeMode decoder_mode, + void init(unsigned internal_shard_count, const std::string &source_path, StorageType storage_type, DecoderType decoder_type, DecodeMode decoder_mode, unsigned sequence_length, unsigned step, unsigned stride, VideoProperties &video_prop, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type); - std::shared_ptr get_loader_module(); -protected: + std::shared_ptr get_loader_module(); + + protected: void create_node() override{}; void update_node() override{}; -private: + + private: DecodeMode _decode_mode = DecodeMode::CPU; std::shared_ptr _loader_module = nullptr; }; diff --git a/rocAL/include/loaders/video/node_video_loader_single_shard.h b/rocAL/include/loaders/video/node_video_loader_single_shard.h index a463ed31d..f70044bed 100644 --- a/rocAL/include/loaders/video/node_video_loader_single_shard.h +++ b/rocAL/include/loaders/video/node_video_loader_single_shard.h @@ -21,16 +21,16 @@ THE SOFTWARE. */ #pragma once +#include + +#include "graph.h" #include "node.h" #include "video_loader_sharded.h" -#include "graph.h" -#include #ifdef ROCAL_VIDEO -class VideoLoaderSingleShardNode : public Node -{ -public: - VideoLoaderSingleShardNode(Image *output, void *device_resources); +class VideoLoaderSingleShardNode : public Node { + public: + VideoLoaderSingleShardNode(Tensor *output, void *device_resources); ~VideoLoaderSingleShardNode() override; /// \param user_shard_count shard count from user @@ -39,15 +39,17 @@ class VideoLoaderSingleShardNode : public Node /// \param load_batch_count Defines the quantum count of the sequences to be loaded. It's usually equal to the user's batch size. 
/// The loader will repeat sequences if necessary to be able to have sequences in multiples of the load_batch_count, /// for example if there are 10 sequences in the dataset and load_batch_count is 3, the loader repeats 2 sequences as if there are 12 sequences available. - void init(unsigned shard_id, unsigned shard_count, const std::string &source_path, VideoStorageType storage_type, VideoDecoderType decoder_type, DecodeMode decoder_mode, + void init(unsigned shard_id, unsigned shard_count, const std::string &source_path, StorageType storage_type, DecoderType decoder_type, DecodeMode decoder_mode, unsigned sequence_length, unsigned step, unsigned stride, VideoProperties &video_prop, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type); - std::shared_ptr get_loader_module(); -protected: - void create_node() override {}; - void update_node() override {}; -private: - DecodeMode _decode_mode = DecodeMode::CPU; + std::shared_ptr get_loader_module(); + + protected: + void create_node() override{}; + void update_node() override{}; + + private: + DecodeMode _decode_mode = DecodeMode::CPU; std::shared_ptr _loader_module = nullptr; }; #endif diff --git a/rocAL/include/loaders/video/video_loader.h b/rocAL/include/loaders/video/video_loader.h index caa1092ee..6e991e78d 100644 --- a/rocAL/include/loaders/video/video_loader.h +++ b/rocAL/include/loaders/video/video_loader.h @@ -25,8 +25,9 @@ THE SOFTWARE. #include #include #include -#include "commons.h" + #include "circular_buffer.h" +#include "commons.h" #include "video_read_and_decode.h" #ifdef ROCAL_VIDEO @@ -34,40 +35,40 @@ THE SOFTWARE. 
// // VideoLoader runs an internal thread for loading an decoding of sequences asynchronously // it uses a circular buffer to store decoded sequence of frames for the user -class VideoLoader : public VideoLoaderModule -{ -public: - explicit VideoLoader(void * dev_resources); +class VideoLoader : public LoaderModule { + public: + explicit VideoLoader(void* dev_resources); ~VideoLoader() override; - VideoLoaderModuleStatus load_next() override; - void initialize(VideoReaderConfig reader_cfg, VideoDecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; - void set_output_image(Image *output_image) override; - size_t remaining_count() override; // returns number of remaining items to be loaded - void reset() override; // Resets the loader to load from the beginning + LoaderModuleStatus load_next() override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; + void set_output(Tensor* output_image) override; + size_t remaining_count() override; // returns number of remaining items to be loaded + void reset() override; // Resets the loader to load from the beginning Timing timing() override; void start_loading() override; - VideoLoaderModuleStatus set_cpu_affinity(cpu_set_t cpu_mask); - VideoLoaderModuleStatus set_cpu_sched_policy(struct sched_param sched_policy); + LoaderModuleStatus set_cpu_affinity(cpu_set_t cpu_mask); + LoaderModuleStatus set_cpu_sched_policy(struct sched_param sched_policy); std::vector get_id() override; decoded_image_info get_decode_image_info() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; + crop_image_info get_crop_image_info() override { return _crop_img_info; } + void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override{}; std::vector get_sequence_start_frame_number() override; std::vector> get_sequence_frame_timestamps() 
override; void shut_down() override; -private: + private: bool is_out_of_data(); void de_init(); void stop_internal_thread(); std::shared_ptr _video_loader; - VideoLoaderModuleStatus update_output_image(); - VideoLoaderModuleStatus load_routine(); - Image *_output_image; - std::vector _output_names; //!< frame name/ids that are stored in the _output_image + LoaderModuleStatus update_output_image(); + LoaderModuleStatus load_routine(); + Tensor* _output_tensor; + std::vector _output_names; //!< frame name/ids that are stored in the _output_image size_t _output_mem_size; bool _internal_thread_running; size_t _batch_size; - size_t _sequence_count; size_t _sequence_length; std::thread _load_thread; RocalMemType _mem_type; @@ -77,12 +78,14 @@ class VideoLoader : public VideoLoaderModule TimingDBG _swap_handle_time; bool _is_initialized; bool _stopped = false; - bool _loop; //> _sequence_start_framenum_vec; std::vector>> _sequence_frame_timestamps_vec; + crop_image_info _crop_img_info; + size_t _max_tensor_width, _max_tensor_height; }; #endif diff --git a/rocAL/include/loaders/video/video_loader_module.h b/rocAL/include/loaders/video/video_loader_module.h deleted file mode 100644 index 73e6cdf30..000000000 --- a/rocAL/include/loaders/video/video_loader_module.h +++ /dev/null @@ -1,66 +0,0 @@ -/* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once -#include -#include "video_reader.h" -#include "video_decoder.h" -#include "commons.h" -#include "image.h" -#include "circular_buffer.h" -#include "meta_data_reader.h" -#include "meta_data_graph.h" - -#ifdef ROCAL_VIDEO -enum class VideoLoaderModuleStatus -{ - OK = 0, - DEVICE_BUFFER_SWAP_FAILED, - HOST_BUFFER_SWAP_FAILED, - NO_FILES_TO_READ, - DECODE_FAILED, - NO_MORE_DATA_TO_READ, - NOT_INITIALIZED -}; - -/*! \class VideoLoaderModule The interface defining the API and requirements of loader modules*/ -class VideoLoaderModule -{ -public: - virtual void initialize(VideoReaderConfig reader_config, VideoDecoderConfig decoder_config, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size) = 0; - virtual void set_output_image(Image *output_image) = 0; - virtual VideoLoaderModuleStatus load_next() = 0; // Loads the next sequence of frames into the buffer set by calling into the set_output_image - virtual void reset() = 0; // Resets the loader to load from the beginning of the video files - virtual size_t remaining_count() = 0; // Returns the number of available frames to be loaded - virtual ~VideoLoaderModule() = default; - virtual Timing timing() = 0; // Returns timing info - virtual std::vector get_id() = 0; // returns the id of the last batch of images/frames loaded - virtual void start_loading() = 0; // starts internal loading thread - virtual decoded_image_info get_decode_image_info() = 0; - virtual void set_prefetch_queue_depth(size_t prefetch_queue_depth) = 0; - virtual 
std::vector get_sequence_start_frame_number() = 0; - virtual std::vector> get_sequence_frame_timestamps() = 0; - virtual void shut_down() = 0; -}; - -using pVideoLoaderModule = std::shared_ptr; -#endif diff --git a/rocAL/include/loaders/video/video_loader_sharded.h b/rocAL/include/loaders/video/video_loader_sharded.h index 44b79ebcb..1349cbcbe 100644 --- a/rocAL/include/loaders/video/video_loader_sharded.h +++ b/rocAL/include/loaders/video/video_loader_sharded.h @@ -22,39 +22,43 @@ THE SOFTWARE. #pragma once #include + #include "video_loader.h" // // VideoLoaderSharded Can be used to run load and decode in multiple shards, each shard by a single loader instance, // It improves load and decode performance since each loader loads the sequences in parallel using an internal thread // #ifdef ROCAL_VIDEO -class VideoLoaderSharded : public VideoLoaderModule -{ -public: - explicit VideoLoaderSharded(void *dev_resources); +class VideoLoaderSharded : public LoaderModule { + public: + explicit VideoLoaderSharded(void* dev_resources); ~VideoLoaderSharded() override; - VideoLoaderModuleStatus load_next() override; - void initialize(VideoReaderConfig reader_cfg, VideoDecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; + LoaderModuleStatus load_next() override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; void shut_down() override; - void set_output_image(Image *output_image) override; + void set_output(Tensor* output_image) override; size_t remaining_count() override; void reset() override; void start_loading() override; std::vector get_id() override; decoded_image_info get_decode_image_info() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; + crop_image_info get_crop_image_info() override { return _crop_img_info; } + void set_random_bbox_data_reader(std::shared_ptr 
randombboxcrop_meta_data_reader) override{}; std::vector get_sequence_start_frame_number() override; std::vector> get_sequence_frame_timestamps() override; Timing timing() override; -private: + + private: void increment_loader_idx(); - void *_dev_resources; + void* _dev_resources; bool _initialized = false; std::vector> _loaders; size_t _loader_idx; size_t _shard_count = 1; void fast_forward_through_empty_loaders(); - size_t _prefetch_queue_depth; // Used for circular buffer's internal buffer - Image *_output_image; + size_t _prefetch_queue_depth; // Used for circular buffer's internal buffer + Tensor* _output_tensor; + crop_image_info _crop_img_info; }; #endif diff --git a/rocAL/include/loaders/video/video_read_and_decode.h b/rocAL/include/loaders/video/video_read_and_decode.h index 8f43fb7c9..2cc20a393 100644 --- a/rocAL/include/loaders/video/video_read_and_decode.h +++ b/rocAL/include/loaders/video/video_read_and_decode.h @@ -33,24 +33,23 @@ THE SOFTWARE. #include "ffmpeg_video_decoder.h" #include "video_reader_factory.h" #include "timing_debug.h" -#include "video_loader_module.h" +#include "loader_module.h" #include "video_properties.h" +#include "video_reader.h" + #ifdef ROCAL_VIDEO -extern "C" -{ +extern "C" { #include } -class VideoReadAndDecode -{ -public: +class VideoReadAndDecode { + public: VideoReadAndDecode(); ~VideoReadAndDecode(); size_t count(); void reset(); - void create(VideoReaderConfig reader_config, VideoDecoderConfig decoder_config, int batch_size); - void set_video_process_count(size_t video_count) - { + void create(ReaderConfig reader_config, DecoderConfig decoder_config, int batch_size); + void set_video_process_count(size_t video_count) { _video_process_count = (video_count <= _max_video_count) ? video_count : _max_video_count; } float convert_framenum_to_timestamp(size_t frame_number); @@ -66,7 +65,7 @@ class VideoReadAndDecode /// \param sequence_start_framenum_vec is set by the load() function. 
The starting frame number of the sequences will be updated. /// \param sequence_frame_timestamps_vec is set by the load() function. The timestamps of each of the frames in the sequences will be updated. /// \param output_color_format defines what color format user expects decoder to decode frames into if capable of doing so supported is - VideoLoaderModuleStatus load( + LoaderModuleStatus load( unsigned char *buff, std::vector &names, const size_t max_decoded_width, @@ -81,9 +80,9 @@ class VideoReadAndDecode //! returns timing info or other status information Timing timing(); -private: - struct video_map - { + + private: + struct video_map { int _video_map_idx; bool _is_decoder_instance; }; @@ -102,7 +101,6 @@ class VideoReadAndDecode std::vector _sequence_video_idx; TimingDBG _file_load_time, _decode_time; size_t _batch_size; - size_t _sequence_count; size_t _sequence_length; size_t _stride; size_t _video_count; @@ -111,6 +109,6 @@ class VideoReadAndDecode size_t _max_decoded_height; size_t _max_decoded_stride; AVPixelFormat _out_pix_fmt; - VideoDecoderConfig _video_decoder_config; + DecoderConfig _video_decoder_config; }; #endif diff --git a/rocAL/include/meta_data/augmentations_meta_nodes.h b/rocAL/include/meta_data/augmentations_meta_nodes.h index 7e6f5a240..de1180a25 100644 --- a/rocAL/include/meta_data/augmentations_meta_nodes.h +++ b/rocAL/include/meta_data/augmentations_meta_nodes.h @@ -22,11 +22,12 @@ THE SOFTWARE. 
#pragma once +#include "meta_node_crop.h" #include "meta_node_crop_mirror_normalize.h" -#include "meta_node_resize.h" #include "meta_node_crop_resize.h" -#include "meta_node_crop.h" +#include "meta_node_flip.h" +#include "meta_node_resize.h" #include "meta_node_resize_crop_mirror.h" +#include "meta_node_resize_mirror_normalize.h" #include "meta_node_rotate.h" #include "meta_node_ssd_random_crop.h" -#include "meta_node_flip.h" diff --git a/rocAL/include/meta_data/bounding_box_graph.h b/rocAL/include/meta_data/bounding_box_graph.h index c2a46eb4d..76e2cf5fe 100644 --- a/rocAL/include/meta_data/bounding_box_graph.h +++ b/rocAL/include/meta_data/bounding_box_graph.h @@ -22,13 +22,31 @@ THE SOFTWARE. #pragma once #include + #include "meta_data_graph.h" #include "meta_node.h" -class BoundingBoxGraph : public MetaDataGraph -{ -public: - void process(MetaDataBatch* meta_data) override; - void update_random_bbox_meta_data(MetaDataBatch* meta_data, decoded_image_info decoded_image_info,crop_image_info crop_image_info) override; - void update_box_encoder_meta_data(std::vector *anchors, pMetaDataBatch full_batch_meta_data ,float criteria, bool offset , float scale, std::vector& means, std::vector& stds) override; -}; +typedef struct { + float xc; + float yc; + float w; + float h; +} BoundingBoxCord_xcycwh; +typedef struct { + float l; + float t; + float r; + float b; +} BoundingBoxCord_ltrb; +typedef union { + BoundingBoxCord_xcycwh xcycwh; + BoundingBoxCord_ltrb ltrb; +} BoundingBoxCordf; // Union comprises of float bbox cords of ltrb/xcycwh type + +class BoundingBoxGraph : public MetaDataGraph { + public: + void process(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data) override; + void update_meta_data(pMetaDataBatch meta_data, decoded_image_info decode_image_info) override; + void update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_image_info decoded_image_info, crop_image_info crop_image_info) override; + 
void update_box_encoder_meta_data(std::vector *anchors, pMetaDataBatch full_batch_meta_data, float criteria, bool offset, float scale, std::vector &means, std::vector &stds, float *encoded_boxes_data, int *encoded_labels_data) override; +}; diff --git a/rocAL/include/meta_data/caffe2_meta_data_reader.h b/rocAL/include/meta_data/caffe2_meta_data_reader.h index f1d521dd6..3dfe3d3ea 100644 --- a/rocAL/include/meta_data/caffe2_meta_data_reader.h +++ b/rocAL/include/meta_data/caffe2_meta_data_reader.h @@ -21,42 +21,42 @@ THE SOFTWARE. */ #pragma once -#include #include -#include + #include +#include +#include #include + #include "commons.h" #include "meta_data.h" #include "meta_data_reader.h" #include "image_reader.h" -class Caffe2MetaDataReader: public MetaDataReader -{ -public : - void init(const MetaDataConfig& cfg) override; +class Caffe2MetaDataReader : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; void print_map_contents(); bool set_timestamp_mode() override { return false; } - MetaDataBatch * get_output() override { return _output; } - std::map> &get_map_content() override { return (_map_content);} + std::map>& get_map_content() override { return (_map_content); } Caffe2MetaDataReader(); - ~Caffe2MetaDataReader() override { delete _output; } -private: + + private: void read_files(const std::string& _path); - bool exists(const std::string &image_name) override; + bool exists(const std::string& image_name) override; void add(std::string image_name, int label); bool _last_rec; void read_lmdb_record(std::string file_name, uint file_size); std::map> _map_content; std::map>::iterator _itr; std::string _path; - LabelBatch* _output; - DIR *_src_dir; - struct dirent *_entity; + pMetaDataBatch _output; + DIR* _src_dir; + struct dirent* 
_entity; std::vector _file_names; std::vector _image_name; }; diff --git a/rocAL/include/meta_data/caffe2_meta_data_reader_detection.h b/rocAL/include/meta_data/caffe2_meta_data_reader_detection.h index 589b1eba8..79da58310 100644 --- a/rocAL/include/meta_data/caffe2_meta_data_reader_detection.h +++ b/rocAL/include/meta_data/caffe2_meta_data_reader_detection.h @@ -21,42 +21,42 @@ THE SOFTWARE. */ #pragma once -#include #include -#include + #include +#include +#include #include + #include "commons.h" #include "meta_data.h" #include "meta_data_reader.h" #include "image_reader.h" -class Caffe2MetaDataReaderDetection: public MetaDataReader -{ -public : - void init(const MetaDataConfig& cfg) override; +class Caffe2MetaDataReaderDetection : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; void print_map_contents(); - std::map> &get_map_content() override{ return _map_content;} + std::map>& get_map_content() override { return _map_content; } bool set_timestamp_mode() override { return false; } - MetaDataBatch * get_output() override { return _output; } Caffe2MetaDataReaderDetection(); - ~Caffe2MetaDataReaderDetection() override { delete _output; } -private: + + private: void read_files(const std::string& _path); - bool exists(const std::string &image_name) override; - void add(std::string image_name, BoundingBoxCords bbox, BoundingBoxLabels b_labels, ImgSize image_size); + bool exists(const std::string& image_name) override; + void add(std::string image_name, BoundingBoxCords bbox, Labels labels, ImgSize image_size); bool _last_rec; void read_lmdb_record(std::string file_name, uint file_size); std::map> _map_content; std::map>::iterator _itr; std::string _path; - BoundingBoxBatch* _output; - DIR *_src_dir; - struct dirent *_entity; + 
pMetaDataBatch _output; + DIR* _src_dir; + struct dirent* _entity; std::vector _file_names; std::vector _image_name; }; diff --git a/rocAL/include/meta_data/caffe_meta_data_reader.h b/rocAL/include/meta_data/caffe_meta_data_reader.h index bbc2355d6..b166ba7eb 100644 --- a/rocAL/include/meta_data/caffe_meta_data_reader.h +++ b/rocAL/include/meta_data/caffe_meta_data_reader.h @@ -21,40 +21,40 @@ THE SOFTWARE. */ #pragma once -#include #include #include + +#include + +#include "caffe_protos.pb.h" #include "commons.h" #include "meta_data.h" #include "meta_data_reader.h" #include "image_reader.h" -#include "caffe_protos.pb.h" -class CaffeMetaDataReader: public MetaDataReader -{ -public : - void init(const MetaDataConfig& cfg) override; +class CaffeMetaDataReader : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; bool set_timestamp_mode() override { return false; } void print_map_contents(); - std::map> &get_map_content() override { return _map_content;} - MetaDataBatch * get_output() override { return _output; } + std::map>& get_map_content() override { return _map_content; } CaffeMetaDataReader(); - ~CaffeMetaDataReader() override { delete _output; } -private: + + private: void read_files(const std::string& _path); void read_lmdb_record(std::string _path, uint file_size); - bool exists(const std::string &image_name) override; + bool exists(const std::string& image_name) override; void add(std::string image_name, int label); std::map> _map_content; std::map>::iterator _itr; std::string _path; - LabelBatch* _output; + pMetaDataBatch _output; DIR *_src_dir, *_sub_dir; - struct dirent *_entity; + struct dirent* _entity; std::vector _file_names; std::vector _subfolder_file_names; MDB_env* _mdb_env; @@ -62,4 +62,4 @@ public : MDB_val _mdb_key, 
_mdb_value; MDB_txn* _mdb_txn; MDB_cursor* _mdb_cursor; - }; +}; diff --git a/rocAL/include/meta_data/caffe_meta_data_reader_detection.h b/rocAL/include/meta_data/caffe_meta_data_reader_detection.h index 457ddea54..528446248 100644 --- a/rocAL/include/meta_data/caffe_meta_data_reader_detection.h +++ b/rocAL/include/meta_data/caffe_meta_data_reader_detection.h @@ -21,44 +21,44 @@ THE SOFTWARE. */ #pragma once -#include #include -#include + #include +#include +#include #include #include #include "commons.h" +#include "lmdb.h" #include "meta_data.h" #include "meta_data_reader.h" #include "image_reader.h" #include "caffe_protos.pb.h" -class CaffeMetaDataReaderDetection: public MetaDataReader -{ -public : - void init(const MetaDataConfig& cfg) override; +class CaffeMetaDataReaderDetection : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; bool set_timestamp_mode() override { return false; } void print_map_contents(); - std::map> &get_map_content() override{ return _map_content;} - MetaDataBatch * get_output() override { return _output; } + std::map>& get_map_content() override { return _map_content; } CaffeMetaDataReaderDetection(); - ~CaffeMetaDataReaderDetection() override { delete _output; } -private: + + private: void read_files(const std::string& _path); - bool exists(const std::string &image_name) override; - void add(std::string image_name, BoundingBoxCords bbox, BoundingBoxLabels b_labels, ImgSize image_size); + bool exists(const std::string& image_name) override; + void add(std::string image_name, BoundingBoxCords bbox, Labels labels, ImgSize image_size); bool _last_rec; void read_lmdb_record(std::string file_name, uint file_size); std::map> _map_content; std::map>::iterator _itr; std::string _path; - BoundingBoxBatch* _output; - DIR 
*_src_dir; - struct dirent *_entity; + pMetaDataBatch _output; + DIR* _src_dir; + struct dirent* _entity; std::vector _file_names; MDB_env* _mdb_env; MDB_dbi _mdb_dbi; diff --git a/rocAL/include/meta_data/cifar10_meta_data_reader.h b/rocAL/include/meta_data/cifar10_meta_data_reader.h index 8801b9235..86415461c 100644 --- a/rocAL/include/meta_data/cifar10_meta_data_reader.h +++ b/rocAL/include/meta_data/cifar10_meta_data_reader.h @@ -21,38 +21,38 @@ THE SOFTWARE. */ #pragma once -#include #include + +#include + #include "commons.h" #include "meta_data.h" #include "meta_data_reader.h" -class Cifar10MetaDataReader: public MetaDataReader -{ -public : - void init(const MetaDataConfig& cfg) override; +class Cifar10MetaDataReader : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; void print_map_contents(); bool set_timestamp_mode() override { return false; } - MetaDataBatch * get_output() override { return _output; } - std::map> &get_map_content() override { return _map_content;} + std::map>& get_map_content() override { return _map_content; } Cifar10MetaDataReader(); - ~Cifar10MetaDataReader() override { delete _output; } -private: + + private: void read_files(const std::string& _path); - bool exists(const std::string &image_name) override; + bool exists(const std::string& image_name) override; void add(std::string image_name, int label); std::map> _map_content; std::map>::iterator _itr; std::string _path; std::string _file_prefix; - size_t _raw_file_size; - LabelBatch* _output; + size_t _raw_file_size; + pMetaDataBatch _output; DIR *_src_dir, *_sub_dir; - struct dirent *_entity; + struct dirent* _entity; std::vector _file_names; std::vector _file_offsets; std::vector _file_idx; diff --git a/rocAL/include/meta_data/coco_meta_data_reader.h 
b/rocAL/include/meta_data/coco_meta_data_reader.h index 2aacbe6bd..aec539bed 100644 --- a/rocAL/include/meta_data/coco_meta_data_reader.h +++ b/rocAL/include/meta_data/coco_meta_data_reader.h @@ -22,37 +22,37 @@ THE SOFTWARE. #pragma once #include + #include "commons.h" #include "meta_data.h" #include "meta_data_reader.h" #include "timing_debug.h" -class COCOMetaDataReader: public MetaDataReader -{ -public: - void init(const MetaDataConfig& cfg) override; +class COCOMetaDataReader : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; void print_map_contents(); bool set_timestamp_mode() override { return false; } - MetaDataBatch * get_output() override { return _output; } - const std::map> & get_map_content() override { return _map_content;} + + const std::map>& get_map_content() override { return _map_content; } COCOMetaDataReader(); - ~COCOMetaDataReader() override { delete _output; } -private: - BoundingBoxBatch* _output; + + private: + pMetaDataBatch _output; std::string _path; int meta_data_reader_type; - void add(std::string image_name, BoundingBoxCords bbox, BoundingBoxLabels b_labels, ImgSize image_size); - bool exists(const std::string &image_name) override; + void add(std::string image_name, BoundingBoxCords bbox, Labels labels, ImgSize image_size, int image_id = 0); + void add(std::string image_name, BoundingBoxCords bbox, Labels labels, ImgSize image_size, MaskCords mask_cords, std::vector polygon_count, std::vector> vertices_count); // To add Mask coordinates to Metadata struct + bool exists(const std::string& image_name) override; std::map> _map_content; std::map>::iterator _itr; std::map _map_img_sizes; - std::map ::iterator itr; + std::map::iterator itr; std::map _label_info; - std::map ::iterator _it_label; + 
std::map::iterator _it_label; TimingDBG _coco_metadata_read_time; }; - diff --git a/rocAL/include/meta_data/coco_meta_data_reader_key_points.h b/rocAL/include/meta_data/coco_meta_data_reader_key_points.h index 096780b90..4f9a6c062 100644 --- a/rocAL/include/meta_data/coco_meta_data_reader_key_points.h +++ b/rocAL/include/meta_data/coco_meta_data_reader_key_points.h @@ -22,39 +22,38 @@ THE SOFTWARE. #pragma once #include + #include "commons.h" #include "meta_data.h" #include "meta_data_reader.h" #include "timing_debug.h" -class COCOMetaDataReaderKeyPoints: public MetaDataReader -{ -public: - void init(const MetaDataConfig& cfg) override; +class COCOMetaDataReaderKeyPoints : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; void print_map_contents(); bool set_timestamp_mode() override { return false; } - MetaDataBatch * get_output() override { return _output; } - const std::map> & get_map_content() override { return _map_content; } + + const std::map>& get_map_content() override { return _map_content; } COCOMetaDataReaderKeyPoints(); - ~COCOMetaDataReaderKeyPoints() override { delete _output; } -private: - KeyPointBatch* _output; + + private: + pMetaDataBatch _output; std::string _path; unsigned _out_img_width; unsigned _out_img_height; int meta_data_reader_type; - void add(std::string image_name, ImgSize image_size, JointsData *joints_data); - bool exists(const std::string &image_name) override; + void add(std::string image_name, ImgSize image_size, JointsData* joints_data); + bool exists(const std::string& image_name) override; std::map> _map_content; std::map>::iterator _itr; std::map _map_img_sizes; - std::map> ::iterator itr; + std::map>::iterator itr; std::map _label_info; - std::map ::iterator _it_label; + std::map::iterator 
_it_label; TimingDBG _coco_metadata_read_time; }; - diff --git a/rocAL/include/meta_data/label_reader_folders.h b/rocAL/include/meta_data/label_reader_folders.h index 5c999edb8..e9a42ac4d 100644 --- a/rocAL/include/meta_data/label_reader_folders.h +++ b/rocAL/include/meta_data/label_reader_folders.h @@ -21,36 +21,37 @@ THE SOFTWARE. */ #pragma once -#include #include + +#include + #include "commons.h" #include "meta_data.h" #include "meta_data_reader.h" -class LabelReaderFolders: public MetaDataReader -{ -public : - void init(const MetaDataConfig& cfg) override; +class LabelReaderFolders : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; void lookup(const std::vector& image_names) override; void read_all(const std::string& path) override; void release(std::string image_name); void release() override; void print_map_contents(); bool set_timestamp_mode() override { return false; } - const std::map> & get_map_content() override { return _map_content;} - MetaDataBatch * get_output() override { return _output; } + const std::map>& get_map_content() override { return _map_content; } + LabelReaderFolders(); - ~LabelReaderFolders() override { delete _output; } -private: + + private: void read_files(const std::string& _path); - bool exists(const std::string &image_name) override; + bool exists(const std::string& image_name) override; void add(std::string image_name, int label); std::map> _map_content; std::map>::iterator _itr; std::string _path; - LabelBatch* _output; + pMetaDataBatch _output; DIR *_src_dir, *_sub_dir; - struct dirent *_entity; + struct dirent* _entity; std::vector _file_names; std::vector _subfolder_file_names; }; \ No newline at end of file diff --git a/rocAL/include/meta_data/lookahead_parser.h b/rocAL/include/meta_data/lookahead_parser.h index d62c05bfa..89d4ad2cd 100644 --- a/rocAL/include/meta_data/lookahead_parser.h +++ b/rocAL/include/meta_data/lookahead_parser.h @@ -20,13 +20,14 @@ 
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #pragma once -#include #include -#include -#include +#include + #include -#include #include +#include +#include +#include RAPIDJSON_DIAG_PUSH #ifdef __GNUC__ @@ -65,29 +66,76 @@ RAPIDJSON_DIAG_OFF(effc++) using namespace rapidjson; - class LookaheadParserHandler { -public: - bool Null() { st_ = kHasNull; v_.SetNull(); return true; } - bool Bool(bool b) { st_ = kHasBool; v_.SetBool(b); return true; } - bool Int(int i) { st_ = kHasNumber; v_.SetInt(i); return true; } - bool Uint(unsigned u) { st_ = kHasNumber; v_.SetUint(u); return true; } - bool Int64(int64_t i) { st_ = kHasNumber; v_.SetInt64(i); return true; } - bool Uint64(uint64_t u) { st_ = kHasNumber; v_.SetUint64(u); return true; } - bool Double(double d) { st_ = kHasNumber; v_.SetDouble(d); return true; } + public: + bool Null() { + st_ = kHasNull; + v_.SetNull(); + return true; + } + bool Bool(bool b) { + st_ = kHasBool; + v_.SetBool(b); + return true; + } + bool Int(int i) { + st_ = kHasNumber; + v_.SetInt(i); + return true; + } + bool Uint(unsigned u) { + st_ = kHasNumber; + v_.SetUint(u); + return true; + } + bool Int64(int64_t i) { + st_ = kHasNumber; + v_.SetInt64(i); + return true; + } + bool Uint64(uint64_t u) { + st_ = kHasNumber; + v_.SetUint64(u); + return true; + } + bool Double(double d) { + st_ = kHasNumber; + v_.SetDouble(d); + return true; + } bool RawNumber(const char*, SizeType, bool) { return false; } - bool String(const char* str, SizeType length, bool) { st_ = kHasString; v_.SetString(str, length); return true; } - bool StartObject() { st_ = kEnteringObject; return true; } - bool Key(const char* str, SizeType length, bool) { st_ = kHasKey; v_.SetString(str, length); return true; } - bool EndObject(SizeType) { st_ = kExitingObject; return true; } - bool StartArray() { st_ = kEnteringArray; return true; } - bool EndArray(SizeType) { st_ = kExitingArray; return true; } - -protected: + bool 
String(const char* str, SizeType length, bool) { + st_ = kHasString; + v_.SetString(str, length); + return true; + } + bool StartObject() { + st_ = kEnteringObject; + return true; + } + bool Key(const char* str, SizeType length, bool) { + st_ = kHasKey; + v_.SetString(str, length); + return true; + } + bool EndObject(SizeType) { + st_ = kExitingObject; + return true; + } + bool StartArray() { + st_ = kEnteringArray; + return true; + } + bool EndArray(SizeType) { + st_ = kExitingArray; + return true; + } + + protected: LookaheadParserHandler(char* str); void ParseNext(); -protected: + protected: enum LookaheadParsingState { kInit, kError, @@ -125,7 +173,7 @@ inline void LookaheadParserHandler::ParseNext() { } class LookaheadParser : protected LookaheadParserHandler { -public: + public: LookaheadParser(char* str) : LookaheadParserHandler(str) {} bool EnterObject(); @@ -142,17 +190,17 @@ class LookaheadParser : protected LookaheadParserHandler { void SkipArray(); void SkipValue(); Value* PeekValue(); - int PeekType(); // returns a rapidjson::Type, or -1 for no value (at end of object/array) + int PeekType(); // returns a rapidjson::Type, or -1 for no value (at end of object/array) bool IsValid() { return st_ != kError; } -protected: + protected: void SkipOut(int depth); }; inline bool LookaheadParser::EnterObject() { if (st_ != kEnteringObject) { - st_ = kError; + st_ = kError; return false; } @@ -162,7 +210,7 @@ inline bool LookaheadParser::EnterObject() { inline bool LookaheadParser::EnterArray() { if (st_ != kEnteringArray) { - st_ = kError; + st_ = kError; return false; } @@ -213,7 +261,7 @@ inline int LookaheadParser::GetInt() { inline double LookaheadParser::GetDouble() { if (st_ != kHasNumber) { - st_ = kError; + st_ = kError; return 0.; } @@ -224,7 +272,7 @@ inline double LookaheadParser::GetDouble() { inline bool LookaheadParser::GetBool() { if (st_ != kHasBool) { - st_ = kError; + st_ = kError; return false; } @@ -235,7 +283,7 @@ inline bool 
LookaheadParser::GetBool() { inline void LookaheadParser::GetNull() { if (st_ != kHasNull) { - st_ = kError; + st_ = kError; return; } @@ -244,7 +292,7 @@ inline void LookaheadParser::GetNull() { inline const char* LookaheadParser::GetString() { if (st_ != kHasString) { - st_ = kError; + st_ = kError; return 0; } @@ -257,17 +305,14 @@ inline void LookaheadParser::SkipOut(int depth) { do { if (st_ == kEnteringArray || st_ == kEnteringObject) { ++depth; - } - else if (st_ == kExitingArray || st_ == kExitingObject) { + } else if (st_ == kExitingArray || st_ == kExitingObject) { --depth; - } - else if (st_ == kError) { + } else if (st_ == kError) { return; } ParseNext(); - } - while (depth > 0); + } while (depth > 0); } inline void LookaheadParser::SkipValue() { diff --git a/rocAL/include/meta_data/meta_data.h b/rocAL/include/meta_data/meta_data.h index 59907aaee..2d3ba9a26 100644 --- a/rocAL/include/meta_data/meta_data.h +++ b/rocAL/include/meta_data/meta_data.h @@ -21,40 +21,58 @@ THE SOFTWARE. 
*/ #pragma once +#include +#include +#include #include #include #include -#include -#include "commons.h" +#include "commons.h" -//Defined constants since needed in reader and meta nodes for Pose Estimation +// Defined constants since needed in reader and meta nodes for Pose Estimation #define NUMBER_OF_JOINTS 17 #define NUMBER_OF_JOINTS_HALFBODY 8 -#define PIXEL_STD 200 +#define PIXEL_STD 200 #define SCALE_CONSTANT_CS 1.25 #define SCALE_CONSTANT_HALF_BODY 1.5 -typedef struct BoundingBoxCord_ -{ - float l; float t; float r; float b; - BoundingBoxCord_() {} - BoundingBoxCord_(float l_, float t_, float r_, float b_): l(l_), t(t_), r(r_), b(b_) {} // constructor - BoundingBoxCord_(const BoundingBoxCord_& cord) : l(cord.l), t(cord.t), r(cord.r), b(cord.b) {} //copy constructor +typedef struct BoundingBoxCord_ { + float l; + float t; + float r; + float b; + BoundingBoxCord_() {} + BoundingBoxCord_(float l_, float t_, float r_, float b_) : l(l_), t(t_), r(r_), b(b_) {} // constructor + BoundingBoxCord_(const BoundingBoxCord_& cord) : l(cord.l), t(cord.t), r(cord.r), b(cord.b) {} // copy constructor } BoundingBoxCord; -typedef struct { float xc; float yc; float w; float h; } BoundingBoxCord_xcycwh; -typedef std::vector BoundingBoxCords; -typedef std::vector BoundingBoxCords_xcycwh; -typedef std::vector BoundingBoxLabels; -typedef struct { int w; int h; } ImgSize; -typedef std::vector ImgSizes; +typedef std::vector BoundingBoxCords; +typedef std::vector Labels; +typedef struct { + int w; + int h; +} ImgSize; +typedef std::vector ImgSizes; -typedef std::vector ImageIDBatch,AnnotationIDBatch; +typedef std::vector MaskCords; +typedef std::vector ImageIDBatch, AnnotationIDBatch; typedef std::vector ImagePathBatch; -typedef std::vector Joint,JointVisibility,ScoreBatch,RotationBatch; -typedef std::vector> Joints,JointsVisibility, CenterBatch, ScaleBatch; +typedef std::vector Joint, JointVisibility, ScoreBatch, RotationBatch; +typedef std::vector> Joints, JointsVisibility, 
CenterBatch, ScaleBatch; typedef std::vector>> JointsBatch, JointsVisibilityBatch; +enum class MetaDataType { + Label, + BoundingBox, + PolygonMask, + KeyPoints +}; + +enum class BoundingBoxType { + XYWH = 0, + LTRB +}; + typedef struct { int image_id; @@ -66,7 +84,7 @@ typedef struct JointsVisibility joints_visibility; float score; float rotation; -}JointsData; +} JointsData; typedef struct { @@ -79,187 +97,398 @@ typedef struct JointsVisibilityBatch joints_visibility_batch; ScoreBatch score_batch; RotationBatch rotation_batch; -}JointsDataBatch; +} JointsDataBatch; -struct MetaData -{ - int& get_label() { return _label_id; } - BoundingBoxCords& get_bb_cords() { return _bb_cords; } - BoundingBoxCords_xcycwh& get_bb_cords_xcycwh() { return _bb_cords_xcycwh; } - BoundingBoxLabels& get_bb_labels() { return _bb_label_ids; } - void set_bb_labels(BoundingBoxLabels bb_label_ids) {_bb_label_ids = std::move(bb_label_ids); } - ImgSize& get_img_size() { return _img_size; } - const JointsData& get_joints_data(){ return _joints_data; } -protected: - BoundingBoxCords _bb_cords = {}; // For bb use - BoundingBoxCords_xcycwh _bb_cords_xcycwh = {}; // For bb use - BoundingBoxLabels _bb_label_ids = {};// For bb use - ImgSize _img_size = {}; - JointsData _joints_data = {}; - int _label_id = -1; // For label use only +typedef class MetaDataInfo { + public: + int img_id = -1; + std::string img_name = ""; + ImgSize img_size = {}; +} MetaDataInfo; + +class MetaData { + public: + virtual std::vector& get_labels() = 0; + virtual void set_labels(Labels label_ids) = 0; + virtual BoundingBoxCords& get_bb_cords() = 0; + virtual void set_bb_cords(BoundingBoxCords bb_cords) = 0; + virtual std::vector& get_polygon_count() = 0; + virtual std::vector>& get_vertices_count() = 0; + virtual MaskCords& get_mask_cords() = 0; + virtual void set_mask_cords(MaskCords mask_cords) = 0; + virtual void set_polygon_counts(std::vector polygon_count) = 0; + virtual void set_vertices_counts(std::vector> 
vertices_count) = 0; + virtual JointsData& get_joints_data() = 0; + virtual void set_joints_data(JointsData* joints_data) = 0; + ImgSize& get_img_size() { return _info.img_size; } + std::string& get_image_name() { return _info.img_name; } + int& get_image_id() { return _info.img_id; } + void set_img_size(ImgSize img_size) { _info.img_size = std::move(img_size); } + void set_img_id(int img_id) { _info.img_id = img_id; } + void set_img_name(std::string img_name) { _info.img_name = img_name; } + void set_metadata_info(MetaDataInfo info) { _info = std::move(info); } + + protected: + MetaDataInfo _info; }; -struct Label : public MetaData -{ - Label(int label) { _label_id = label; } - Label(){ _label_id = -1; } +class Label : public MetaData { + public: + Label(int label) { _label_ids = {label}; } + Label() { _label_ids = {-1}; } + std::vector& get_labels() override { return _label_ids; } + void set_labels(Labels label_ids) override { _label_ids = std::move(label_ids); } + BoundingBoxCords& get_bb_cords() override { THROW("Not Implemented") } + void set_bb_cords(BoundingBoxCords bb_cords) override{THROW("Not Implemented")} std::vector& get_polygon_count() override{THROW("Not Implemented")} std::vector>& get_vertices_count() override{THROW("Not Implemented")} MaskCords& get_mask_cords() override { THROW("Not Implemented") } + void set_mask_cords(MaskCords mask_cords) override { THROW("Not Implemented") } + void set_polygon_counts(std::vector polygon_count) override { THROW("Not Implemented") } + void set_vertices_counts(std::vector> vertices_count) override{THROW("Not Implemented")} JointsData& get_joints_data() override { THROW("Not Implemented") } + void set_joints_data(JointsData* joints_data) override { THROW("Not Implemented") } + + protected: + Labels _label_ids = {}; // For label use only }; -struct BoundingBox : public MetaData -{ - BoundingBox()= default; - BoundingBox(BoundingBoxCords bb_cords, BoundingBoxLabels bb_label_ids) - { - _bb_cords 
=std::move(bb_cords); - _bb_label_ids = std::move(bb_label_ids); - } - BoundingBox(BoundingBoxCords bb_cords, BoundingBoxLabels bb_label_ids, ImgSize img_size) - { - _bb_cords =std::move(bb_cords); - _bb_label_ids = std::move(bb_label_ids); - _img_size = std::move(img_size); - } - void set_bb_cords(BoundingBoxCords bb_cords) { _bb_cords =std::move(bb_cords); } - BoundingBox(BoundingBoxCords_xcycwh bb_cords_xcycwh, BoundingBoxLabels bb_label_ids) - { - _bb_cords_xcycwh =std::move(bb_cords_xcycwh); - _bb_label_ids = std::move(bb_label_ids); - } - BoundingBox(BoundingBoxCords_xcycwh bb_cords_xcycwh, BoundingBoxLabels bb_label_ids, ImgSize img_size) - { - _bb_cords_xcycwh =std::move(bb_cords_xcycwh); - _bb_label_ids = std::move(bb_label_ids); - _img_size = std::move(img_size); - } - void set_bb_cords_xcycwh(BoundingBoxCords_xcycwh bb_cords_xcycwh) { _bb_cords_xcycwh =std::move(bb_cords_xcycwh); } - void set_bb_labels(BoundingBoxLabels bb_label_ids) { _bb_label_ids = std::move(bb_label_ids); } - void set_img_size(ImgSize img_size) { _img_size = std::move(img_size); } +class BoundingBox : public Label { + public: + BoundingBox() = default; + BoundingBox(BoundingBoxCords bb_cords, Labels bb_label_ids, ImgSize img_size = ImgSize{0, 0}, int img_id = 0) { + _bb_cords = std::move(bb_cords); + _label_ids = std::move(bb_label_ids); + _info.img_size = std::move(img_size); + _info.img_id = img_id; + } + BoundingBoxCords& get_bb_cords() override { return _bb_cords; } + void set_bb_cords(BoundingBoxCords bb_cords) override { _bb_cords = std::move(bb_cords); } + + protected: + BoundingBoxCords _bb_cords = {}; // For bb use }; -struct KeyPoint : public MetaData -{ - KeyPoint()= default; - KeyPoint(ImgSize img_size, JointsData *joints_data) - { - _img_size = std::move(img_size); +struct PolygonMask : public BoundingBox { + public: + PolygonMask(BoundingBoxCords bb_cords, Labels bb_label_ids, ImgSize img_size, MaskCords mask_cords, std::vector polygon_count, std::vector> 
vertices_count) { + _bb_cords = std::move(bb_cords); + _label_ids = std::move(bb_label_ids); + _info.img_size = std::move(img_size); + _mask_cords = std::move(mask_cords); + _polygon_count = std::move(polygon_count); + _vertices_count = std::move(vertices_count); + } + std::vector& get_polygon_count() override { return _polygon_count; } + std::vector>& get_vertices_count() override { return _vertices_count; } + MaskCords& get_mask_cords() override { return _mask_cords; } + void set_mask_cords(MaskCords mask_cords) override { _mask_cords = std::move(mask_cords); } + void set_polygon_counts(std::vector polygon_count) override { _polygon_count = std::move(polygon_count); } + void set_vertices_counts(std::vector> vertices_count) override { _vertices_count = std::move(vertices_count); } + + protected: + MaskCords _mask_cords = {}; + std::vector _polygon_count = {}; + std::vector> _vertices_count = {}; +}; + +class KeyPoint : public BoundingBox { + public: + KeyPoint() = default; + KeyPoint(ImgSize img_size, JointsData* joints_data) { + _info.img_size = std::move(img_size); _joints_data = std::move(*joints_data); } - void set_joints_data(JointsData *joints_data) { _joints_data = std::move(*joints_data); } + void set_joints_data(JointsData* joints_data) override { _joints_data = std::move(*joints_data); } + JointsData& get_joints_data() override { return _joints_data; } + + protected: + JointsData _joints_data = {}; }; -struct MetaDataBatch -{ +class MetaDataInfoBatch { + public: + std::vector img_ids = {}; + std::vector img_names = {}; + std::vector img_sizes = {}; + void clear() { + img_ids.clear(); + img_names.clear(); + img_sizes.clear(); + } + void resize(int batch_size) { + img_ids.resize(batch_size); + img_names.resize(batch_size); + img_sizes.resize(batch_size); + } + void insert(MetaDataInfoBatch& other) { + img_sizes.insert(img_sizes.end(), other.img_sizes.begin(), other.img_sizes.end()); + img_ids.insert(img_ids.end(), other.img_ids.begin(), 
other.img_ids.end()); + img_names.insert(img_names.end(), other.img_names.begin(), other.img_names.end()); + } +}; + +class MetaDataBatch { + public: virtual ~MetaDataBatch() = default; virtual void clear() = 0; virtual void resize(int batch_size) = 0; virtual int size() = 0; - virtual MetaDataBatch& operator += (MetaDataBatch& other) = 0; - MetaDataBatch* concatenate(MetaDataBatch* other) - { + virtual void copy_data(std::vector buffer) = 0; + virtual std::vector& get_buffer_size() = 0; + virtual MetaDataBatch& operator+=(MetaDataBatch& other) = 0; + MetaDataBatch* concatenate(MetaDataBatch* other) { *this += *other; return this; } - virtual std::shared_ptr clone() = 0; - std::vector& get_label_batch() { return _label_id; } - std::vector& get_bb_cords_batch() { return _bb_cords; } - std::vector& get_bb_cords_batch_xcycxwh() { return _bb_cords_xcycwh; } - std::vector& get_bb_labels_batch() { return _bb_label_ids; } - ImgSizes & get_img_sizes_batch() { return _img_sizes; } - JointsDataBatch & get_joints_data_batch() { return _joints_data; } -protected: - std::vector _label_id = {}; // For label use only - std::vector _bb_cords = {}; - std::vector _bb_cords_xcycwh = {}; - std::vector _bb_label_ids = {}; - std::vector _img_sizes = {}; - JointsDataBatch _joints_data = {}; + virtual std::shared_ptr clone(bool copy_contents = true) = 0; + virtual int mask_size() = 0; + virtual std::vector& get_labels_batch() = 0; + virtual std::vector& get_bb_cords_batch() = 0; + virtual void set_xywh_bbox() = 0; + virtual std::vector& get_mask_cords_batch() = 0; + virtual std::vector>& get_mask_polygons_count_batch() = 0; + virtual std::vector>>& get_mask_vertices_count_batch() = 0; + virtual JointsDataBatch& get_joints_data_batch() = 0; + std::vector& get_image_id_batch() { return _info_batch.img_ids; } + std::vector& get_image_names_batch() { return _info_batch.img_names; } + ImgSizes& get_img_sizes_batch() { return _info_batch.img_sizes; } + MetaDataInfoBatch& get_info_batch() { 
return _info_batch; } + void set_metadata_type(MetaDataType metadata_type) { _type = metadata_type; } + MetaDataType get_metadata_type() { return _type; } + + protected: + MetaDataInfoBatch _info_batch; + MetaDataType _type; }; -struct LabelBatch : public MetaDataBatch -{ - void clear() override - { - _label_id.clear(); +class LabelBatch : public MetaDataBatch { + public: + void clear() override { + for (auto label : _label_ids) { + label.clear(); + } + _info_batch.clear(); + _label_ids.clear(); + _buffer_size.clear(); } - MetaDataBatch& operator += (MetaDataBatch& other) override - { - _label_id.insert(_label_id.end(), other.get_label_batch().begin(), other.get_label_batch().end()); + MetaDataBatch& operator+=(MetaDataBatch& other) override { + _label_ids.insert(_label_ids.end(), other.get_labels_batch().begin(), other.get_labels_batch().end()); + _info_batch.insert(other.get_info_batch()); return *this; } - void resize(int batch_size) override - { - _label_id.resize(batch_size); + void resize(int batch_size) override { + _label_ids.resize(batch_size); + _info_batch.resize(batch_size); } - int size() override - { - return _label_id.size(); + int size() override { + return _label_ids.size(); } - std::shared_ptr clone() override - { - return std::make_shared(*this); + std::shared_ptr clone(bool copy_contents) override { + if (copy_contents) { + return std::make_shared(*this); // Copy the entire metadata batch with all the metadata values and info + } else { + std::shared_ptr label_batch_instance = std::make_shared(); + label_batch_instance->resize(this->size()); + label_batch_instance->get_info_batch() = this->get_info_batch(); // Copy only info to newly created instance excluding the metadata values + return label_batch_instance; + } } - explicit LabelBatch(std::vector& labels) - { - _label_id = std::move(labels); + explicit LabelBatch(std::vector& labels) { + _label_ids = std::move(labels); } LabelBatch() = default; + void copy_data(std::vector buffer) override { 
+ if (buffer.size() < 1) + THROW("The buffers are insufficient") // TODO -change + auto labels_buffer = (int*)buffer[0]; + for (unsigned i = 0; i < _label_ids.size(); i++) { + memcpy(labels_buffer, _label_ids[i].data(), _label_ids[i].size() * sizeof(int)); + labels_buffer += _label_ids[i].size(); + } + } + std::vector& get_buffer_size() override { + _buffer_size.clear(); + size_t size = 0; + for (auto label : _label_ids) + size += label.size(); + _buffer_size.emplace_back(size * sizeof(int)); + return _buffer_size; + } + std::vector& get_labels_batch() override { return _label_ids; } + int mask_size() override{THROW("Not Implemented")} std::vector& get_bb_cords_batch() override { THROW("Not Implemented") } + void set_xywh_bbox() override{THROW("Not Implemented")} std::vector& get_mask_cords_batch() override{THROW("Not Implemented")} std::vector>& get_mask_polygons_count_batch() override{THROW("Not Implemented")} std::vector>>& get_mask_vertices_count_batch() override{THROW("Not Implemented")} JointsDataBatch& get_joints_data_batch() override { THROW("Not Implemented") } + + protected: + std::vector _label_ids = {}; + std::vector _buffer_size; }; -struct BoundingBoxBatch: public MetaDataBatch -{ - void clear() override - { +class BoundingBoxBatch : public LabelBatch { + public: + void clear() override { _bb_cords.clear(); - _bb_label_ids.clear(); - _img_sizes.clear(); + _label_ids.clear(); + _info_batch.clear(); + _buffer_size.clear(); } - MetaDataBatch& operator += (MetaDataBatch& other) override - { + MetaDataBatch& operator+=(MetaDataBatch& other) override { _bb_cords.insert(_bb_cords.end(), other.get_bb_cords_batch().begin(), other.get_bb_cords_batch().end()); - _bb_label_ids.insert(_bb_label_ids.end(), other.get_bb_labels_batch().begin(), other.get_bb_labels_batch().end()); - _img_sizes.insert(_img_sizes.end(), other.get_img_sizes_batch().begin(), other.get_img_sizes_batch().end()); + _label_ids.insert(_label_ids.end(), other.get_labels_batch().begin(), 
other.get_labels_batch().end()); + _info_batch.insert(other.get_info_batch()); return *this; } - void resize(int batch_size) override - { + void resize(int batch_size) override { _bb_cords.resize(batch_size); - _bb_label_ids.resize(batch_size); - _img_sizes.resize(batch_size); + _label_ids.resize(batch_size); + _info_batch.resize(batch_size); } - int size() override - { + int size() override { return _bb_cords.size(); } - std::shared_ptr clone() override - { - return std::make_shared(*this); + std::shared_ptr clone(bool copy_contents) override { + if (copy_contents) { + return std::make_shared(*this); // Copy the entire metadata batch with all the metadata values and info + } else { + std::shared_ptr bbox_batch_instance = std::make_shared(); + bbox_batch_instance->resize(this->size()); + bbox_batch_instance->get_info_batch() = this->get_info_batch(); // Copy only info to newly created instance excluding the metadata values + return bbox_batch_instance; + } } + void convert_ltrb_to_xywh(BoundingBoxCords& ltrb_bbox_list) { + for (unsigned i = 0; i < ltrb_bbox_list.size(); i++) { + auto& bbox = ltrb_bbox_list[i]; + // Change the values in place + bbox.r = bbox.r - bbox.l; + bbox.b = bbox.b - bbox.t; + } + } + void copy_data(std::vector buffer) override { + if (buffer.size() < 2) + THROW("The buffers are insufficient") // TODO -change + int* labels_buffer = (int*)buffer[0]; + float* bbox_buffer = (float*)buffer[1]; + for (unsigned i = 0; i < _label_ids.size(); i++) { + memcpy(labels_buffer, _label_ids[i].data(), _label_ids[i].size() * sizeof(int)); + if (_bbox_output_type == BoundingBoxType::XYWH) convert_ltrb_to_xywh(_bb_cords[i]); + memcpy(bbox_buffer, _bb_cords[i].data(), _label_ids[i].size() * 4 * sizeof(float)); + labels_buffer += _label_ids[i].size(); + bbox_buffer += (_label_ids[i].size() * 4); + } + } + std::vector& get_buffer_size() override { + _buffer_size.clear(); + size_t size = 0; + for (auto label : _label_ids) + size += label.size(); + 
_buffer_size.emplace_back(size * sizeof(int)); + _buffer_size.emplace_back(size * 4 * sizeof(float)); + return _buffer_size; + } + std::vector& get_bb_cords_batch() override { return _bb_cords; } + void set_xywh_bbox() override { _bbox_output_type = BoundingBoxType::XYWH; } + + protected: + std::vector _bb_cords = {}; + BoundingBoxType _bbox_output_type = BoundingBoxType::LTRB; }; -struct KeyPointBatch : public MetaDataBatch -{ - void clear() override - { - _img_sizes.clear(); +struct PolygonMaskBatch : public BoundingBoxBatch { + public: + void clear() override { + _bb_cords.clear(); + _label_ids.clear(); + _info_batch.clear(); + _mask_cords.clear(); + _polygon_counts.clear(); + _vertices_counts.clear(); + _buffer_size.clear(); + } + MetaDataBatch& operator+=(MetaDataBatch& other) override { + _bb_cords.insert(_bb_cords.end(), other.get_bb_cords_batch().begin(), other.get_bb_cords_batch().end()); + _label_ids.insert(_label_ids.end(), other.get_labels_batch().begin(), other.get_labels_batch().end()); + _info_batch.insert(other.get_info_batch()); + _mask_cords.insert(_mask_cords.end(), other.get_mask_cords_batch().begin(), other.get_mask_cords_batch().end()); + _polygon_counts.insert(_polygon_counts.end(), other.get_mask_polygons_count_batch().begin(), other.get_mask_polygons_count_batch().end()); + _vertices_counts.insert(_vertices_counts.end(), other.get_mask_vertices_count_batch().begin(), other.get_mask_vertices_count_batch().end()); + return *this; + } + void resize(int batch_size) override { + _bb_cords.resize(batch_size); + _label_ids.resize(batch_size); + _info_batch.resize(batch_size); + _mask_cords.resize(batch_size); + _polygon_counts.resize(batch_size); + _vertices_counts.resize(batch_size); + } + std::vector& get_mask_cords_batch() override { return _mask_cords; } + std::vector>& get_mask_polygons_count_batch() override { return _polygon_counts; } + std::vector>>& get_mask_vertices_count_batch() override { return _vertices_counts; } + int mask_size() 
override { return _mask_cords.size(); } + std::shared_ptr clone(bool copy_contents) override { + if (copy_contents) { + return std::make_shared(*this); // Copy the entire metadata batch with all the metadata values and info + } else { + std::shared_ptr mask_batch_instance = std::make_shared(); + mask_batch_instance->resize(this->size()); + mask_batch_instance->get_info_batch() = this->get_info_batch(); // Copy only info to newly created instance excluding the metadata values + return mask_batch_instance; + } + } + void copy_data(std::vector buffer) override { + if (buffer.size() < 2) + THROW("The buffers are insufficient") // TODO -change + int* labels_buffer = (int*)buffer[0]; + float* bbox_buffer = (float*)buffer[1]; + float* mask_buffer = (float*)buffer[2]; + for (unsigned i = 0; i < _label_ids.size(); i++) { + mempcpy(labels_buffer, _label_ids[i].data(), _label_ids[i].size() * sizeof(int)); + if (_bbox_output_type == BoundingBoxType::XYWH) convert_ltrb_to_xywh(_bb_cords[i]); + memcpy(bbox_buffer, _bb_cords[i].data(), _label_ids[i].size() * 4 * sizeof(float)); + memcpy(mask_buffer, _mask_cords[i].data(), _mask_cords[i].size() * sizeof(float)); + labels_buffer += _label_ids[i].size(); + bbox_buffer += (_label_ids[i].size() * 4); + mask_buffer += _mask_cords[i].size(); + } + } + std::vector& get_buffer_size() override { + _buffer_size.clear(); + size_t size = 0; + for (auto label : _label_ids) + size += label.size(); + _buffer_size.emplace_back(size * sizeof(int)); + _buffer_size.emplace_back(size * 4 * sizeof(float)); + size = 0; + for (auto mask : _mask_cords) + size += mask.size(); + _buffer_size.emplace_back(size * sizeof(float)); + return _buffer_size; + } + + protected: + std::vector _mask_cords = {}; + std::vector> _polygon_counts = {}; + std::vector>> _vertices_counts = {}; +}; + +class KeyPointBatch : public BoundingBoxBatch { + public: + void clear() override { + _info_batch.clear(); _joints_data = {}; _bb_cords.clear(); - _bb_label_ids.clear(); + 
_label_ids.clear(); } - MetaDataBatch& operator += (MetaDataBatch& other) override - { - _img_sizes.insert(_img_sizes.end(), other.get_img_sizes_batch().begin(), other.get_img_sizes_batch().end()); + MetaDataBatch& operator+=(MetaDataBatch& other) override { _joints_data.image_id_batch.insert(_joints_data.image_id_batch.end(), other.get_joints_data_batch().image_id_batch.begin(), other.get_joints_data_batch().image_id_batch.end()); _joints_data.annotation_id_batch.insert(_joints_data.annotation_id_batch.end(), other.get_joints_data_batch().annotation_id_batch.begin(), other.get_joints_data_batch().annotation_id_batch.end()); _joints_data.center_batch.insert(_joints_data.center_batch.end(), other.get_joints_data_batch().center_batch.begin(), other.get_joints_data_batch().center_batch.end()); _joints_data.scale_batch.insert(_joints_data.scale_batch.end(), other.get_joints_data_batch().scale_batch.begin(), other.get_joints_data_batch().scale_batch.end()); - _joints_data.joints_batch.insert(_joints_data.joints_batch.end(), other.get_joints_data_batch().joints_batch.begin() ,other.get_joints_data_batch().joints_batch.end()); + _joints_data.joints_batch.insert(_joints_data.joints_batch.end(), other.get_joints_data_batch().joints_batch.begin(), other.get_joints_data_batch().joints_batch.end()); _joints_data.joints_visibility_batch.insert(_joints_data.joints_visibility_batch.end(), other.get_joints_data_batch().joints_visibility_batch.begin(), other.get_joints_data_batch().joints_visibility_batch.end()); _joints_data.score_batch.insert(_joints_data.score_batch.end(), other.get_joints_data_batch().score_batch.begin(), other.get_joints_data_batch().score_batch.end()); _joints_data.rotation_batch.insert(_joints_data.rotation_batch.end(), other.get_joints_data_batch().rotation_batch.begin(), other.get_joints_data_batch().rotation_batch.end()); + _info_batch.insert(other.get_info_batch()); return *this; } - void resize(int batch_size) override - { + void resize(int batch_size) 
override { _joints_data.image_id_batch.resize(batch_size); _joints_data.annotation_id_batch.resize(batch_size); _joints_data.center_batch.resize(batch_size); @@ -268,22 +497,34 @@ struct KeyPointBatch : public MetaDataBatch _joints_data.joints_visibility_batch.resize(batch_size); _joints_data.score_batch.resize(batch_size); _joints_data.rotation_batch.resize(batch_size); + _info_batch.resize(batch_size); _bb_cords.resize(batch_size); - _bb_label_ids.resize(batch_size); + _label_ids.resize(batch_size); } - int size() override - { + int size() override { return _joints_data.image_id_batch.size(); } - std::shared_ptr clone() override - { - return std::make_shared(*this); + std::shared_ptr clone(bool copy_contents) override { + if (copy_contents) { + return std::make_shared(*this); // Copy the entire metadata batch with all the metadata values and info + } else { + std::shared_ptr joints_batch_instance = std::make_shared(); + joints_batch_instance->resize(this->size()); + joints_batch_instance->get_info_batch() = this->get_info_batch(); // Copy only info to newly created instance excluding the metadata values + return joints_batch_instance; + } } + JointsDataBatch& get_joints_data_batch() override { return _joints_data; } + void copy_data(std::vector buffer) override {} + std::vector& get_buffer_size() override { return _buffer_size; } + + protected: + JointsDataBatch _joints_data = {}; }; using ImageNameBatch = std::vector; using pMetaData = std::shared_ptr