-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsearchindex.js
1 lines (1 loc) · 142 KB
/
searchindex.js
1
Search.setIndex({"docnames": ["code/adobe_extract/adobe_to_textract", "code/adobe_extract/adobe_to_textract_utility", "code/adobe_extract/create_blocks", "code/adobe_extract/index", "code/adobe_extract/join_adobe_textract", "code/adobe_extract/join_adobe_textract_structure", "code/adobe_extract/join_adobe_textract_utility", "code/adobe_extract/main", "code/adobe_extract/table_tracking", "code/adobe_extract/transform_tables", "code/app", "code/classifier/index", "code/classifier/main", "code/classifier/mapping_dict", "code/classifier/metrics", "code/confidence/cell_status", "code/confidence/check_transformation", "code/confidence/combined_confidence", "code/confidence/index", "code/dl_classification/dl_tools", "code/dl_classification/index", "code/dl_classification/table_classifier", "code/equipment_parser/data_models", "code/equipment_parser/index", "code/equipment_parser/main", "code/equipment_parser/serial/index", "code/equipment_parser/serial/main", "code/equipment_parser/serial/process_color", "code/equipment_parser/serial/process_layout", "code/equipment_parser/serial/process_line", "code/equipment_parser/serial/process_version", "code/equipment_parser/serial/serial_data_models", "code/equipment_parser/serial/utils", "code/equipment_parser/shared", "code/equipment_parser/table/cell_state/cell_classification", "code/equipment_parser/table/cell_state/index", "code/equipment_parser/table/cell_state/main", "code/equipment_parser/table/eq_packages", "code/equipment_parser/table/index", "code/equipment_parser/table/labeling", "code/equipment_parser/table/main", "code/equipment_parser/table/recycle_headers", "code/equipment_parser/table/row_filtering", "code/equipment_parser/table/table_semantics", "code/equipment_parser/table/table_split", "code/equipment_parser/table/utils", "code/equipment_parser/table/versions_matching", "code/equipment_parser/testing/generator", "code/equipment_parser/testing/index", "code/equipment_parser/testing/output_formatters", "code/expert_rules/after_matching", "code/expert_rules/before_matching", "code/expert_rules/index", "code/expert_rules/post_cell_split_transform", "code/expert_rules/post_labeling_transform", "code/expert_rules/post_table_parser", "code/expert_rules/pre_labeling_td_transform", "code/expert_rules/pre_labeling_transform", "code/expert_rules/select_settings", "code/expert_rules/unite_combinations", "code/expert_rules/utils", "code/gpt_processing/index", "code/gpt_processing/querier", "code/gpt_processing/td_gpt_data", "code/index", "code/layout_transforming/index", "code/layout_transforming/index_mapper", "code/layout_transforming/layout_mapper", "code/layout_transforming/layout_parser", "code/layout_transforming/merge_mappers", "code/layout_transforming/technical_mappers", "code/local_test", "code/make_logs/create_excels", "code/make_logs/excel_extractor", "code/make_logs/excel_utility", "code/make_logs/index", "code/make_logs/read_excel", "code/make_logs/table_makers", "code/make_logs/write_labels", "code/make_logs/write_logs", "code/multi_values_split/index", "code/multi_values_split/main", "code/multi_values_split/utils", "code/page_selection/index", "code/page_selection/pl_finder/index", "code/page_selection/pl_finder/main", "code/page_selection/pl_finder/merge_tables", "code/page_selection/pl_finder/price_page_finder", "code/page_selection/pl_finder/price_table_finder", "code/page_selection/pl_finder/table_preparator", "code/page_selection/pl_finder/utils", "code/page_selection/td_finder/features", "code/page_selection/td_finder/index", "code/page_selection/td_finder/main", "code/parser_postprocessing/add_VE_id", "code/parser_postprocessing/add_confidence", "code/parser_postprocessing/duplicates", "code/parser_postprocessing/index", "code/parser_postprocessing/main", "code/parser_postprocessing/model_generation_matching", "code/parser_postprocessing/model_generation_matching_v2", "code/parser_postprocessing/parser_postprocessing", "code/parser_postprocessing/reduce_possible_ids", "code/preprocess_pdf/adobe_parser", "code/preprocess_pdf/assemble_extracts", "code/preprocess_pdf/improve_textract", "code/preprocess_pdf/index", "code/preprocess_pdf/merge_tables", "code/preprocess_pdf/pdf_parser", "code/preprocess_pdf/segment_classifiers", "code/preprocess_pdf/segment_divider", "code/preprocess_pdf/textract_generator", "code/preprocess_pdf/textract_parser", "code/price_list_parser/index", "code/price_list_parser/matching", "code/price_list_parser/models", "code/price_list_parser/multiple_price_lists_utils", "code/price_list_parser/parser_v2", "code/price_list_parser/parser_v2_utils", "code/price_list_parser/settings", "code/price_list_parser/storage", "code/price_list_parser/transform_to_json", "code/shared/eval", "code/shared/helpers", "code/shared/index", "code/shared/preskok_api", "code/shared/price_lists", "code/shared/s3", "code/shared/string_utils", "code/table_parser_v2/abstract_parser", "code/table_parser_v2/column_extraction", "code/table_parser_v2/helpers", "code/table_parser_v2/index", "code/table_parser_v2/main", "code/table_parser_v2/parse_tables", "code/table_parser_v2/pl_summary", "code/table_parser_v2/search_textract_blocks", "code/table_parser_v2/table_surrounding", "code/table_parser_v2/td_parser", "code/table_parser_v2/transmission", "code/table_parser_v2/utils", "code/textract/helpers", "code/textract/index", "code/textract/line_splitting", "code/textract/main", "code/textract/textract_divider", "code/textract/utils", "flow/classifying_pages", "flow/cleanup", "flow/collect_combinations", "flow/expert_rules1", "flow/expert_rules2", "flow/index", "flow/json_format", "flow/labeling", "flow/logging", "flow/pandas_dataframes", "flow/processing_table", "flow/setting_up", "flow/splitting", "flow/standard_form", "flow/structuring_pdf", "flow/validating", "flow/vehicle_editor", "index", "sample"], "filenames": ["code/adobe_extract/adobe_to_textract.rst", "code/adobe_extract/adobe_to_textract_utility.rst", "code/adobe_extract/create_blocks.rst", "code/adobe_extract/index.rst", "code/adobe_extract/join_adobe_textract.rst", "code/adobe_extract/join_adobe_textract_structure.rst", "code/adobe_extract/join_adobe_textract_utility.rst", "code/adobe_extract/main.rst", "code/adobe_extract/table_tracking.rst", "code/adobe_extract/transform_tables.rst", "code/app.rst", "code/classifier/index.rst", "code/classifier/main.rst", "code/classifier/mapping_dict.rst", "code/classifier/metrics.rst", "code/confidence/cell_status.rst", "code/confidence/check_transformation.rst", "code/confidence/combined_confidence.rst", "code/confidence/index.rst", "code/dl_classification/dl_tools.rst", "code/dl_classification/index.rst", "code/dl_classification/table_classifier.rst", "code/equipment_parser/data_models.rst", "code/equipment_parser/index.rst", "code/equipment_parser/main.rst", "code/equipment_parser/serial/index.rst", "code/equipment_parser/serial/main.rst", "code/equipment_parser/serial/process_color.rst", "code/equipment_parser/serial/process_layout.rst", "code/equipment_parser/serial/process_line.rst", "code/equipment_parser/serial/process_version.rst", "code/equipment_parser/serial/serial_data_models.rst", "code/equipment_parser/serial/utils.rst", "code/equipment_parser/shared.rst", "code/equipment_parser/table/cell_state/cell_classification.rst", "code/equipment_parser/table/cell_state/index.rst", "code/equipment_parser/table/cell_state/main.rst", "code/equipment_parser/table/eq_packages.rst", "code/equipment_parser/table/index.rst", "code/equipment_parser/table/labeling.rst", "code/equipment_parser/table/main.rst", "code/equipment_parser/table/recycle_headers.rst", "code/equipment_parser/table/row_filtering.rst", "code/equipment_parser/table/table_semantics.rst", "code/equipment_parser/table/table_split.rst", "code/equipment_parser/table/utils.rst", "code/equipment_parser/table/versions_matching.rst", "code/equipment_parser/testing/generator.rst", "code/equipment_parser/testing/index.rst", "code/equipment_parser/testing/output_formatters.rst", "code/expert_rules/after_matching.rst", "code/expert_rules/before_matching.rst", "code/expert_rules/index.rst", "code/expert_rules/post_cell_split_transform.rst", "code/expert_rules/post_labeling_transform.rst", "code/expert_rules/post_table_parser.rst", "code/expert_rules/pre_labeling_td_transform.rst", "code/expert_rules/pre_labeling_transform.rst", "code/expert_rules/select_settings.rst", "code/expert_rules/unite_combinations.rst", "code/expert_rules/utils.rst", "code/gpt_processing/index.rst", "code/gpt_processing/querier.rst", "code/gpt_processing/td_gpt_data.rst", "code/index.rst", "code/layout_transforming/index.rst", "code/layout_transforming/index_mapper.rst", "code/layout_transforming/layout_mapper.rst", "code/layout_transforming/layout_parser.rst", "code/layout_transforming/merge_mappers.rst", "code/layout_transforming/technical_mappers.rst", "code/local_test.rst", "code/make_logs/create_excels.rst", "code/make_logs/excel_extractor.rst", "code/make_logs/excel_utility.rst", "code/make_logs/index.rst", "code/make_logs/read_excel.rst", "code/make_logs/table_makers.rst", "code/make_logs/write_labels.rst", "code/make_logs/write_logs.rst", "code/multi_values_split/index.rst", "code/multi_values_split/main.rst", "code/multi_values_split/utils.rst", "code/page_selection/index.rst", "code/page_selection/pl_finder/index.rst", "code/page_selection/pl_finder/main.rst", "code/page_selection/pl_finder/merge_tables.rst", "code/page_selection/pl_finder/price_page_finder.rst", "code/page_selection/pl_finder/price_table_finder.rst", "code/page_selection/pl_finder/table_preparator.rst", "code/page_selection/pl_finder/utils.rst", "code/page_selection/td_finder/features.rst", "code/page_selection/td_finder/index.rst", "code/page_selection/td_finder/main.rst", "code/parser_postprocessing/add_VE_id.rst", "code/parser_postprocessing/add_confidence.rst", "code/parser_postprocessing/duplicates.rst", "code/parser_postprocessing/index.rst", "code/parser_postprocessing/main.rst", "code/parser_postprocessing/model_generation_matching.rst", "code/parser_postprocessing/model_generation_matching_v2.rst", "code/parser_postprocessing/parser_postprocessing.rst", "code/parser_postprocessing/reduce_possible_ids.rst", "code/preprocess_pdf/adobe_parser.rst", "code/preprocess_pdf/assemble_extracts.rst", "code/preprocess_pdf/improve_textract.rst", "code/preprocess_pdf/index.rst", "code/preprocess_pdf/merge_tables.rst", "code/preprocess_pdf/pdf_parser.rst", "code/preprocess_pdf/segment_classifiers.rst", "code/preprocess_pdf/segment_divider.rst", "code/preprocess_pdf/textract_generator.rst", "code/preprocess_pdf/textract_parser.rst", "code/price_list_parser/index.rst", "code/price_list_parser/matching.rst", "code/price_list_parser/models.rst", "code/price_list_parser/multiple_price_lists_utils.rst", "code/price_list_parser/parser_v2.rst", "code/price_list_parser/parser_v2_utils.rst", "code/price_list_parser/settings.rst", "code/price_list_parser/storage.rst", "code/price_list_parser/transform_to_json.rst", "code/shared/eval.rst", "code/shared/helpers.rst", "code/shared/index.rst", "code/shared/preskok_api.rst", "code/shared/price_lists.rst", "code/shared/s3.rst", "code/shared/string_utils.rst", "code/table_parser_v2/abstract_parser.rst", "code/table_parser_v2/column_extraction.rst", "code/table_parser_v2/helpers.rst", "code/table_parser_v2/index.rst", "code/table_parser_v2/main.rst", "code/table_parser_v2/parse_tables.rst", "code/table_parser_v2/pl_summary.rst", "code/table_parser_v2/search_textract_blocks.rst", "code/table_parser_v2/table_surrounding.rst", "code/table_parser_v2/td_parser.rst", "code/table_parser_v2/transmission.rst", "code/table_parser_v2/utils.rst", "code/textract/helpers.rst", "code/textract/index.rst", "code/textract/line_splitting.rst", "code/textract/main.rst", "code/textract/textract_divider.rst", "code/textract/utils.rst", "flow/classifying_pages.rst", "flow/cleanup.rst", "flow/collect_combinations.rst", "flow/expert_rules1.rst", "flow/expert_rules2.rst", "flow/index.rst", "flow/json_format.rst", "flow/labeling.rst", "flow/logging.rst", "flow/pandas_dataframes.rst", "flow/processing_table.rst", "flow/setting_up.rst", "flow/splitting.rst", "flow/standard_form.rst", "flow/structuring_pdf.rst", "flow/validating.rst", "flow/vehicle_editor.rst", "index.rst", "sample.rst"], "titles": ["adobe_to_textract", "adobe_to_textract_utility", "create_blocks", "adobe_extract", "join_adobe_textract", "join_adobe_textract_structure", "join_adobe_textract_utility", "main", "table_tracking", "transform_tables", "app", "classifier", "main", "mapping_dict", "metrics", "cell_status", "check_transformation", "combined_confidence", "confidence", "dl_tools", "dl_classification", "table_classifier", "data_models", "equipment_parser", "main", "serial", "main", "process_color", "process_layout", "process_line", "process_version", "serial_data_models", "utils", "shared", "cell_classification", "cell_state", "main", "eq_packages", "table", "labeling", "main", "recycle_headers", "row_filtering", "table_semantics", "table_split", "utils", "versions_matching", "generator", "testing", "output_formatters", "after_matching", "before_matching", "expert_rules", "post_cell_split_transform", "post_labeling_transform", "post_table_parser", "pre_labeling_td_transform", "pre_labeling_transform", "select_settings", "unite_combinations", "utils", "gpt_processing", "querier", "td_gpt_data", "Function documentation", "layout_transforming", "index_mapper", "layout_mapper", "layout_parser", "merge_mappers", "technical_mappers", "local_test", "create_excels", "excel_extractor", "excel_utility", "make_logs", "read_excel", "table_makers", "write_labels", "write_logs", "multi_values_split", "main", "utils", "page_selection", "pl_finder", "main", "merge_tables", "price_page_finder", "price_table_finder", "table_preparator", "utils", "features", "td_finder", "main", "add_VE_id", "add_confidence", "duplicates", "parser_postprocessing", "main", "model_generation_matching", "model_generation_matching_v2", "parser_postprocessing", "reduce_possible_ids", "adobe_parser", "assemble_extracts", "improve_textract", "preprocess_pdf", "merge_tables", "pdf_parser", "segment_classifiers", "segment_divider", "textract_generator", "textract_parser", "price_list_parser", "matching", "models", "multiple_price_lists_utils", "parser_v2", "parser_v2_utils", "settings", "storage", "transform_to_json", "eval", "helpers", "shared", "preskok_api", "price_lists", "s3", "string_utils", "abstract_parser", "column_extraction", "helpers", "table_parser_v2", "main", "parse_tables", "pl_summary", "search_textract_blocks", "table_surrounding", "td_parser", "transmission", "utils", "helpers", "textract", "line_splitting", "main", "textract_divider", "utils", "Classifying pages", "Clean-up", "Collect combinations", "Applying expert rules", "Applying expert rules, round #2", "Overview", "Transforming to the correct format", "Labeling", "Logging", "Creating Pandas Dataframes", "<no title>", "Setting up", "Splitting", "Transforming to the standard table form", "Structuring the PDF", "Validating the results", "Add Vehicle Editor IDs", "Price list parser\u2019s documentation!", "Usage"], "terms": {"make_line_refer": [0, 3, 64], "text_to_lin": 0, "dict": [0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 17, 24, 26, 27, 28, 29, 31, 32, 33, 34, 40, 41, 43, 45, 46, 50, 51, 55, 58, 59, 63, 66, 67, 68, 74, 81, 82, 85, 86, 87, 88, 89, 90, 93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 105, 107, 109, 110, 114, 115, 117, 118, 120, 121, 122, 123, 125, 126, 127, 130, 131, 133, 134, 136, 137, 139, 140, 141, 143, 144, 145], "line_block": [0, 2, 8, 9], "thi": [0, 1, 5, 10, 28, 33, 39, 47, 63, 67, 69, 73, 96, 104, 111, 119, 126, 129, 145, 147, 150, 152, 154, 156, 158, 159, 160, 161, 164], "function": [0, 1, 5, 10, 28, 33, 37, 39, 43, 47, 49, 63, 67, 68, 69, 73, 96, 98, 102, 104, 116, 118, 119, 122, 123, 127, 129, 139, 145, 152, 164], "append": [0, 8, 85, 134], "line": [0, 2, 4, 6, 8, 9, 26, 28, 29, 30, 32, 37, 43, 49, 67, 85, 88, 89, 90, 109, 110, 120, 143, 156, 161], "block": [0, 2, 4, 5, 6, 8, 9, 12, 26, 27, 28, 31, 32, 34, 41, 44, 74, 77, 89, 107, 136, 137, 141, 143, 144], "list": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 17, 19, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 37, 40, 41, 42, 44, 45, 46, 47, 49, 50, 51, 54, 55, 58, 59, 63, 66, 67, 68, 69, 71, 72, 73, 74, 77, 78, 79, 81, 82, 85, 86, 87, 88, 89, 90, 91, 93, 94, 96, 98, 99, 100, 101, 102, 103, 105, 107, 108, 109, 110, 111, 112, 114, 115, 116, 117, 118, 121, 122, 125, 126, 127, 128, 129, 131, 133, 134, 136, 137, 138, 139, 140, 141, 143, 145, 147, 152, 158, 161], "correspond": [0, 4, 5, 6, 13, 39, 57, 71, 74, 88, 90, 135, 137], "text": [0, 2, 4, 6, 9, 12, 19, 21, 30, 31, 32, 49, 51, 55, 60, 62, 68, 77, 81, 86, 87, 88, 90, 91, 103, 108, 110, 111, 112, 117, 128, 136, 139, 141, 152, 156, 158, 161], "dictionari": [0, 1, 5, 6, 8, 13, 14, 17, 24, 28, 29, 32, 33, 46, 47, 58, 59, 74, 81, 85, 86, 87, 88, 89, 90, 96, 99, 100, 102, 105, 115, 118, 120, 121, 122, 126, 130, 131, 136, 140, 152, 158], "If": [0, 4, 5, 13, 24, 29, 31, 32, 37, 39, 45, 47, 50, 57, 58, 62, 66, 67, 68, 71, 74, 78, 88, 93, 99, 110, 111, 112, 114, 119, 122, 126, 129, 131, 135, 140, 144, 152, 159], "i": [0, 2, 4, 5, 6, 7, 9, 10, 12, 14, 19, 21, 22, 24, 26, 28, 29, 30, 31, 32, 37, 39, 41, 43, 44, 45, 47, 49, 53, 57, 58, 59, 60, 62, 66, 67, 68, 69, 70, 71, 73, 74, 77, 78, 82, 88, 89, 90, 91, 93, 95, 96, 109, 110, 112, 114, 115, 116, 119, 122, 126, 127, 128, 129, 131, 135, 136, 138, 140, 144, 145, 147, 152, 154, 158, 160, 161, 164], "alreadi": [0, 24, 95, 112, 138, 152, 156], "kei": [0, 6, 51, 54, 58, 87, 88, 90, 94, 101, 107, 121, 127, 131, 144, 152], "ad": [0, 2, 6, 8, 29, 30, 32, 39, 41, 67, 71, 90, 95, 98, 107, 122, 138], "paramet": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 66, 67, 68, 69, 70, 71, 72, 73, 74, 76, 77, 78, 79, 81, 82, 85, 86, 87, 88, 89, 90, 91, 93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 104, 105, 107, 108, 109, 110, 111, 112, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130, 131, 133, 134, 136, 137, 138, 139, 140, 141, 143, 144, 145, 146], "A": [0, 1, 4, 5, 6, 7, 8, 12, 14, 17, 22, 24, 26, 28, 31, 34, 37, 39, 41, 42, 43, 44, 45, 46, 49, 55, 59, 63, 67, 74, 76, 86, 87, 88, 91, 96, 99, 102, 108, 109, 115, 118, 119, 121, 122, 123, 126, 128, 131, 136, 139, 140, 141, 143, 145, 146, 152, 161], "where": [0, 5, 7, 44, 58, 72, 73, 77, 87, 88, 90, 96, 105, 116, 118, 121, 128, 131, 144, 145, 147, 152, 158, 161], "ar": [0, 4, 5, 6, 13, 24, 28, 29, 30, 47, 53, 58, 59, 66, 67, 68, 70, 73, 86, 87, 88, 107, 109, 114, 116, 118, 122, 129, 131, 135, 145, 150, 152, 158, 160, 161, 164], "valu": [0, 1, 5, 12, 13, 14, 15, 16, 17, 19, 22, 36, 39, 41, 42, 43, 44, 45, 46, 49, 51, 53, 54, 56, 57, 58, 59, 63, 66, 67, 68, 69, 70, 73, 74, 76, 77, 78, 81, 87, 89, 90, 91, 94, 95, 99, 101, 110, 121, 123, 126, 128, 129, 130, 133, 136, 137, 139, 140, 152, 159], "repres": [0, 9, 12, 22, 34, 45, 74, 86, 88, 94, 121, 150, 152, 156, 160, 164], "return": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 39, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 54, 55, 56, 57, 59, 60, 62, 63, 66, 67, 68, 69, 70, 71, 72, 73, 74, 76, 77, 78, 79, 81, 82, 85, 86, 87, 88, 89, 90, 91, 93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 104, 105, 107, 108, 109, 110, 111, 112, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130, 131, 133, 135, 136, 137, 138, 139, 140, 141, 143, 144, 145, 146, 152], "none": [0, 5, 6, 7, 8, 10, 12, 13, 16, 19, 21, 24, 31, 36, 37, 40, 41, 43, 45, 46, 49, 50, 51, 56, 57, 62, 63, 66, 67, 68, 69, 72, 73, 77, 78, 82, 88, 89, 90, 91, 93, 94, 95, 100, 101, 103, 104, 108, 110, 114, 117, 118, 120, 122, 123, 125, 126, 127, 128, 129, 130, 131, 133, 134, 135, 136, 138, 139, 140, 144], "adobe_output": 0, "tupl": [0, 1, 2, 4, 6, 9, 15, 17, 19, 22, 24, 26, 27, 28, 30, 34, 36, 41, 42, 43, 44, 45, 46, 49, 51, 54, 55, 57, 58, 59, 60, 66, 67, 68, 69, 73, 74, 76, 79, 81, 86, 87, 88, 89, 90, 91, 94, 99, 103, 104, 105, 107, 108, 109, 116, 118, 121, 122, 123, 127, 128, 131, 136, 140, 141, 143, 145], "original_pag": 0, "int": [0, 1, 2, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 19, 21, 24, 26, 28, 29, 30, 31, 32, 33, 34, 36, 40, 42, 43, 45, 46, 54, 60, 66, 67, 68, 69, 72, 73, 74, 76, 77, 78, 85, 86, 87, 88, 89, 90, 91, 93, 94, 95, 98, 101, 102, 103, 105, 107, 108, 109, 110, 111, 112, 114, 116, 118, 121, 122, 123, 125, 127, 128, 129, 130, 133, 135, 136, 138, 140, 141, 145], "convert": [0, 19, 26, 30, 32, 51, 54, 60, 66, 74, 76, 94, 105, 109, 115, 140], "adob": [0, 4, 5, 24, 26, 33, 103, 152], "output": [0, 4, 6, 17, 24, 26, 47, 73, 152, 154, 161], "textract": [0, 4, 6, 12, 26, 33, 34, 43, 46, 51, 64, 85, 89, 102, 103, 105, 107, 110, 115, 117, 136, 137, 144, 152, 156], "format": [0, 10, 12, 24, 40, 98, 105, 114, 122, 156, 159, 160, 164], "It": [0, 4, 16, 69, 93, 96, 104, 114, 119, 152, 158, 161], "process": [0, 7, 8, 10, 12, 19, 30, 31, 32, 41, 42, 56, 63, 67, 79, 85, 88, 89, 90, 94, 98, 108, 112, 126, 129, 137, 139, 144, 147, 150, 158], "creat": [0, 2, 6, 8, 15, 19, 22, 24, 26, 31, 32, 37, 66, 67, 69, 72, 73, 74, 77, 78, 81, 104, 110, 123, 129, 141, 143, 144, 159], "metadata": [0, 2, 8, 34, 117, 126, 131, 137], "page": [0, 2, 4, 6, 7, 8, 12, 23, 24, 25, 26, 28, 29, 31, 33, 34, 45, 55, 62, 64, 73, 85, 87, 88, 89, 91, 93, 102, 103, 105, 108, 110, 111, 112, 116, 117, 118, 120, 136, 144, 145, 156, 158, 161, 164], "relat": [0, 2, 5, 8, 58, 140], "contain": [0, 1, 5, 7, 10, 21, 22, 24, 26, 28, 30, 34, 36, 39, 41, 42, 43, 44, 45, 46, 49, 53, 55, 56, 58, 59, 64, 68, 74, 76, 78, 82, 85, 87, 88, 96, 98, 99, 102, 105, 106, 107, 109, 110, 116, 118, 120, 121, 126, 130, 134, 136, 138, 140, 141, 143, 145, 146, 147, 150, 152, 154, 156, 158, 159, 161], "json": [0, 10, 13, 17, 24, 82, 95, 98, 99, 103, 107, 114, 117, 121, 122, 125, 126, 152], "csv": [0, 9, 78, 103, 127, 141], "origin": [0, 4, 12, 30, 34, 41, 55, 59, 60, 66, 68, 77, 81, 86, 122, 129, 133, 145], "number": [0, 2, 4, 5, 7, 8, 12, 14, 19, 22, 24, 29, 30, 31, 33, 34, 41, 45, 54, 60, 68, 72, 81, 85, 86, 87, 89, 90, 91, 105, 107, 108, 109, 110, 116, 118, 122, 123, 125, 128, 131, 134, 136, 139, 144, 145], "type": [0, 1, 2, 4, 5, 6, 7, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 54, 55, 56, 57, 59, 60, 62, 63, 66, 67, 68, 69, 70, 71, 72, 73, 74, 76, 77, 78, 81, 82, 85, 86, 87, 88, 89, 90, 91, 93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 104, 105, 107, 108, 109, 110, 112, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130, 131, 133, 136, 137, 139, 140, 141, 143, 144, 145, 146, 152, 154, 156, 159, 160, 161], "get_mean_bbox": [1, 3, 64], "df": [1, 19, 43, 45, 66, 78, 109, 140], "pd": [1, 9, 19, 21, 34, 36, 39, 40, 41, 42, 43, 44, 45, 53, 54, 56, 57, 67, 81, 109, 130, 131, 133, 136], "datafram": [1, 9, 15, 16, 19, 21, 34, 36, 39, 40, 41, 42, 43, 44, 45, 53, 54, 56, 57, 66, 67, 68, 69, 70, 73, 74, 77, 78, 81, 88, 103, 109, 130, 131, 133, 138, 140], "row": [1, 2, 5, 9, 15, 19, 34, 36, 40, 41, 42, 43, 45, 54, 67, 68, 69, 73, 74, 77, 78, 81, 88, 89, 107, 109, 136, 140, 141, 143, 152, 156, 160, 161], "col": [1, 42], "str": [1, 2, 4, 5, 6, 7, 9, 12, 13, 14, 15, 17, 19, 21, 22, 24, 26, 28, 30, 31, 32, 34, 36, 37, 42, 43, 44, 45, 46, 47, 51, 53, 54, 55, 56, 57, 59, 60, 62, 63, 67, 68, 70, 73, 74, 76, 77, 78, 81, 82, 85, 87, 90, 91, 93, 94, 95, 98, 99, 100, 101, 102, 108, 109, 110, 111, 112, 114, 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130, 136, 139, 140, 141, 143, 144, 146], "float": [1, 6, 12, 14, 15, 17, 19, 27, 30, 31, 36, 45, 46, 51, 59, 67, 82, 94, 109, 110, 122, 123, 130, 136, 137], "take": [1, 4, 5, 69, 145, 152, 154, 156, 164], "panda": [1, 9, 19, 43, 66, 67, 68, 70, 74, 77, 78, 88, 103, 109, 136, 140], "index": [1, 2, 5, 12, 15, 19, 30, 32, 34, 36, 40, 43, 45, 66, 67, 69, 73, 74, 77, 78, 81, 89, 91, 108, 114, 121, 122, 129, 130, 133, 138, 140, 141, 164], "column": [1, 2, 15, 16, 19, 26, 28, 29, 31, 32, 34, 36, 40, 41, 42, 43, 45, 46, 53, 63, 66, 67, 68, 70, 73, 74, 77, 78, 81, 86, 107, 125, 126, 129, 130, 131, 133, 138, 139, 140, 141, 152, 156, 159, 160], "mean": [1, 28, 31, 45, 66, 129], "all": [1, 9, 12, 13, 24, 26, 28, 30, 42, 51, 60, 66, 74, 77, 85, 87, 90, 94, 96, 98, 107, 110, 112, 126, 127, 128, 129, 131, 136, 139, 143, 145, 152, 158, 160, 161, 164], "bound": [1, 2, 6, 28, 105, 107, 108, 136, 152, 156], "box": [1, 2, 28, 105, 107, 108, 136, 152, 156], "locat": [1, 4, 7, 12, 13, 73, 127, 131, 144, 152], "specifi": [1, 21, 42, 47, 53, 59, 63, 66, 68, 69, 72, 76, 77, 90, 94, 103, 108, 110, 120, 129, 145], "The": [1, 2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 26, 27, 28, 29, 30, 31, 34, 36, 37, 39, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 66, 67, 68, 69, 70, 72, 73, 74, 76, 77, 78, 79, 81, 82, 85, 86, 87, 88, 89, 90, 91, 93, 94, 95, 96, 99, 100, 101, 102, 103, 104, 107, 108, 109, 111, 112, 114, 115, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130, 131, 134, 135, 136, 138, 139, 140, 141, 143, 144, 145, 152, 156, 158, 160, 161, 164], "fill_dummy_block": [1, 3, 64], "cell": [1, 2, 4, 5, 6, 9, 15, 21, 34, 36, 39, 45, 54, 69, 70, 73, 76, 77, 78, 88, 89, 107, 129, 143, 152, 154, 159, 160, 161, 164], "replac": [1, 4, 5, 6, 30, 57, 66, 99, 129], "dummi": 1, "same": [1, 32, 39, 59, 96, 122, 129, 131, 147, 152, 158], "max_bbox": [1, 3, 5, 64], "bbox": [1, 2, 5, 136], "max": [1, 5], "width": [1, 5, 73, 105, 131, 156, 161], "height": [1, 5, 73, 105, 137, 156, 161], "min": [1, 5], "left": [1, 5, 67, 107, 156, 161], "top": [1, 5, 12, 14, 28, 32, 67, 131, 136, 137, 152, 156, 160, 161], "get_bounding_box": [1, 3, 64], "element": [1, 2, 5, 8, 37, 45, 49, 94, 128, 131], "pdf_width": 1, "pdf_height": 1, "pdf_rotat": 1, "pdf": [1, 4, 7, 12, 27, 28, 34, 40, 62, 104, 108, 110, 111, 117, 134, 144, 150, 164], "rotat": 1, "mean_horizontal_bbox": [1, 3, 64], "seri": [1, 42, 68, 109, 136, 140], "mean_vertical_bbox": [1, 3, 64], "create_word": [2, 3, 64], "word_id_cnt": 2, "word": [2, 6, 9, 12, 14, 19, 30, 45, 57, 67, 82, 91, 93, 109, 143, 147, 152, 159, 161], "given": [2, 5, 7, 9, 13, 15, 17, 21, 22, 24, 26, 27, 28, 30, 31, 32, 40, 41, 43, 51, 60, 66, 69, 70, 73, 76, 77, 78, 86, 89, 90, 93, 94, 98, 100, 101, 108, 109, 110, 111, 112, 114, 117, 119, 126, 127, 130, 131, 133, 136, 144, 158], "us": [2, 4, 8, 10, 13, 15, 17, 19, 21, 24, 26, 27, 28, 29, 31, 37, 40, 43, 46, 51, 55, 56, 62, 63, 67, 74, 77, 78, 79, 81, 82, 85, 90, 93, 94, 96, 102, 103, 104, 105, 109, 110, 111, 112, 119, 121, 123, 125, 126, 129, 131, 136, 138, 141, 147, 152, 154, 158, 161, 165], "base": [2, 4, 5, 6, 8, 9, 13, 14, 22, 24, 28, 29, 30, 33, 37, 43, 44, 51, 53, 54, 55, 56, 57, 58, 59, 66, 67, 68, 70, 74, 77, 81, 82, 86, 88, 91, 94, 95, 96, 98, 101, 107, 109, 114, 116, 119, 121, 122, 125, 129, 130, 136, 139, 152, 158], "id": [2, 4, 5, 6, 13, 24, 26, 30, 31, 32, 40, 41, 43, 46, 50, 54, 74, 78, 93, 94, 95, 96, 98, 100, 107, 108, 109, 118, 121, 125, 126, 128, 140, 143, 144, 156, 158, 161], "count": [2, 12, 31, 33, 76, 91, 94, 109, 136], "create_lin": [2, 3, 64], "pg_num_to_page_metadata": [2, 8], "line_id_cnt": 2, "create_cel": [2, 3, 64], "reference_block": 2, "j": [2, 19, 66], "idd": 2, "refer": [2, 129, 152], "create_t": [2, 3, 64], "table_id_cnt": 2, "tabl": [2, 4, 5, 6, 8, 9, 15, 17, 19, 23, 24, 36, 40, 42, 43, 44, 45, 59, 62, 63, 64, 66, 67, 68, 69, 71, 72, 73, 74, 77, 78, 79, 81, 85, 86, 88, 89, 90, 102, 104, 107, 110, 114, 116, 117, 129, 131, 133, 134, 137, 138, 140, 141, 143, 150, 154, 156, 158, 159, 161], "create_pag": [2, 3, 64], "page_rel": [2, 8], "adobe_to_textract": [3, 64], "adobe_to_textract_util": [3, 64], "create_block": [3, 64], "join_adobe_textract": [3, 64], "join_lin": [3, 4, 64], "join_tables_lin": [3, 4, 64], "cell_match": [3, 4, 64], "correct_cel": [3, 4, 64], "replace_lin": [3, 4, 64], "extend_id": [3, 4, 64], "join_text": [3, 4, 64], "match_by_loc": [3, 4, 64], "join_adobe_textract_structur": [3, 64], "get_child_relationship": [3, 5, 64], "merge_relationship": [3, 5, 64], "remove_merged_block_refer": [3, 5, 64], "update_block_map": [3, 5, 64], "merge_geometri": [3, 5, 64], "merge_cel": [3, 5, 64], "merge_two_row": [3, 5, 64], "get_merge_group": [3, 5, 64], "merge_row": [3, 5, 64], "correct_zero": [3, 5, 64], "is_increas": [3, 5, 64], "merge_rows_based_on_adob": [3, 5, 64], "join_adobe_textract_util": [3, 64], "remove_duplicate_lin": [3, 6, 64], "get_block_map": [3, 6, 64], "get_word_to_lin": [3, 6, 64], "get_word_to_cel": [3, 6, 64], "get_child": [3, 6, 64], "block_posit": [3, 6, 64], "lines_by_pag": [3, 6, 64], "tables_by_pag": [3, 6, 64], "interval_overlap_length": [3, 6, 64], "overlap_percentag": [3, 6, 64], "find_pair": [3, 6, 64], "add_bounding_s": [3, 6, 64], "add_siz": [3, 6, 64], "replace_words_in_lin": [3, 6, 64], "is_vert": [3, 6, 64], "textractutil": [3, 4, 6, 64], "__init__": [3, 6, 7, 20, 21, 22, 23, 25, 26, 31, 38, 40, 61, 62, 64, 65, 66, 75, 77, 83, 84, 89, 103, 106, 108, 109, 110, 111, 112, 113, 117, 129, 132, 133, 138, 142, 144], "main": [3, 11, 23, 25, 35, 38, 45, 64, 79, 80, 83, 84, 92, 97, 132, 142, 158, 164], "extract": [3, 4, 5, 7, 19, 24, 26, 27, 39, 40, 44, 45, 54, 55, 60, 64, 73, 78, 82, 91, 100, 102, 103, 104, 108, 111, 112, 117, 128, 129, 130, 133, 134, 137, 138, 139, 152, 161, 164], "table_track": [3, 9, 64], "add_el": [3, 8, 64], "create_table_block": [3, 8, 64], "insert_t": [3, 8, 64], "process_table_el": [3, 8, 64], "transform_t": [3, 64], "find_blocks_for_text": [3, 9, 64], "fill_partial_t": [3, 9, 64], "fill_tabl": [3, 9, 64], "is_line_with_text": [3, 9, 64], "is_start_block": [3, 9, 64], "is_end_block": [3, 9, 64], "pdf_parser": [4, 26, 27, 28, 34, 40, 62, 64, 103, 104, 106, 110, 111, 112, 117], "is_pricelist": [4, 6], "fals": [4, 5, 6, 14, 19, 21, 22, 24, 30, 31, 40, 43, 45, 47, 59, 60, 62, 66, 67, 68, 69, 73, 74, 77, 78, 85, 88, 91, 93, 95, 107, 108, 110, 112, 115, 116, 122, 125, 127, 129, 131, 136, 140, 144], "verbos": [4, 6], "join": [4, 28, 29, 54, 62, 67, 94, 96, 98, 107], "from": [4, 5, 6, 7, 9, 12, 13, 19, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 40, 43, 44, 45, 49, 51, 54, 55, 58, 59, 60, 62, 63, 66, 70, 73, 74, 76, 78, 81, 82, 85, 87, 91, 94, 99, 100, 101, 102, 103, 104, 105, 108, 110, 111, 112, 114, 116, 117, 118, 120, 121, 122, 125, 126, 127, 128, 129, 130, 133, 135, 136, 137, 138, 139, 140, 141, 143, 152, 158, 159, 161, 164], "librari": [4, 152, 161], "data": [4, 5, 6, 7, 9, 10, 21, 24, 27, 40, 54, 55, 57, 94, 95, 96, 98, 99, 101, 102, 103, 104, 110, 112, 116, 117, 118, 121, 122, 125, 127, 129, 131, 133, 134, 136, 137, 138, 139, 140, 147, 152, 154, 158, 159, 161, 164], "provid": [4, 5, 13, 21, 28, 37, 56, 62, 70, 99, 139, 158, 164], "also": [4, 87, 90, 93, 104, 114, 122, 127, 134, 152, 156, 158, 159, 164], "deep": [4, 82, 120, 152], "copi": [4, 77, 127], "avoid": [4, 96, 116], "modifi": [4, 12, 24, 39, 49, 98, 127], "object": [4, 5, 6, 7, 12, 17, 19, 21, 22, 24, 26, 32, 36, 37, 39, 41, 42, 56, 58, 62, 63, 66, 77, 82, 85, 87, 89, 91, 93, 94, 103, 104, 105, 108, 109, 110, 111, 112, 114, 115, 120, 122, 126, 127, 134, 146], "parser": [4, 19, 27, 28, 34, 40, 62, 104, 110, 117, 118, 129, 133, 150, 152, 158], "bool": [4, 5, 6, 9, 12, 14, 15, 19, 21, 22, 24, 26, 28, 29, 30, 31, 40, 43, 45, 46, 47, 56, 57, 59, 60, 62, 66, 67, 68, 69, 73, 74, 77, 78, 85, 86, 88, 89, 91, 93, 94, 95, 96, 107, 108, 109, 110, 112, 115, 116, 119, 120, 122, 125, 127, 128, 129, 136, 138, 140, 144], "option": [4, 10, 12, 13, 19, 24, 37, 40, 60, 62, 66, 69, 74, 82, 90, 104, 110, 112, 117, 123, 127, 128, 129, 133, 134, 137, 140, 152, 161], "flag": [4, 26, 54, 67, 89, 112, 119, 120, 135], "indic": [4, 12, 19, 22, 26, 28, 30, 34, 41, 42, 43, 44, 45, 46, 49, 55, 56, 57, 67, 69, 73, 77, 86, 89, 91, 93, 96, 99, 107, 109, 116, 119, 122, 128, 129, 131, 133, 138, 139, 140, 143, 152], "document": [4, 27, 85, 102, 110, 114, 118, 145, 158, 160], "price": [4, 10, 12, 13, 16, 24, 32, 36, 39, 40, 45, 54, 59, 68, 88, 89, 90, 91, 95, 96, 98, 99, 116, 117, 118, 121, 122, 125, 126, 128, 129, 134, 147, 152, 154, 158, 160, 161], "find": [4, 6, 9, 12, 19, 30, 43, 45, 55, 67, 68, 70, 73, 78, 85, 90, 94, 99, 101, 102, 104, 110, 131, 136, 152, 159, 164], "best": [4, 12, 30, 70, 94, 101, 114, 122, 152], "match": [4, 9, 14, 19, 30, 40, 45, 46, 50, 64, 72, 74, 77, 78, 79, 94, 98, 99, 100, 101, 102, 113, 117, 118, 122, 126, 128, 143, 152, 158, 164], "each": [4, 5, 6, 12, 17, 32, 34, 37, 39, 45, 49, 56, 71, 74, 77, 82, 85, 86, 88, 89, 91, 93, 94, 99, 100, 105, 116, 129, 145, 150, 152, 154, 159, 160, 164], "exist": [4, 5, 26, 62, 66, 101, 112, 144, 152], "o": 4, "n": [4, 90], "2": [4, 12, 24, 28, 88, 89, 107, 125], "complex": [4, 152, 156, 161], "advantag": 4, "fact": 4, "sort": [4, 6, 28, 94, 116, 129, 140], "posit": [4, 5, 6, 77, 110], "acrobat": 4, "textract_data": [4, 6], "adobe_data": 4, "util": [4, 23, 25, 38, 52, 64, 80, 83, 84, 132, 142], "pair": [4, 6, 67, 107, 152], "cell_pair": 4, "pl": [4, 72, 116, 138], "correct": [4, 5, 50, 55, 81, 89, 114, 117, 161], "cell_textract": 4, "cell_adob": 4, "lines_textract": 4, "lines_adob": 4, "original_id": 4, "extra_identifi": 4, "extend": [4, 13, 31, 74, 77, 78, 81, 129, 133, 138, 139, 140, 152], "an": [4, 7, 8, 9, 19, 22, 24, 29, 30, 31, 32, 36, 39, 43, 44, 46, 62, 66, 67, 72, 73, 74, 76, 77, 78, 87, 108, 112, 114, 115, 116, 120, 121, 122, 126, 127, 131, 140, 144, 152, 161, 164], "extra": [4, 24, 67, 73, 114, 152], "identifi": [4, 5, 59, 96, 114], "current": [4, 5, 8, 9, 13, 21, 29, 34, 77, 90, 131, 133], "time": [4, 12, 127, 152], "ensur": 4, "uniqu": [4, 16, 31, 63, 69], "pdfparser": [4, 26, 27, 28, 103, 110, 111, 112, 117], "bbox_1": 5, "bbox_2": 5, "two": [5, 6, 14, 24, 28, 29, 30, 43, 45, 49, 59, 74, 93, 102, 107, 116, 122, 136, 152, 158], "first": [5, 6, 12, 14, 28, 49, 59, 68, 74, 90, 122, 123, 128, 131, 136, 152, 154, 160, 165], "second": [5, 6, 14, 28, 59, 68, 122, 123, 127, 128, 131, 136, 152], "relationship": [5, 152, 156, 161], "child": [5, 6, 143, 156, 161], "check": [5, 6, 9, 12, 14, 15, 16, 22, 24, 28, 30, 31, 32, 33, 39, 42, 43, 45, 50, 60, 66, 78, 81, 90, 91, 95, 96, 98, 107, 110, 114, 115, 116, 119, 122, 128, 131, 135, 136, 138, 152], "block_1": 5, "block_2": 5, "updat": [5, 9, 26, 28, 30, 41, 45, 47, 57, 59, 90, 91, 94, 96, 98, 99, 100, 101, 107, 114, 121, 129, 130, 131, 134, 137, 140, 145], "includ": [5, 13, 29, 39, 54, 57, 62, 67, 77, 86, 94, 119, 121, 127, 137, 164], "ani": [5, 10, 15, 24, 45, 67, 78, 127, 140, 152], "merg": [5, 21, 24, 41, 42, 45, 54, 67, 68, 69, 70, 86, 96, 104, 107, 112, 139, 145, 152, 161], "cell_id": [5, 107], "id_to_block": 5, "remov": [5, 6, 28, 30, 42, 45, 55, 57, 67, 70, 88, 89, 90, 128, 129], "whose": [5, 74, 90], "need": [5, 50, 152, 159], "map": [5, 6, 8, 12, 13, 24, 26, 28, 29, 31, 32, 33, 40, 41, 43, 45, 46, 51, 55, 59, 66, 67, 68, 70, 74, 78, 81, 82, 86, 89, 94, 98, 101, 107, 109, 118, 120, 121, 129, 130, 133, 137, 139, 140, 141, 143, 158], "overrid": [5, 34, 62, 112, 144], "delet": [5, 12, 24, 64, 70, 124, 127], "geometri": [5, 107, 156, 161], "cell_1": 5, "cell_2": 5, "merged_row_idx": 5, "adobe_rows_top_limit": 5, "boundari": [5, 73], "search": [5, 9, 19, 30, 40, 43, 45, 51, 55, 68, 78, 91, 110, 136, 152, 159, 164], "should": [5, 13, 19, 44, 46, 67, 69, 77, 111, 120, 121, 150, 152], "togeth": [5, 86, 152, 160, 164], "proxim": 5, "mergeabl": [5, 107], "inner": [5, 64, 123, 124], "limit": [5, 94, 117, 136, 137, 161], "lst": [5, 6, 15, 19, 21, 54, 94], "0": [5, 9, 32, 66, 71, 82, 109, 110, 121, 122, 123, 135, 136, 137, 152, 156, 159, 161, 164], "input": [5, 15, 30, 39, 41, 43, 45, 47, 49, 59, 63, 66, 73, 86, 91, 99, 100, 105, 117, 118, 129, 133, 134, 140, 141, 143, 145, 152, 154, 158], "averag": [5, 15, 46, 95], "non": [5, 12, 30, 45, 66, 68, 72, 118, 122, 128], "zero": [5, 12, 49, 108], "befor": [5, 67, 152, 161], "after": [5, 24, 68, 70, 98, 119, 152, 160], "increas": 5, "order": [5, 28, 49, 74, 94, 116, 129, 140, 152], "true": [5, 9, 14, 19, 21, 24, 29, 31, 43, 45, 47, 59, 62, 66, 68, 69, 74, 77, 78, 88, 89, 91, 93, 94, 95, 107, 110, 114, 115, 116, 119, 122, 128, 129, 135, 136, 140], "otherwis": [5, 14, 22, 24, 31, 37, 43, 45, 59, 66, 78, 91, 95, 99, 107, 110, 115, 116, 119, 122, 126, 129, 136], "adobe_t": 5, "tabledata": [5, 24, 40, 44, 85, 129, 131, 133, 138], "eq_textract": [5, 85], "span": [5, 30, 152, 156], "duplic": [6, 12, 59, 64, 67, 68, 97, 98, 116, 161], "which": [6, 10, 12, 13, 19, 53, 63, 68, 70, 71, 82, 86, 87, 102, 111, 112, 121, 150, 152, 156, 158, 164], "without": [6, 17, 45, 55, 59, 68, 73], "": [6, 24, 126, 152], "itself": [6, 37], "its": [6, 8, 9, 12, 22, 32, 37, 60, 90, 94, 141, 145, 150, 152, 160, 161, 164], "block_map": 6, "get": [6, 12, 19, 28, 30, 51, 63, 64, 81, 94, 105, 107, 118, 122, 123, 124, 125, 127, 129, 135, 136, 141, 143, 144, 158], "children": 6, "parent": 6, "group": [6, 12, 82, 116, 161], "them": [6, 63, 66, 67, 78, 85, 114, 116, 118, 119, 134, 138, 140, 145, 152, 158, 161], "interval1": 6, "interval2": 6, "calcul": [6, 14, 15, 17, 27, 31, 45, 74, 86, 91, 94, 99, 100, 129, 136, 146], "length": [6, 19, 30, 41, 42, 47, 67, 71, 74, 77, 90, 114], "overlap": 6, "between": [6, 14, 71, 77, 94, 99, 107, 110, 114, 122, 123, 128, 136, 152, 156], "interv": 6, "percentag": [6, 109], "squares1": 6, "blockposit": 6, "squares2": 6, "threshold_dist": 6, "threshold_s": 6, "squar": 6, "threshold": [6, 28, 67, 68, 88, 89, 90, 109, 110], "distanc": [6, 14, 71, 90, 136], "size": [6, 29, 31, 127], "textract_block": 6, "add": [6, 13, 17, 19, 24, 29, 30, 51, 55, 77, 90, 94, 95, 98, 100, 107, 114, 117, 121, 133, 134, 138], "new_text": 6, "new": [6, 13, 22, 32, 41, 44, 47, 54, 59, 66, 67, 78, 138, 145, 152], "whether": [6, 9, 12, 15, 19, 21, 22, 24, 26, 28, 30, 43, 47, 56, 57, 59, 60, 62, 69, 73, 74, 77, 85, 91, 93, 94, 108, 119, 125, 127, 128, 144], "print": [6, 28, 29, 30, 31, 47, 122, 165], "cell_block": 6, "vertic": [6, 136], "pricelist": [6, 147, 152, 158], "class": [6, 7, 22, 26, 31, 40, 77, 87, 89, 93, 109, 111, 112, 117, 119, 133, 138, 144], "handl": [6, 67, 117, 152, 156], "self": [6, 7, 8, 15, 21, 22, 26, 31, 40, 66, 77, 89, 109, 110, 111, 112, 117, 119, 120, 127, 129, 133, 135, 138, 144], "initi": [6, 7, 21, 22, 26, 31, 39, 40, 62, 64, 66, 77, 79, 89, 93, 103, 108, 109, 110, 111, 112, 113, 117, 119, 123, 129, 133, 138, 144, 158], "s3_bucket": [7, 108, 144], "pdf_file": [7, 144], "pdf_hash": [7, 144], "pdf_folder": [7, 144], "extract_fold": 7, "name": [7, 24, 26, 43, 47, 55, 57, 58, 59, 62, 63, 73, 76, 78, 82, 91, 95, 98, 99, 100, 101, 108, 118, 119, 120, 123, 125, 126, 127, 129, 130, 135, 140, 144, 161], "s3": [7, 10, 62, 64, 78, 108, 117, 118, 120, 124, 144, 152, 158], "bucket": [7, 108, 117, 127, 144], "file": [7, 12, 13, 47, 72, 73, 76, 78, 82, 91, 93, 103, 108, 118, 126, 127, 144, 146, 152, 158, 161, 164], "hash": [7, 62, 121, 144, 146], "folder": [7, 73, 118, 144], "store": [7, 47, 58, 62, 118, 125, 127, 144, 158], "relev": [7, 12, 81, 164], "bytesio": 7, "except": [7, 54, 60, 111, 122, 123, 131, 150, 152], "occur": [7, 12, 24, 62, 122], "respect": [8, 28], "insert": [8, 17, 30, 64, 65, 66, 67, 88, 129], "ha": [8, 12, 15, 24, 30, 31, 49, 62, 112, 119, 131, 158, 161], "next_el": 8, "next": [8, 121, 152], "remain": [9, 24, 100, 152], "table_block": [9, 107, 143], "cell_count": 9, "word_count": 9, "start_row": 9, "single_t": 9, "fill": [9, 54, 66, 74, 86], "partial": [9, 68, 99], "counter": [9, 74, 77], "start": [9, 29, 30, 31, 66, 110, 121, 127], "singl": [9, 24, 56, 57, 112, 126, 145, 147], "table_line_block": 9, "tabletrack": 9, "multipl": [9, 21, 44, 59, 81, 86, 116, 117, 152, 160], "tracker": 9, "alignment_scor": 9, "align": 9, "score": [9, 12, 14, 15, 30, 46, 94, 98, 99, 100, 101], "alignmentscor": 9, "end": [9, 30, 41, 66, 110, 125, 147, 152, 160], "event": 10, "context": 10, "pars": [10, 19, 24, 37, 45, 54, 64, 73, 78, 79, 98, 99, 100, 104, 108, 113, 114, 117, 118, 121, 127, 128, 129, 134, 152, 161, 164], "save": [10, 62, 72, 73, 78, 120, 152], "mode": [10, 26, 122], "eval_mod": [10, 122], "filenam": [10, 47, 71, 73, 76, 108, 117, 118, 126, 127], "price_list_model_gener": [10, 99], "country_id": [10, 54, 93, 95, 108, 109, 125], "ci_nam": 10, "parse_technical_data": 10, "parse_equip": 10, "ground_truth_s3_kei": 10, "document_id": 10, "call": [10, 47, 118, 119, 152, 160], "Not": 10, "get_best_combination_from_dupl": [11, 12, 64], "deduplicate_word_combin": [11, 12, 64], "generate_word_combin": [11, 12, 64], "predict_categori": [11, 12, 64], "ignore_model_classif": [11, 12, 64], "n_classified_tokens_on_pag": [11, 12, 64], "categorise_textract_word": [11, 12, 64], "is_valid": [11, 12, 23, 25, 31, 64, 65, 66, 132, 138], "delete_non_relevant_pag": [11, 12, 64], "process_price_list": [11, 12, 64], "_get_source_words_indic": [11, 12, 64], "mapping_dict": [11, 14, 64], "get_mapping_dict": [11, 13, 64], "get_full_mapping_dict": [11, 13, 64], "get_category_values_dict": [11, 13, 64], "extend_mapping_dict": [11, 13, 64], "get_map": [11, 13, 64], "metric": [11, 64, 86], "matching_words_ratio": [11, 14, 64], "exact_match": [11, 14, 64], "calculate_scor": [11, 14, 64, 97, 99, 100], "prediction_funct": [11, 14, 64], "word_combin": [12, 82], "word_combinations_original_indic": 12, "combin": [12, 17, 23, 24, 29, 50, 51, 55, 59, 64, 74, 82, 94, 95, 96, 98, 100, 101, 114, 116, 117, 121, 122, 125, 129, 133, 134, 138, 140, 150, 158, 159, 160, 161], "keep": 12, "onli": [12, 24, 40, 42, 45, 86, 90, 99, 126, 127, 145, 147, 150, 152, 159, 161], "one": [12, 24, 28, 32, 42, 43, 54, 96, 99, 102, 107, 127, 131, 139, 152, 156, 158, 159, 160, 161, 164], "select": [12, 24, 58, 70, 140], "sourc": [12, 152, 158, 161], "closest": [12, 136], "other": [12, 24, 66, 77, 91, 128, 129, 147, 152, 158, 164], "dedupl": 12, "more": [12, 30, 43, 67, 152], "than": [12, 24, 32, 33, 41, 42, 59, 71, 90, 107, 114, 122, 127], "param": [12, 28, 40, 73, 78, 133], "rtype": [12, 15, 22, 26, 28, 31, 66, 78, 79, 108, 110, 111, 115, 117, 125, 127, 129, 133, 135, 138, 144], "word_group": [12, 82], "max_n_word": 12, "5": [12, 78, 109], "gener": [12, 13, 15, 17, 21, 23, 24, 28, 34, 40, 41, 48, 59, 64, 67, 73, 77, 78, 82, 94, 98, 99, 100, 117, 118, 119, 121, 125, 127, 129, 133, 137, 138, 140, 141], "maximum": [12, 30, 42, 66, 89, 90, 123, 125, 126], "default": [12, 13, 19, 21, 24, 40, 45, 51, 53, 60, 62, 69, 73, 78, 82, 87, 88, 91, 93, 101, 109, 112, 117, 122, 123, 126, 127, 128, 133, 134, 137, 138, 140, 144, 158], "predictor_necess": [12, 81, 133, 139], "predictornecess": [12, 81, 82], "n_top_valu": [12, 14], "predict": [12, 14, 19, 20, 21, 29, 34, 37, 64, 67, 77, 82, 83, 91, 92, 93, 94, 109, 114, 122], "categori": [12, 13, 45, 51, 81, 82, 88, 93, 94, 101, 114, 121, 122, 129, 130, 131, 136, 137, 139, 140], "necessari": [12, 81, 103, 112], "predictor": [12, 81, 82, 133, 139], "consid": [12, 14, 29, 30, 68, 82, 86, 94, 101, 122, 150, 152, 164], "most": [12, 68, 93, 152, 158], "probabl": [12, 19, 21, 68, 81, 82, 93, 152, 160], "tripl": 12, "token": [12, 43, 44, 101, 125, 127, 128], "determin": [12, 22, 45, 59, 68, 74, 86, 90, 109, 120, 139, 164], "ignor": [12, 19, 21, 69], "model": [12, 19, 21, 26, 29, 30, 36, 37, 40, 42, 51, 54, 55, 57, 58, 59, 62, 63, 64, 67, 68, 69, 82, 93, 94, 98, 99, 100, 105, 108, 109, 113, 117, 118, 119, 120, 121, 125, 135, 137, 140, 152, 158, 161], "classif": [12, 16, 34, 36], "per": 12, "result": [12, 24, 26, 39, 47, 49, 51, 63, 72, 77, 78, 79, 81, 85, 102, 112, 114, 117, 118, 122, 123, 141, 144, 145, 156], "classifi": [12, 29, 34, 36, 64, 67, 82, 108, 117, 120, 161], "total": [12, 86, 118, 122], "categoris": 12, "valid": [12, 16, 23, 31, 47, 48, 64, 66, 82, 115, 129, 138, 158, 160], "against": [12, 30, 71, 126], "relevant_page_numb": 12, "file_lik": [12, 93, 146], "like": [12, 93, 108, 127, 146, 152, 154, 158, 161], "implement": [12, 129], "read": [12, 76, 103, 127, 140, 161], "seek": 12, "tell": 12, "method": [12, 111, 119, 127, 138, 147, 152], "min_chars_per_pag": 12, "20": 12, "For": [12, 152, 158], "now": [12, 152], "we": [12, 147, 150, 152, 154, 156, 158, 159, 160, 161, 164], "assum": 12, "minimum": [12, 30, 82, 90, 101, 122, 127, 130], "charact": [12, 19, 30, 37, 90, 109, 128], "price_list_id": [13, 118, 121, 126], "categories_of_interest": 13, "price_list_id_list": 13, "full": [13, 39, 139], "interest": 13, "revers": 13, "addit": [13, 24, 26, 30, 40, 43, 44, 46, 98, 137, 139, 140, 150, 152, 159, 164], "specif": [13, 15, 24, 34, 58, 78, 86, 91, 101, 102, 120, 126, 129, 140, 144, 150, 152, 158, 159], "brand": [13, 24, 30, 46, 50, 51, 53, 54, 55, 57, 58, 81, 82, 94, 95, 98, 99, 100, 120, 125, 128, 135, 150, 152, 158], "path_prefix": 13, "load": [13, 94, 119, 120, 127, 152, 158], "path": [13, 73, 78, 82, 91, 119, 120, 121, 125, 126, 146], "prefix": [13, 55, 57, 94, 127], "directori": [13, 73, 78, 126], "word_list_1": 14, "word_list_2": 14, "ratio": [14, 45, 71, 122], "value_list": 14, "levenshtein": [14, 71, 90], "compar": [14, 47, 59, 71, 90, 94, 107, 122], "ld": 14, "mw": 14, "exact": 14, "em": 14, "average_transformation_confid": [15, 18, 64], "transform": [15, 16, 19, 21, 40, 45, 53, 54, 56, 57, 68, 121, 125, 129, 133, 138, 140, 150], "confid": [15, 17, 64, 74, 77, 78, 95, 98, 156, 161], "confidence_t": [15, 18, 64], "remap_statu": [15, 18, 64], "mapper": [15, 66, 67, 69, 70, 81, 89, 129, 139, 152], "remap": 15, "statu": [15, 16, 54, 56, 57, 67, 69, 88, 89, 129], "make_statu": [15, 18, 64], "shape": [15, 39, 74, 88], "set_in_row": [15, 18, 64], "row_index": [15, 89], "variabl": [15, 47, 125], "set": [15, 26, 30, 39, 41, 42, 43, 44, 47, 53, 54, 55, 56, 57, 58, 59, 64, 67, 79, 85, 87, 93, 103, 104, 108, 110, 111, 112, 113, 114, 117, 135, 139, 150, 161], "set_in_column": [15, 18, 64], "column_index": [15, 32, 81], "set_in_t": [15, 18, 64], "any_in_row": [15, 18, 64], "certain": [15, 29, 37, 39, 57, 59, 60, 85, 122, 126], "transformation_confid": [15, 18, 64], "validate_transform": [16, 18, 64], "standard_label": 16, "standard_valu": 16, "standard_statu": 16, "standard": [16, 77, 159], "label": [16, 19, 21, 23, 34, 36, 38, 40, 42, 43, 45, 53, 54, 64, 67, 68, 69, 70, 73, 74, 77, 78, 81, 82, 85, 88, 89, 91, 105, 109, 114, 129, 130, 133, 138, 139, 140, 160], "consist": 16, "presenc": [16, 91], "essenti": [16, 152, 158], "rang": [16, 39, 110, 136], "confidence_featur": [17, 18, 64], "tablepars": [17, 59, 72, 78, 79, 114], "featur": [17, 34, 64, 83, 92, 152], "combine_confid": [17, 18, 64], "intercept": 17, "coeff": [17, 31], "coeffici": [17, 31], "linear": 17, "calculate_overall_confid": [17, 18, 64], "overal": [17, 74, 122], "item": [17, 22, 24, 42, 54, 68, 116, 118], "calculate_no_version_confid": [17, 18, 64], "exclud": [17, 60, 126], "version": [17, 24, 26, 28, 30, 31, 32, 34, 40, 41, 42, 43, 44, 45, 46, 49, 53, 54, 55, 60, 67, 68, 74, 77, 86, 95, 98, 101, 121, 125, 128, 133, 135, 137, 140, 152, 160], "inform": [17, 28, 29, 30, 31, 45, 58, 68, 76, 88, 121, 127, 147, 158, 161], "insert_confid": [17, 18, 64], "output_json": 17, "insert_confidence_combin": [17, 18, 64], "insert_confidence_no_vers": [17, 18, 64], "add_confidence_to_output": [17, 18, 64], "cell_statu": [18, 64], "check_transform": [18, 64], "combined_confid": [18, 64], "biggest_numb": [19, 20, 64], "convers": [19, 60, 90], "1": [19, 40, 60, 71, 87, 90, 123, 129, 133, 136, 138, 152, 159], "largest": [19, 60, 68, 78, 90, 129, 140], "webpag": 19, "factor": [19, 60, 90], "appli": [19, 21, 30, 39, 50, 51, 54, 55, 56, 58, 60, 68, 88, 98, 101, 129, 139, 140], "found": [19, 30, 43, 44, 54, 55, 60, 67, 78, 85, 87, 104, 110, 114, 116, 128, 130, 131, 136, 152, 158, 161, 164], "create_quintuple_valu": [19, 20, 64], "quintupl": 19, "create_decuple_valu": [19, 20, 64], "decupl": 19, "prepare_text": [19, 20, 64], "transform_typ": 19, "prepar": [19, 82, 85, 88, 119, 152], "further": [19, 152, 158], "truncat": 19, "pad": 19, "extended_text_to_matrix": [19, 20, 64], "matrix": 19, "represent": [19, 22, 31, 60, 71, 74, 94, 109, 110, 122, 140, 146, 152, 156], "numpi": [19, 76, 109], "ndarrai": [19, 76, 109], "text_to_matrix": [19, 20, 64], "text_to_index": [19, 20, 64], "sequenc": 19, "text_to_index10": [19, 20, 64], "word_to_index": [19, 20, 64], "add_numb": [19, 20, 64], "extract_numb": [19, 20, 64, 124, 128], "hot_encod": [19, 20, 64], "char": 19, "hot": 19, "encod": 19, "char_index": [19, 20, 64], "char_index_simpl": [19, 20, 64], "simpl": [19, 74, 152, 156], "get_predict": [19, 20, 64], "string": [19, 22, 30, 31, 42, 43, 44, 45, 60, 70, 71, 90, 91, 94, 98, 110, 122, 125, 126, 127, 129, 130, 141, 146, 152, 154, 159], "get_prediction_with_proba": [19, 20, 64], "get_prediction_df": [19, 20, 64], "transformation_typ": [19, 21], "ignore_cod": [19, 21], "ignore_model": [19, 21], "return_proba": [19, 21], "code": [19, 21, 43, 45, 158], "get_prediction_list": [19, 20, 64], "index_to_label": [19, 20, 64], "prepare_df_layout": [19, 20, 64], "layout": [19, 67, 73, 152], "dl_tool": [20, 64], "table_classifi": [20, 64], "label_t": [20, 21, 23, 38, 39, 40, 64], "dl_cleanup": [20, 21, 64], "predict_proba": [20, 21, 64], "predict_lst": [20, 21, 64], "predict_wid": [20, 21, 64], "predict_transpos": [20, 21, 64], "predict_merg": [20, 21, 64], "predict_split": [20, 21, 64], "df_valu": [21, 81, 140], "model_typ": 21, "area": 21, "storag": [21, 39, 49, 64, 81, 82, 113, 119], "preloadedasset": 21, "preload": 21, "asset": 21, "clear": 21, "kera": 21, "session": 21, "cell_text": 21, "df_label": [21, 68, 81, 140], "wide": [21, 45, 67, 68, 69, 74, 77, 152], "transpos": [21, 67, 152, 160], "split": [21, 37, 44, 67, 68, 77, 81, 89, 91, 93, 116, 130], "entry_properti": [22, 23, 64], "properti": 22, "entri": [22, 67, 69, 74, 77, 96, 99, 152, 159, 161], "boolean": [22, 28, 41, 43, 44, 45, 46, 49, 55, 56, 57, 67, 91, 96, 109, 116, 128, 131, 138], "equip": [22, 24, 26, 28, 29, 31, 32, 36, 37, 40, 42, 43, 45, 72, 74, 77, 79, 85, 109, 115, 116, 117, 147, 152, 160, 164], "color": [22, 24, 27, 29, 31, 37, 74, 77, 110, 115, 125], "packag": [22, 29, 37, 42, 109], "is_seri": [22, 23, 64, 74], "serial": [22, 23, 24, 29, 30, 31, 64, 74, 77, 127], "__repr__": [22, 23, 25, 31, 64, 106, 110], "create_packag": [22, 23, 37, 38, 64], "package_titl": 22, "titl": [22, 29, 37, 40, 42, 43, 45, 49, 67, 68, 74, 77, 81], "create_package_item": [22, 23, 64], "item_titl": 22, "eq": [22, 37, 40, 111, 116], "instanc": [22, 24, 119, 127], "data_model": [23, 64], "check_if_table_already_pars": [23, 24, 64], "extract_equip": [23, 24, 64], "create_versions_map": [23, 24, 64], "create_extra_brand_vers": [23, 24, 64], "create_extra_vers": [23, 24, 64], "parse_equipment_t": [23, 24, 64], "parse_opt": [23, 24, 64], "parse_seri": [23, 24, 64], "add_id": [23, 24, 64], "merge_vers": [23, 24, 64], "add_avail": [23, 24, 64, 113, 121], "share": [23, 64], "get_serial_eq_pag": [23, 33, 64], "extract_s": [23, 25, 26, 64], "update_block": [23, 25, 26, 64], "blocks_from_lin": [23, 25, 26, 64], "process_color": [23, 25, 64], "line_color_prop": [23, 25, 27, 64], "get_cropped_imag": [23, 25, 27, 64], "extract_color_prop": [23, 25, 27, 64], "process_layout": [23, 25, 64], "make_column": [23, 25, 28, 64], "join_empty_column": [23, 25, 28, 64], "sort_column": [23, 25, 28, 64], "join_version_type_on": [23, 25, 28, 64], "join_version_type_two": [23, 25, 28, 64], "remove_lone_vers": [23, 25, 28, 64], "check_margin": [23, 25, 28, 64], "all_lin": [23, 25, 28, 64, 142, 143], "join_version_block": [23, 25, 28, 64], "process_lin": [23, 25, 64], "add_titl": [23, 25, 29, 64], "classify_lin": [23, 25, 29, 64], "analyze_lin": [23, 25, 29, 64], "combine_lin": [23, 25, 29, 64], "process_vers": [23, 25, 64], "strip_non_lett": [23, 25, 30, 64], "find_vers": [23, 25, 30, 64, 65, 68], "replace_hyphen": [23, 25, 30, 64], "move_match": [23, 25, 30, 64], "insert_vers": [23, 25, 30, 64], "assemble_version_lin": [23, 25, 30, 64], "find_version_indic": [23, 25, 30, 64], "remove_recursive_vers": [23, 25, 30, 64], "remove_too_big_vers": [23, 25, 30, 64], "remove_lone_model": [23, 25, 30, 64], "remove_displaced_model": [23, 25, 30, 64], "remove_small_bottom_margin_vers": [23, 25, 30, 64], "remove_different_vers": [23, 25, 30, 64], "is_upgrad": [23, 25, 30, 64], "add_vers": [23, 25, 30, 64], "is_version_part_of_word": [23, 25, 30, 64], "get_word_at_index": [23, 25, 30, 64], "_replac": [23, 25, 30, 64], "serial_data_model": [23, 25, 64], "starts_low": [23, 25, 31, 64], "starts_upp": [23, 25, 31, 64], "is_clos": [23, 25, 31, 64], "is_big": [23, 25, 31, 64], "looks_big": [23, 25, 31, 64], "is_color": [23, 25, 31, 64], "is_far": [23, 25, 31, 64], "dash_start": [23, 25, 31, 64], "is_speci": [23, 25, 31, 64], "is_offset": [23, 25, 31, 64], "has_vers": [23, 25, 31, 64], "equipment_count": [23, 25, 31, 64], "return_uniqu": [23, 25, 31, 61, 63, 64], "return_extend": [23, 25, 31, 64], "make_template_avail": [23, 25, 31, 64], "create_equip": [23, 25, 31, 64], "make_equip": [23, 25, 32, 64], "to_block_format": [23, 25, 32, 64], "eq_packag": [23, 38, 64], "parse_according_to_typ": [23, 37, 38, 64], "label_price_cel": [23, 38, 39, 64], "full_label": [23, 38, 39, 64], "preprocess_t": [23, 38, 40, 64], "get_price_col_and_extract_pric": [23, 38, 40, 64], "infer_state_and_transform_to_v": [23, 38, 40, 64], "extract_equipment_pipelin": [23, 38, 40, 64], "run": [23, 38, 40, 62, 64, 144, 152, 158], "cleanup": [23, 38, 40, 64, 113, 117, 120], "recycle_head": [23, 38, 64], "generate_head": [23, 38, 41, 64], "concat_head": [23, 38, 41, 64], "try_previous_head": [23, 38, 41, 64], "row_filt": [23, 38, 64], "detect_rows_with_repeating_st": [23, 38, 42, 64], "remove_long_str": [23, 38, 42, 64], "handle_merge_package_item_row": [23, 38, 42, 64], "remove_blank": [23, 38, 42, 64], "table_semant": [23, 38, 42, 64], "get_eq_col": [23, 38, 43, 64], "get_title_row": [23, 38, 43, 64], "get_code_col": [23, 38, 43, 64], "search_for_code_head": [23, 38, 43, 64], "detect_cod": [23, 38, 43, 64], "table_split": [23, 38, 64], "detect_possible_splitted_t": [23, 38, 44, 64], "split_tabl": [23, 38, 44, 64], "preprocess_splitted_t": [23, 38, 44, 64], "extract_vers": [23, 38, 44, 52, 54, 64], "extract_price_simplifi": [23, 38, 39, 45, 64], "find_price_column": [23, 38, 45, 64], "parse_pric": [23, 38, 45, 64], "is_main_pl": [23, 38, 45, 64], "is_tabular_equip": [23, 38, 45, 64], "is_table_content_reason": [23, 38, 45, 64], "df_wide_to_long": [23, 38, 45, 64], "remove_empty_row": [23, 38, 45, 64], "merge_columns_with_empty_head": [23, 38, 45, 64], "versions_match": [23, 38, 64], "match_versions_to_ids_v3": [23, 38, 46, 64], "cell_stat": [23, 38, 64], "cell_classif": [23, 35, 38, 64], "get_cell_imag": [23, 34, 35, 38, 64], "get_cell_as_imag": [23, 34, 35, 38, 64], "generate_cell_imag": [23, 34, 35, 38, 64], "predict_batch": [23, 34, 35, 38, 64], "label_cel": [23, 34, 35, 38, 64], "classify_equipment_cel": [23, 35, 36, 38, 64], "get_cell_st": [23, 35, 36, 38, 64], "test": [23, 64], "log_to_pickl": [23, 47, 48, 64], "test_pickle_cont": [23, 47, 48, 64], "wrapper": [23, 47, 48, 64], "output_formatt": [23, 48, 64], "format_find_vers": [23, 48, 49, 64], "format_add_vers": [23, 48, 49, 64], "format_join_version_type_two": [23, 48, 49, 64], "format_join_version_type_on": [23, 48, 49, 64], "format_add_titl": [23, 48, 49, 64], "table_data": [24, 40, 129, 131, 133, 138], "been": [24, 30, 112, 119, 138, 152, 159], "pdfextract": [24, 104, 117, 134], "tables_adob": 24, "general_attr": [24, 40, 129, 133, 134, 138], "price_list_json": 24, "ground_truth": [24, 74, 117], "commonattr": [24, 40, 117, 129, 133, 134, 138], "attribut": [24, 29, 30, 31, 40, 110, 117, 119, 129, 133, 134, 138, 152, 158, 159, 160], "ground": [24, 74, 114, 117, 158], "truth": [24, 74, 94, 114, 117, 158], "equipmentresult": [24, 72, 77, 79, 117], "brand_specific_vers": 24, "summari": [24, 43, 53, 54, 55, 56, 57, 58, 67, 81, 85, 98, 104, 118, 140], "versions_to_id_map": [24, 40, 41, 43, 46], "additional_versions_to_id_map": [24, 26, 40, 43, 46], "common_attr": 24, "common": [24, 117, 134], "using_adobe_output": [24, 26], "se_list": 24, "oe_list": 24, "oe_tabl": 24, "remaining_pag": 24, "eq_pag": [24, 33, 116, 118], "negative_exampl": 24, "commonentri": [24, 74], "neg": [24, 77], "exampl": [24, 147, 152, 158, 159, 161, 164], "combined_versions_to_id_map": 24, "condit": [24, 29, 43, 85, 116], "greater": [24, 59, 90, 114, 122], "those": [24, 147], "fewer": 24, "heurist": [24, 152], "abov": [24, 131, 152], "met": 24, "anoth": [24, 66, 77, 127, 152], "my22": 24, "initial": 24, "pari": 24, "under": 24, "less": [24, 32, 33, 41, 71, 90, 107, 152], "half": [24, 91, 93], "avail": [24, 31, 32, 40, 74, 96, 121, 147], "equipmentavail": [24, 31], "prioriti": [24, 116], "system": [24, 62, 79], "present": [24, 131, 135], "higher": [24, 59], "chosen": [24, 94], "textract_result": [26, 51, 85, 102], "version_to_id_map": 26, "segment_mak": [26, 28, 134], "textractdivid": [26, 28, 134], "debug": [26, 28, 29, 30, 31, 152], "aw": [26, 158], "being": [26, 152, 164], "all_block": 26, "equipmentlin": [26, 28, 29, 30, 32], "page_to_col": [26, 28], "proport": 27, "black": 27, "white": 27, "analyz": [27, 29, 43, 45, 109, 139, 152, 154], "crop": 27, "imag": [27, 34, 36, 40, 42, 105, 108, 158, 161], "cropped_imag": 27, "segment": [28, 109, 110, 118, 134], "maker": [28, 134], "columns_dict": 28, "empti": [28, 44, 45, 66, 68, 72, 74, 114, 122, 126, 140, 144, 152, 160], "versionblock": [28, 32], "page_to_mean_margin": 28, "page_to_column_chang": 28, "margin": [28, 29, 30, 31, 32, 73, 77, 110], "chang": [28, 54, 56, 57, 88], "blocks_dict": 28, "ordered_vers": 28, "lone": [28, 30], "line_threshold": 28, "meet": [28, 29, 85], "block1": 28, "block2": 28, "serial_attribut": [29, 30, 31], "serialattribut": [29, 30, 31], "least": [29, 110, 152], "eqmodel": [29, 37, 40, 110], "can": [29, 107, 150, 152, 160, 161], "misc": [29, 68], "usag": 29, "dash": [29, 31, 109], "special": [29, 31, 49, 55, 95, 152, 156], "use_dash": 29, "lower": [29, 59, 67, 91, 128], "case": [29, 96, 150, 152, 158, 159, 161], "letter": [29, 30, 31], "previou": [29, 30, 31, 32, 40, 41, 45, 67, 68, 114, 118, 129, 133, 137, 156, 161], "upper": [29, 91], "doe": [29, 30, 45, 96, 101, 119, 144, 152, 156, 161], "input_str": 30, "uppercas": [30, 31], "line_text": 30, "price_list_vers": 30, "additional_vers": 30, "match_threshold": 30, "match_handicap": 30, "max_return": 30, "brand_id": [30, 46, 54, 95, 125, 128], "length_penalti": 30, "handicap": 30, "dure": [30, 152], "penalti": 30, "version_lin": 30, "hyphen": 30, "space": [30, 128, 152], "original_span": 30, "move": [30, 127, 152], "prev_lin": 30, "version_list": 30, "additional_version_list": 30, "version_found": 30, "previous_vers": 30, "wa": [30, 50, 55, 56, 57, 96, 110, 127, 131, 136, 150, 152], "assembl": [30, 94], "version_indic": 30, "recurs": [30, 121], "too": [30, 152, 156], "big": [30, 31, 90], "displac": 30, "have": [30, 33, 45, 96, 101, 107, 110, 138, 150, 152, 156, 159, 161], "small": 30, "bottom": 30, "differ": [30, 37, 72, 77, 85, 94, 98, 104, 107, 114, 119, 150, 152, 156, 158, 159, 161, 164], "major": 30, "upgrad": 30, "pareto_vers": 30, "pareto": [30, 117, 164], "found_vers": 30, "part": [30, 34, 44, 110, 150, 152, 156, 164], "index_search": 30, "prev_block": 31, "column_start": 31, "lowercas": [31, 128], "margin_mean": 31, "close": [31, 79], "size_mean": 31, "look": [31, 152, 154, 156, 161], "far": [31, 152], "special_dash": 31, "size_coeff": 31, "margin_coeff": 31, "offset": 31, "version_to_id": [31, 32], "make": [31, 62, 67, 85, 133, 152], "templat": 31, "loop": [31, 40, 129, 133, 138], "detect": [31, 43, 44, 105, 108, 116], "id_to_vers": 32, "page_to_eq_count": 33, "page_to_eq_count_textract": 33, "3": [33, 82, 114], "equat": 33, "cut": 34, "out": [34, 147, 152, 161], "pil": 34, "page_imag": 34, "row_idx": 34, "column_idx": 34, "df_cell": 34, "original_page_idx": 34, "df_trp_cell": 34, "version_column": 34, "faulti": 34, "np": 34, "arrai": [34, 76], "cell_imag": 34, "feature_extractor": 34, "batch": [34, 127, 152], "extractor": [34, 111], "pg_num": [34, 45, 105], "label_to_state_overrid": 34, "state": [34, 36, 40, 42], "content": [36, 40, 45, 47, 68, 85, 88, 89, 98, 102, 116, 127, 146, 152, 154], "row_id": 36, "version_col": [36, 42, 45], "price_col": 36, "image_label": 36, "eq_model": [36, 37, 40, 42, 110], "retriev": [36, 62], "accord": [37, 47], "within": [39, 110, 136], "table_loop_index": [40, 129, 133, 138], "previous_headers_as_df": 40, "header": [40, 41, 43, 45, 70, 131, 133, 152, 160], "preprocess": [40, 44, 117, 129], "hold": [40, 99], "infer": [40, 133], "ve": [40, 82, 94, 95, 152, 159], "use_previous_head": 40, "pipelin": [40, 147, 156, 158, 161], "clean": [40, 47, 64, 67, 68, 79, 101, 124, 128], "up": [40, 79, 93, 101, 103, 111, 112, 154], "equal": [41, 71, 114], "previous_head": 41, "values_origin": 41, "values_block": [41, 44], "concaten": 41, "attempt": 41, "success": [41, 43, 126, 131], "tri": [41, 123], "recycl": 41, "try": [41, 99, 123, 152], "df_image_label": 42, "repeat": 42, "over": [42, 116, 127, 161], "repetit": [42, 156], "max_str_len": 42, "title_row": [42, 45, 68], "longer": 42, "long": [42, 45, 67, 74, 129, 152], "allow": [42, 110], "blank_rows_ind": 42, "eq_col": [42, 43, 45], "requir": [42, 152], "blank": 42, "tablesemant": 42, "extrem": 42, "semant": [42, 43, 129], "longest": 43, "mai": [43, 44, 147], "pl_summari": [43, 64, 98, 118, 132, 140], "unknown": [43, 45], "product": [43, 85], "oper": [43, 102, 131, 152, 158], "versions_addit": 44, "possibli": 44, "split_indic": 44, "df_version": 44, "x": [45, 67, 70, 74, 76, 125, 126, 127, 129, 136, 140, 156, 161], "default_return_valu": 45, "cannot": [45, 111], "category_to_col_indices_map": [45, 139, 140], "tabular": [45, 152, 161], "reason": 45, "code_col": 45, "matched_vers": 45, "df_block": 45, "simultan": 45, "col_id_to_valu": 46, "versionmatch": 46, "func": [47, 123], "log": [47, 62, 78, 79, 94, 117, 123], "pickl": 47, "specify_format": 47, "vari": [47, 123, 152], "depend": [47, 53, 90], "function_nam": 47, "arg": [47, 123], "kwarg": [47, 98, 123], "argument": [47, 98, 123, 127], "arbitrari": 47, "keyword": [47, 54, 78, 91, 98, 123, 127, 139, 152], "funct_input": 49, "sum": [49, 59], "tag": 49, "default_transmission_for_electric_engin": [50, 52, 64], "electr": [50, 54, 98], "engin": [50, 51, 55, 70, 90, 94, 95, 96, 98, 101, 121, 125, 130, 152, 158, 159, 160, 164], "transmiss": [50, 63, 64, 94, 95, 96, 114, 125, 132, 133, 138, 152, 159, 160], "85": 50, "fix": 50, "vehicl": [50, 55, 159], "apply_to_engine_combin": [51, 52, 64], "apply_default": [51, 52, 64], "get_surcharge_for_text": [51, 52, 64], "surcharg": [51, 133, 134], "add_grand_tour_surcharg": [51, 52, 64], "all_combin": [51, 98], "super_model": [51, 98], "grand": 51, "tour": 51, "super": [51, 55, 98, 125, 140, 152, 158], "get_engine_surcharg": [51, 52, 64], "convert_ks_to_kw": [51, 52, 64], "power": [51, 152, 154, 159], "k": [51, 93, 129, 140, 147, 152], "kw": [51, 54, 129, 140, 152], "after_match": [52, 64], "before_match": [52, 64], "post_cell_split_transform": [52, 64], "restructure_vers": [52, 53, 64], "post_labeling_transform": [52, 64], "transform_values_label": [52, 54, 64], "label_transform": [52, 54, 64], "parse_kw_for_electric_car": [52, 54, 64], "join_rows_on_merged_cel": [52, 54, 64], "versions_to_one_cel": [52, 54, 64], "repair_pric": [52, 54, 64], "make_numer": [52, 54, 64, 75, 76], "repair_vers": [52, 54, 64], "neo_patentati_except": [52, 54, 64], "join_row": [52, 54, 64], "extract_keyword_item": [52, 54, 64], "fill_dacia_drivetrain": [52, 54, 64], "post_table_pars": [52, 64], "apply_engine_name_correct": [52, 55, 64], "remove_prefix": [52, 55, 64], "special_version_name_correct": [52, 55, 64], "add_hybrid_data": [52, 55, 64], "fuel_type_correct": [52, 55, 64], "model_correct": [52, 55, 64], "apply_map": [52, 55, 64, 97, 101], "find_hybrid": [52, 55, 64], "pre_labeling_td_transform": [52, 64], "transform_valu": [52, 56, 57, 64], "value_transform": [52, 56, 57, 64], "pre_labeling_transform": [52, 64], "select_set": [52, 64], "set_set": [52, 58, 64], "unite_combin": [52, 64], "get_combination_identifi": [52, 59, 64], "is_new_price_bett": [52, 59, 64], "update_model_nam": [52, 59, 64], "solve_duplicates_using_model": [52, 59, 64], "solve_duplicates_by_pric": [52, 59, 64], "is_bigg": [52, 59, 64], "are_tables_sam": [52, 59, 64], "largest_number_new": [52, 60, 64], "only_short_vers": [52, 60, 64], "largest_numb": [52, 60, 64], "countri": [53, 54, 55, 58, 93, 95, 108, 109, 120, 125, 150, 152, 158], "restructur": [53, 118], "values_merg": [54, 67, 69], "celllabel": 54, "neo": 54, "patentati": 54, "repair": 54, "dacia": 54, "drivetrain": [54, 94, 95, 96, 114, 125, 158], "car": [54, 147, 150, 152, 158, 160, 164], "made": [54, 82, 152], "raw_numb": 54, "raw": [54, 121], "numer": [54, 66, 76, 94, 129, 140, 152, 159], "possibl": [54, 67, 69, 94, 101, 135], "version_nam": [55, 95], "super_model_nam": [55, 98, 125, 140], "page_text": [55, 87, 91, 117], "hybrid": [55, 152, 159], "fuel": [55, 63, 98, 130, 133, 137, 140, 152, 154, 160, 161, 164], "rule": [56, 57], "expert": 57, "config": 58, "reload": 58, "configur": 58, "field": [59, 67, 74, 96, 99, 114, 164], "variou": [59, 67, 69, 138], "new_pric": 59, "old_pric": 59, "better": [59, 152, 161], "old": [59, 67, 82, 91, 120], "model_nam": [59, 62, 120], "identifier_to_combin": 59, "input_model_gener": [59, 99, 100], "solv": 59, "unit": 59, "altern": 59, "table1": 59, "table2": 59, "return_al": [60, 85], "short": [60, 91, 129], "querier": [61, 64, 133, 134], "run_queri": [61, 62, 64], "make_prompt": [61, 62, 64], "retrieve_s3_answ": [61, 62, 64], "save_s3_answ": [61, 62, 64], "td_gpt_data": [61, 64], "get_unique_transmiss": [61, 63, 64], "get_fuel_type_column": [61, 63, 64], "sys_prompt": 62, "table_prompt": 62, "queri": [62, 63, 98, 118, 125, 126], "openai": 62, "gpt": [62, 63, 133, 134], "respons": [62, 125, 127, 144], "error": [62, 76, 110, 117, 127, 161], "prompt": 62, "answer": 62, "query_hash": 62, "rais": [62, 111, 127, 131], "clienterror": 62, "gpt_querier": [63, 133, 134], "gptquerier": [63, 133, 134], "app": 64, "local_test": 64, "compare_dist": [64, 71], "adobe_extract": 64, "dl_classif": 64, "equipment_pars": 64, "expert_rul": 64, "gpt_process": 64, "layout_transform": 64, "index_mapp": [64, 65], "make_identity_mapp": [64, 65, 66], "make_df_numer": [64, 65, 66], "max_dimens": [64, 65, 66], "get_origin": [64, 65, 66], "construct_t": [64, 65, 66], "compos": [64, 65, 66], "layout_mapp": [64, 65], "make_long_mapp": [64, 65, 67], "clean_table_mapp": [64, 65, 67], "make_unsplit_mapp": [64, 65, 67], "make_top_left_model_mapp": [64, 65, 67], "remove_split_version_mapp": [64, 65, 67], "propagate_version_mapp": [64, 65, 67], "join_versions_mapp": [64, 65, 67], "propagate_versions_mapper_v2": [64, 65, 67], "make_transposed_mapp": [64, 65, 67], "make_title_mapp": [64, 65, 67], "add_row_mapp": [64, 65, 67], "propagate_titl": [64, 65, 67], "remove_duplicates_mapp": [64, 65, 67], "strip_field": [64, 65, 67], "insert_split_lines_mapp": [64, 65, 67], "remove_extra_version_mapp": [64, 65, 67], "mixed_column_mapp": [64, 65, 67], "make_extra_column": [64, 65, 67], "reconstruct_table_mapp": [64, 65, 67], "layout_pars": [64, 65], "most_frequ": [64, 65, 68], "column_typ": [64, 65, 68], "find_column": [64, 65, 68], "get_column_to_split": [64, 65, 68], "find_partial_vers": [64, 65, 68], "find_empti": [64, 65, 68], "find_split": [64, 65, 68], "find_technical_split": [64, 65, 68], "find_titl": [64, 65, 68], "find_first_pric": [64, 65, 68], "find_previous_titl": [64, 65, 68], "find_to_merg": [64, 65, 68], "find_numb": [64, 65, 68], "find_missing_column": [64, 65, 68], "apply_middle_symetri": [64, 65, 68], "merge_mapp": [64, 65], "make_unmerge_default_mapp": [64, 65, 69], "make_unmerge_uniqueness_mapp": [64, 65, 69], "construct_merge_entri": [64, 65, 69], "make_unmerge_symmetry_mapp": [64, 65, 69], "technical_mapp": [64, 65], "merge_header_mapp": [64, 65, 70], "find_gears_mapp": [64, 65, 70], "remove_superscript": [64, 65, 70], "select_best_columns_mapp": [64, 65, 70], "delete_no_engine_mapp": [64, 65, 70], "make_log": 64, "create_excel": [64, 75], "write_excel": [64, 72, 75], "create_pl_t": [64, 72, 75], "create_td_t": [64, 72, 75], "create_eq_t": [64, 72, 75], "excel_extractor": [64, 75], "make_legend": [64, 73, 75, 77], "xls_for_annot": [64, 73, 75], "extract_label": [64, 73, 75], "shuffle_t": [64, 73, 75], "find_table_boundari": [64, 73, 75], "parse_t": [64, 73, 75, 132], "create_cell_annot": [64, 73, 75], "create_layout_annot": [64, 73, 75], "excel_util": [64, 75], "extend_valu": [64, 74, 75], "ordered_dict_to_datafram": [64, 74, 75], "dict_to_datafram": [64, 74, 75], "missed_entri": [64, 74, 75], "fill_overall_confid": [64, 74, 75], "equipment_to_color": [64, 74, 75], "equipment_to_symbol": [64, 74, 75], "equipment_to_symbol_simpl": [64, 74, 75], "entry_to_color": [64, 74, 75], "make_equipment_datafram": [64, 74, 75], "make_equipment_table_datafram": [64, 74, 75], "color_equipment_wid": [64, 74, 75], "color_equipment_long": [64, 74, 75], "match_equip": [64, 74, 75], "create_missing_equipment_df": [64, 74, 75], "_eq_typ": [64, 74, 75], "read_excel": [64, 75], "read_error": [64, 75, 76], "read_td_stat": [64, 75, 76], "read_cel": [64, 75, 76], "table_mak": [64, 75], "draw_tabl": [64, 75, 77], "make_base_t": [64, 75, 77], "make_standard_t": [64, 75, 77], "make_extended_t": [64, 75, 77], "make_match_t": [64, 75, 77], "make_is_copy_t": [64, 75, 77], "make_original_t": [64, 75, 77], "make_not_matched_t": [64, 75, 77], "write_miss": [64, 75, 77], "make_standard_td_t": [64, 75, 77], "make_extended_td_t": [64, 75, 77], "write_eq_statist": [64, 75, 77], "make_serial_t": [64, 75, 77], "make_serial_table_wid": [64, 75, 77], "make_legend_eq_diff": [64, 75, 77], "make_generic_t": [64, 75, 77], "write_td_stat": [64, 75, 77], "make_all_equipment_datafram": [64, 75, 77], "make_eq_legend": [64, 75, 77], "write_label": [64, 75], "generate_cell_label": [64, 75, 78], "labels_from_values_new": [64, 75, 78], "labels_from_valu": [64, 75, 78], "labels_from_values_extend": [64, 75, 78], "contains_keyword": [64, 75, 78], "generate_confidence_label": [64, 75, 78], "generate_confidence_logs_label": [64, 75, 78], "generate_mapping_label": [64, 75, 78], "generate_id_mapping_label": [64, 75, 78], "generate_handwritten_label": [64, 75, 78], "find_largest_index": [64, 75, 78], "extract_valu": [64, 75, 78], "make_csv": [64, 75, 78], "csv_to_s3": [64, 75, 78], "write_log": [64, 75, 113, 117], "initialize_log": [64, 75, 79], "cleanup_log": [64, 75, 79], "log_result": [64, 75, 79], "multi_values_split": 64, "split_multi_valu": [64, 80, 81], "multi_values_splitt": [64, 80, 81], "get_categori": [64, 80, 81], "title_splitt": [64, 80, 81], "extend_t": [64, 80, 81], "multi_values_datafram": [64, 80, 81], "split_title_row": [64, 80, 81], "split_column": [64, 67, 80, 81], "split_check": [64, 80, 81], "predict_categories_dl": [64, 80, 82], "get_brand_from_fil": [64, 80, 82], "get_brand_from_json": [64, 80, 82], "get_predictor_necess": [64, 80, 82], "page_select": 64, "pl_finder": [64, 83], "prepare_tables_ful": [64, 83, 84, 85], "make_label": [64, 83, 84, 85], "make_t": [64, 83, 84, 85], "find_tabl": [64, 83, 84, 85, 106, 110], "merge_t": [64, 83, 84, 106], "stack_tables_al": [64, 83, 84, 86], "table_metr": [64, 83, 84, 86], "stack_tabl": [64, 83, 84, 86], "get_filled_column": [64, 83, 84, 86], "stack_fil": [64, 83, 84, 86], "price_page_find": [64, 83, 84], "find_pag": [64, 83, 84, 87], "price_table_find": [64, 83, 84], "insert_statu": [64, 83, 84, 88], "prepare_t": [64, 83, 84, 88], "table_prepar": [64, 83, 84], "process_t": [64, 83, 84, 89], "process_row": [64, 83, 84, 89], "process_multiline_cel": [64, 83, 84, 89], "process_row_valu": [64, 83, 84, 89], "compare_and_add_engin": [64, 83, 84, 90], "process_price_lin": [64, 83, 84, 90], "largest_number_textract": [64, 83, 84, 90], "td_finder": [64, 83], "general_lemmat": [64, 83, 91, 92], "feature_n_kw": [64, 83, 91, 92], "feature_n_ccm": [64, 83, 91, 92], "feature_rim": [64, 83, 91, 92], "feature_n_pric": [64, 83, 91, 92], "is_price_in_text": [64, 83, 91, 92], "feature_n_other_numb": [64, 83, 91, 92], "search_for_str": [64, 83, 91, 92], "extract_bag": [64, 83, 91, 92], "calculate_featur": [64, 83, 91, 92], "split_page_in_half": [64, 83, 91, 92], "extract_featur": [64, 83, 91, 92], "classnam": [64, 83, 92, 93], "get_pag": [64, 83, 92, 93], "parser_postprocess": 64, "add_confid": [64, 97], "is_special_version_nam": [64, 95, 97], "add_confidence_for_vers": [64, 95, 97], "add_confidence_for_combin": [64, 95, 97], "add_ve_id": [64, 97], "object_to_str": [64, 94, 97], "list_to_str": [64, 94, 97], "string_scor": [64, 94, 97], "numeric_scor": [64, 94, 97], "scorer": [64, 94, 97, 98], "log_match": [64, 94, 97], "update_object_id": [64, 94, 97], "load_from_preskok": [64, 94, 97], "choose_categori": [64, 94, 97], "find_drivetrain_id": [64, 94, 97], "find_transmission_id": [64, 94, 97], "limit_engines_by_batteri": [64, 94, 97], "get_possible_engin": [64, 94, 97], "get_id_to_count_map": [64, 94, 97], "generate_and_sort_possible_object": [64, 94, 97], "get_possible_transmission_and_drivetrain": [64, 94, 97], "match_drivetrain": [64, 94, 97], "match_transmiss": [64, 94, 97], "get_matching_battery_id": [64, 94, 97], "_assemble_str": [64, 94, 97], "join_on_pric": [64, 96, 97], "join_duplicate_engine_combin": [64, 96, 97], "correct_model": [64, 97, 98], "correct_models_after_match": [64, 97, 98], "apply_model_name_map": [64, 97, 98], "add_electric_model": [64, 97, 98], "check_and_unify_minor_model_name_differ": [64, 97, 98], "unify_version_nam": [64, 97, 98], "make_new_models_from_vers": [64, 97, 98], "update_models_for_dacia_stepwai": [64, 97, 98], "post_process_parsed_cont": [64, 97, 98], "custom_scor": [64, 97, 98], "model_generation_match": [64, 97], "get_model_nam": [64, 97, 99, 100], "add_model_generation_id": [64, 97, 99, 100], "model_generation_matching_v2": [64, 97], "get_model_generation_id": [64, 97, 100], "update_combo_with_model_generation_id": [64, 97, 100], "cleanup_map": [64, 97, 101], "fuzzy_match": [64, 74, 77, 97, 101], "map_to_default": [64, 97, 101], "postprocess_vers": [64, 97, 101], "apply_tehnical_data": [64, 97, 101], "post_process_combin": [64, 97, 101], "_apply_tehnical_data_for_categori": [64, 97, 101], "reduce_possible_id": [64, 97], "get_data_from_t": [64, 97, 102], "get_tehnical_data": [64, 97, 102], "tehnical_data_content_for_id_reduct": [64, 97, 102], "preprocess_pdf": [64, 113, 117], "adobe_pars": [64, 106], "unpack_stream": [64, 103, 106], "set_up_extractor": [64, 103, 106, 111, 112], "textract_pag": [64, 103, 106, 111, 112], "adobe_pag": [64, 103, 106], "assemble_extract": [64, 106], "create_extract": [64, 104, 106], "improve_textract": [64, 106], "convert_to_textract_format": [64, 105, 106], "get_model_bbox": [64, 105, 106], "are_parallel": [64, 106, 107], "get_tables_to_merg": [64, 106, 107], "update_right_table_cel": [64, 106, 107], "join_table_block": [64, 106, 107], "add_cel": [64, 106, 107], "merge_table_pair": [64, 106, 107], "get_lang_detector": [64, 106, 108], "get_file_like_pdf": [64, 106, 108], "extract_text": [64, 106, 108], "detect_languag": [64, 106, 108], "classify_pag": [64, 106, 108, 113, 117], "extract_imag": [64, 106, 108], "display_bounding_box": [64, 106, 108], "segment_classifi": [64, 106], "delimiters_count": [64, 106, 109], "separate_packag": [64, 106, 109], "sentence_to_vector": [64, 106, 109], "sentence_to_map": [64, 106, 109], "predict_lin": [64, 106, 109], "predict_t": [64, 106, 109], "is_equip": [64, 85, 106, 109], "segment_divid": [64, 106], "text_list": [64, 106, 110], "is_contained_in": [64, 106, 110], "find_page_gap": [64, 106, 110], "create_all_seg": [64, 106, 110], "create_seg": [64, 106, 110], "find_lin": [64, 106, 110], "find_seg": [64, 106, 110], "set_us": [64, 106, 110], "get_us": [64, 106, 110], "color_used_seg": [64, 106, 110], "textract_gener": [64, 106], "textract_pl_pag": [64, 106, 111], "textract_td_pag": [64, 106, 111], "textract_eq_pag": [64, 106, 111], "textract_pars": [64, 106], "price_list_pars": 64, "identify_missing_categori": [64, 113, 114], "correct_predict": [64, 113, 114], "correct_field": [64, 113, 114], "dict_to_equip": [64, 113, 115], "dict_to_color": [64, 113, 115], "is_textract_valid": [64, 113, 115], "multiple_price_lists_util": [64, 113], "check_for_two_price_list": [64, 113, 116], "sort_pag": [64, 113, 116], "split_list": [64, 113, 116], "group_page_numb": [64, 113, 116], "detect_and_split_multi_price_list": [64, 113, 116], "custom_sort_kei": [64, 113, 116], "parser_v2": [64, 113], "parse_with_call_back": [64, 113, 117], "generate_extract": [64, 113, 117], "generate_general_attr": [64, 113, 117], "extract_t": [64, 113, 117], "limit_combin": [64, 113, 117], "combination_correct": [64, 113, 117], "ve_id_match": [64, 113, 117], "combination_postprocess": [64, 113, 117], "json_postprocess": [64, 113, 117], "handle_multiple_price_list": [64, 113, 117], "add_metadata": [64, 113, 117], "limit_model_data": [64, 113, 117], "parser_v2_util": [64, 113], "generate_parser_input": [64, 113, 118], "get_s3_url_from_price_list_detail": [64, 113, 118], "summarize_result": [64, 113, 118], "restructure_model_gener": [64, 113, 118], "get_latest_pl": [64, 113, 118], "get_parsed_eq_pag": [64, 113, 118], "__post_init__": [64, 113, 119], "load_model": [64, 113, 119], "_prepare_path": [64, 113, 119], "load_huggingface_model": [64, 113, 120], "load_old_classifi": [64, 113, 120], "load_dl_model": [64, 113, 120], "load_page_classifi": [64, 113, 120], "load_line_resourc": [64, 113, 120], "load_yolo_fiat_model": [64, 113, 120], "load_brand_country_specif": [64, 113, 120], "transform_to_json": [64, 113], "write_nest": [64, 113, 121], "get_engine_combin": [64, 113, 121, 124, 125], "get_vers": [64, 113, 121, 124, 125], "get_model_gener": [64, 113, 121, 124, 125], "eval": [64, 124], "perfect_match": [64, 114, 122, 124], "partial_match": [64, 122, 124], "price_match": [64, 122, 124], "match_combin": [64, 122, 124], "evaluate_combin": [64, 122, 124], "print_match_side_by_sid": [64, 122, 124], "get_best_match": [64, 122, 124], "simplify_preskok_json_format": [64, 122, 124], "helper": [64, 124, 132, 142], "get_secret": [64, 123, 124], "retri": [64, 123, 124], "decor": [64, 123, 124], "preskok_api": [64, 124], "get_preskok_auth_token": [64, 124, 125], "get_preskok": [64, 124, 125], "get_engin": [64, 124, 125], "get_transmiss": [64, 124, 125], "get_drivetrain": [64, 124, 125], "get_batteri": [64, 124, 125], "get_price_list": [64, 124, 125], "get_price_list_detail": [64, 124, 125], "get_color": [64, 124, 125], "transform_gener": [64, 124, 125], "price_list": [64, 87, 124, 158], "get_price_lists_loc": [64, 124, 126], "get_price_lists_by_filenam": [64, 124, 126], "flatten_json": [64, 124, 126], "get_all_processed_price_list": [64, 124, 126], "get_all_matching_local_price_list": [64, 124, 126], "flatten": [64, 124, 126], "make_s3url": [64, 124, 127], "put": [64, 124, 127], "put_file_lik": [64, 124, 127], "get_file_lik": [64, 124, 127], "get_dict_stream": [64, 124, 127], "get_dict_read": [64, 124, 127], "store_csv": [64, 124, 127], "copy_to": [64, 124, 127], "move_to": [64, 124, 127], "l": [64, 124, 127], "delete_prefix": [64, 124, 127], "get_last_modifi": [64, 124, 127], "parse_bucket": [64, 124, 127], "parse_kei": [64, 124, 127], "parse_url": [64, 124, 127], "parse_filenam": [64, 124, 127], "generate_presigned_url": [64, 124, 127], "head": [64, 124, 127], "string_util": [64, 124], "normal": [64, 98, 124, 128, 129], "normalize_vers": [64, 124, 128], "is_it_pric": [64, 124, 128], "extract_pric": [64, 124, 128], "extract_other_numb": [64, 124, 128], "extract_all_numb": [64, 124, 128], "extract_kw": [64, 124, 128], "table_parser_v2": 64, "abstract_pars": [64, 132], "normalize_label": [64, 129, 132], "normalize_nam": [64, 129, 132], "use_original_valu": [64, 129, 132], "make_cell_statu": [64, 129, 132], "insert_cell_statu": [64, 129, 132], "label_and_transform_t": [64, 129, 132, 133, 138], "extend_tables_and_extract_combin": [64, 129, 132, 133, 138], "preprocess_data": [64, 129, 132], "sort_and_upd": [64, 129, 132], "get_all_valid_pric": [64, 129, 132], "get_eur_pric": [64, 129, 132], "prioritize_numerical_column": [64, 129, 132], "column_extract": [64, 132], "extract_kw_or_k": [64, 130, 132], "extract_fuel": [64, 130, 132], "_extract_kw": [64, 130, 132], "_extract_k": [64, 130, 132], "_extract_voltag": [64, 130, 132], "check_for_mandatory_categori": [64, 131, 132], "find_table_above_current_on": [64, 131, 132], "use_above_table_head": [64, 131, 132], "is_table_above_and_same_width": [64, 131, 132], "extract_data": [64, 132, 133], "add_surcharg": [64, 132, 133, 134], "add_value_indic": [64, 132, 133], "infer_version_from_table_head": [64, 132, 133], "make_column_index": [64, 132, 133], "extract_td_fuel": [64, 132, 133], "extract_td_transmiss": [64, 132, 133], "parse_td_t": [64, 132, 134], "parse_pl_t": [64, 132, 134], "process_model_brand": [64, 132, 135], "get_year": [64, 132, 135], "search_textract_block": [64, 132], "count_nan": [64, 132, 136], "get_bbox_center_point": [64, 132, 136], "euclidean_dist": [64, 132, 136], "vertical_dist": [64, 132, 136], "is_bbox_within_rang": [64, 132, 136], "find_block_by_rang": [64, 132, 136], "find_closest_block_by_euclid": [64, 132, 136], "get_closest_textract_block": [64, 132, 136], "find_block_by_text": [64, 132, 136], "table_surround": [64, 132], "get_data_from_table_surround": [64, 132, 137], "get_versions_data": [64, 132, 137], "get_fuel_data": [64, 132, 137], "get_top_limit": [64, 132, 137], "update_model_generations_data": [64, 132, 137], "get_model_generations_data": [64, 132, 137], "get_additional_data": [64, 132, 137], "td_parser": [64, 132], "add_column": [64, 132, 138], "contains_transmiss": [64, 132, 138], "contains_gear": [64, 132, 138], "get_transmission_typ": [64, 132, 139], "get_gear": [64, 132, 139], "apply_transmission_process": [64, 132, 139], "merge_transmission_column": [64, 132, 139], "extend_transmissions_data": [64, 132, 139], "get_column_semant": [64, 132, 140], "transform_kw_ks_column": [64, 132, 140], "apply_column_transform": [64, 132, 140], "sort_numerical_column": [64, 132, 140], "row_to_dict": [64, 132, 140], "enhance_combin": [64, 132, 140], "read_row": [64, 132, 140], "select_model_data": [64, 132, 140], "extend_fuel_data": [64, 132, 140], "extend_model_data": [64, 132, 140], "get_textract_blocks_map": [64, 141, 142, 143], "get_rows_columns_map": [64, 141, 142], "get_text": [64, 141, 142], "get_table_csv_result": [64, 141, 142], "generate_table_csv": [64, 141, 142], "line_split": [64, 142], "get_rows_to_blocks_map": [64, 142, 143], "match_words_to_lin": [64, 142, 143], "get_child_id": [64, 142, 143], "get_textract_result": [64, 142, 144], "run_textract": [64, 142, 144], "get_and_store_block": [64, 142, 144], "empty_textract": [64, 142, 144], "textract_divid": [64, 142], "isolate_pag": [64, 142, 145], "merge_pag": [64, 142, 145], "calculate_file_hash": [64, 142, 146], "dim": 66, "ident": [66, 67, 127], "dimens": 66, "indexmapp": [66, 67], "els": 66, "cell_mapp": 66, "construct": [66, 69], "wide_model": [67, 68], "clean_threshold": [67, 68], "forced_entri": 67, "loose_split": [67, 68], "unsplit": 67, "forc": 67, "loos": [67, 68, 110], "pre_split_len": 67, "propag": 67, "possible_vers": 67, "always_join": 67, "override_subset": 67, "alwai": 67, "subset": 67, "overridden": [67, 111], "add_base_vers": 67, "old_titl": 67, "row_valu": [67, 89], "row_label": 67, "previous_titl": 67, "take_low": 67, "version_pair": 67, "taken": [67, 152, 158], "strip": 67, "split_word": 67, "selector": 67, "mixed_typ": 67, "mix": 67, "miss": [67, 68, 74, 77, 114, 122, 131, 152], "layoutlabel": 67, "previous_t": [67, 68, 129, 133], "technical_t": [67, 133], "reconstruct": 67, "entir": [67, 150, 152], "technic": [67, 68, 77, 85, 101, 102, 133, 134, 164], "frequent": 68, "return_prob": 68, "version_row": 68, "df_probabl": 68, "type_to_weight": 68, "weight": 68, "splitter": [68, 77], "after_transform": 68, "duplicate_search": 68, "middl": 68, "symmetri": [68, 69], "backward": 69, "ignore_merg": 69, "enforce_uniqu": 69, "merge_model_upward": 69, "unmerg": 69, "account": 69, "enforc": 69, "upward": 69, "iter": [69, 127], "cellmapp": 69, "direct": 69, "possible_valu": 69, "merge_indic": 69, "gear": [70, 138, 139], "superscript": 70, "sampl": [71, 91, 158], "destin": [72, 78, 79], "logdestin": [72, 78, 79], "match_result": [72, 77, 78, 79, 117], "matchallresult": [72, 78, 79, 114, 117], "eq_result": [72, 77, 79], "pl_tabl": [72, 77], "td_tabl": [72, 77, 79], "technicalpars": [72, 77, 133], "excel": [72, 73, 76, 88, 161], "workbook": 72, "sheet": [72, 73, 76, 77], "td": [72, 76, 79, 111, 116], "wb": 72, "write": [72, 77, 117, 121, 152], "written": [72, 77, 121, 150, 152], "legend": [73, 77], "extens": 73, "directory_path": 73, "save_nam": 73, "table_indic": 73, "annot": [73, 88], "file_index": 73, "input_fold": 73, "output_nam": 73, "cell_label": 73, "output_layout_nam": 73, "generate_extra": 73, "generate_gener": 73, "input_nam": 73, "shuffl": 73, "margin_row": [73, 77], "margin_column": [73, 77], "worksheet": [73, 76, 77], "table_width": 73, "table_height": 73, "values_extend": 74, "table_index": [74, 77, 78, 141], "table_length": 74, "ordered_comb": 74, "collect": 74, "ordereddict": 74, "symbol": 74, "all_equip": 74, "all_color": 74, "version_map": 74, "id_to_match_av": 74, "gt_availability_map": 74, "three": [74, 150, 152, 158], "background": [74, 77, 108], "ground_truth_color": 74, "fuzzi": [74, 77, 98, 152], "unmatch": [74, 114], "statist": [76, 77], "about": [76, 118, 152, 158], "y": [76, 110, 136, 156, 161], "coordin": [76, 131, 136, 152, 156], "openpyxl": [76, 77], "border": 77, "fill_background": 77, "wrap_around": 77, "draw": [77, 152], "drawn": [77, 161], "wrap": 77, "around": 77, "level": [77, 90, 126, 152, 160, 164], "current_table_index": 77, "show": 77, "were": [77, 116, 128, 152, 156], "len_diff": 77, "detail": [77, 85, 118, 125, 126, 152, 158, 164], "negative_equip": 77, "positive_equip": 77, "positive_label": 77, "negative_label": 77, "packet_equip": 77, "color_equip": 77, "packet_label": 77, "color_label": 77, "packet": 77, "table_i": 78, "labels5": 78, "labels10": 78, "10": [78, 90], "handwritten": 78, "path_to_directori": 78, "col_nam": 78, "label_typ": 78, "s3_path": 78, "handler": [78, 120], "s3handler": 78, "servic": [78, 118, 152], "buffer": [79, 91], "logger": [79, 123, 152, 158], "logging_buff": 79, "stringio": 79, "parsed_t": [79, 114], "necess": [81, 82, 133, 139], "subcategori": 81, "multicell_valu": 81, "defaultdict": [81, 86, 143], "labeler_bas": [81, 82], "categor": 81, "category_to_values_map": [81, 130], "split_column_index": 81, "prob": [81, 82], "splitresult": 81, "n_original_col": 81, "df_prob": 81, "relevant_typ": 81, "main_mapp": 81, "dl_label_to_ve_label_map": 82, "learn": [82, 120, 152, 158], "filepath": 82, "model_gener": [82, 99, 100], "trp": [85, 88, 89], "page_numb": 85, "languag": [85, 87, 91, 93, 108, 120], "is_technical_detail": 85, "thei": [85, 150, 152, 158, 164], "amazon": [85, 105, 152, 161], "pl_textract": [85, 117], "td_textract": 85, "stack": 86, "filled_length": 86, "filled_map_dict": 86, "filled_list": 86, "col_numb": 86, "could": [87, 152, 161], "enumer": 87, "status_dict": 88, "splitted_row": 88, "altered_cel": 88, "multi_line_cel": 88, "cell_value_chang": 88, "line_to_label": [88, 89], "content_typ": [88, 89], "contenttyp": [88, 89], "correct_textract": [88, 89], "remove_price_nois": [88, 89, 90], "post_process": [88, 89], "nois": [88, 89, 90], "post": [88, 89, 98], "nest": [88, 121, 126], "blocks_map": [89, 107, 141, 143], "cell_index": 89, "multilin": 89, "is_splitted_row": 89, "max_lin": 89, "similar": [90, 94, 99, 152], "comparison": 90, "done": [90, 152, 158], "max_numb": 90, "max_number_str": 90, "prices_lin": 90, "6000": 90, "100000": 90, "language_short_nam": 91, "perform": [91, 136, 152], "lemmat": 91, "ccm": 91, "rim": 91, "language_descriptor": 91, "descriptor": [91, 93], "string_list": 91, "sample_text": 91, "bag_word": 91, "bag": [91, 93, 147, 152], "filepath_or_buff": 91, "bow": 91, "split_pag": [91, 93], "union": [91, 127, 146], "bufferedread": [91, 127], "nearest": [93, 147, 152], "neighbor": [93, 147, 152], "random": 93, "forest": 93, "belong": 93, "filter": [93, 127], "pl_object": 94, "pred_obj": 94, "true_obj": 94, "comb": 94, "ve_object": 94, "string_categori": 94, "numeric_categori": 94, "pl_string": 94, "ve_objects_sort": 94, "preskok": [94, 95, 99, 122, 125, 158], "choos": 94, "choic": [94, 98], "categories_drivetrain": 94, "drivetrain_to_count": 94, "categories_transmiss": 94, "transmission_to_count": 94, "engines_id": 94, "model_id": [94, 125], "battery_id": [94, 125], "batteri": [94, 125, 130], "pl_date": 94, "date": [94, 118, 125, 126, 135], "engine_id": [94, 125], "model_id_to_engine_ids_map": 94, "all_object": 94, "id_to_count_map": 94, "sorting_ord": 94, "sorting_kei": 94, "possible_objects_sort": 94, "match_categori": 94, "obj_id": 94, "pl_json": [95, 96, 98, 99], "appear": [95, 147, 150, 152, 159, 164], "databas": [95, 152, 158, 159, 164], "defin": [95, 96, 121], "individu": [95, 164], "collis": 96, "lose": 96, "price_list_model_generation_id": 96, "In": [96, 147, 152, 154, 156, 158, 159, 160, 161, 164], "accordingli": [96, 145], "comput": 96, "unifi": 98, "minor": 98, "super_model_id": 98, "predefin": 98, "stepwai": 98, "model_generations_data": [98, 99, 137, 140], "custom": [98, 116], "parsed_model_gener": 99, "incom": 99, "human": 99, "both": [99, 114, 158, 164], "overwritten": 99, "parsed_model_nam": 100, "modelcombinationscor": 100, "combo": 100, "parsed_model_name_to_input_model_gener": 100, "possible_match": 101, "treshold": 101, "category_to_tokens_map": 101, "postprocess": [101, 117], "engine_nam": 101, "tehnical_data": 101, "technical_data": 101, "combination_td": 101, "default_td": 101, "regex_express": 102, "type_cast": 102, "regular": 102, "express": 102, "cast": 102, "kilowatt": [102, 128, 130], "cubic": 102, "centimet": 102, "td_page": [102, 116], "reduc": 102, "identif": 102, "purpos": 102, "adobe_stream": 103, "unpack": 103, "stream": [103, 127], "structur": [103, 121, 126, 160], "zipfil": 103, "adobeextractor": 103, "bboxes_label": 105, "img_width": 105, "img_height": 105, "img": 105, "yolo_model": 105, "yolo": [105, 120], "left_tabl": 107, "right_tabl": 107, "parallel": 107, "right": [107, 152, 160], "n_cols_left": 107, "row_diff": 107, "n_col": 107, "textract_json": 107, "nlp": 108, "languagedetector": 108, "natur": 108, "detector": 108, "s3_url": 108, "url": [108, 118, 125, 127], "bb_lst_lst": 108, "displai": 108, "resourc": [109, 120, 152, 158], "must": [109, 158], "sentenc": 109, "comma": 109, "pluse": 109, "vocab": 109, "separ": 109, "vocabulari": 109, "vector": 109, "line_num": 109, "row_num": 109, "y_start": 110, "y_end": 110, "doc_pag": 110, "gap": 110, "seg_start": 110, "seg_end": 110, "02": [110, 136], "some": [110, 147, 150, 152, 158, 159, 161, 164], "mismatch": 110, "polish": 111, "notimplementederror": 111, "subclass": [111, 129], "textractor": 112, "cach": 112, "ground_truth_json": 114, "predictions_json": 114, "previous_docu": 114, "simplifi": [114, 122, 152, 156], "soft_copi": 114, "evalu": [114, 122], "categories_bas": 114, "categories_extra": 114, "absolut": 114, "prev_match": 114, "json_lik": 115, "pl_page": 116, "give": [116, 152, 164], "sublist": 116, "whenev": 116, "follow": [116, 158], "input_data": [117, 118, 133, 134], "s3_bucket_pareto": 117, "callback": 117, "error_messag": 117, "messag": 117, "ve_id": 117, "equipment_result": 117, "local_pl_details_fold": 118, "either": [118, 146, 158], "local": [118, 119, 120, 126], "pl_document": 118, "price_list_detail": 118, "n_match": 118, "n_id_match": 118, "n_non_match": 118, "n_total": 118, "experiment_nam": 118, "experiment_d": 118, "summar": 118, "experi": [118, 152, 161], "previous_price_list_id": 118, "latest": 118, "page_to_seg": 118, "equival": 118, "automat": [119, 139], "load_asset": 119, "so": [119, 150, 152, 156, 158, 159, 160], "base_path": 119, "rel": [119, 161], "_s3": 120, "model_path": 120, "load_loc": 120, "save_loc": 120, "pre": 120, "train": [120, 152], "hug": 120, "face": 120, "brand_nam": 120, "fiat": 120, "dict_": 121, "categories_by_depth": 121, "model_generation_id": 121, "depth": 121, "engine_comb_hash_to_parsed_engine_comb": 121, "engine_comb_hash_to_raw_engine_comb": 121, "versions_field": 121, "start_idx": 121, "start_availability_id": 121, "model_to_combinations_map": 121, "model_data": 121, "model_to_model_data": 121, "compat": 121, "value_1": 122, "value_2": 122, "exactli": 122, "min_ratio": 122, "8": 122, "return_scor": 122, "9": 122, "true_comb": 122, "pred_comb": 122, "missing_categori": 122, "pred_index": 122, "true_index": 122, "criteria": 122, "true_json": 122, "pred_json": 122, "side": [122, 152, 160], "add_origin": 122, "secret_nam": 123, "region_nam": 123, "eu": 123, "central": 123, "secret": [123, 164], "manag": 123, "client": [123, 127, 152], "region": 123, "max_tri": 123, "delai": 123, "backoff": 123, "upon": 123, "multipli": 123, "execut": [123, 152], "pass": [123, 152], "authent": 125, "api": 125, "environ": 125, "json_bodi": 125, "stage": [125, 152, 156], "send": 125, "request": 125, "endpoint": 125, "bodi": [125, 127], "date_to": 125, "years_back": 125, "year": [125, 135], "back": [125, 152], "query_column": [125, 126], "max_created_at": [125, 126], "datetim": [125, 126, 127], "return_data": 125, "creation": [125, 126, 147, 152], "idx": [125, 140], "pricelisttupl": 126, "price_list_metadata": 126, "nested_json": 126, "query_valu": 126, "todai": 126, "local_price_list": 126, "filename_to_price_list_detail": 126, "s3url": 127, "anystr": 127, "s3kei": 127, "lambda": [127, 158], "serializer_kwarg": 127, "put_object": 127, "f": 127, "upload": [127, 152], "upload_fileobj": 127, "loader": 127, "delete_object": 127, "dictread": 127, "compress": 127, "while": [127, 150, 152, 161], "gzip": 127, "target_bucket": 127, "target_kei": 127, "target": [127, 158], "s3error": 127, "There": [127, 147, 152], "pattern": 127, "batch_size_min": 127, "starting_token": 127, "support": 127, "regex": [127, 152, 159], "desir": 127, "last": 127, "callabl": 127, "reader": 127, "larger": [127, 158], "memori": 127, "copy_object": 127, "move_object": 127, "client_method": 127, "get_object": 127, "expires_in": 127, "3600": 127, "temporari": [127, 137, 152], "expir": 127, "presign": 127, "meta": 127, "alphanumer": 128, "third": [128, 152], "25": 128, "500": 128, "remove_td": 129, "form": 129, "counterpart": 129, "td_": 129, "substr": 129, "abstractpars": 129, "status": 129, "placehold": 129, "descend": [129, 140], "ascend": 129, "reference_column": 129, "eur": 129, "priorit": 129, "extended_valu": 130, "extended_label": 130, "splitted_column_index": 130, "min_kw": 130, "kilosecond": 130, "info": 130, "voltag": 130, "category_to_column": 131, "mandatory_categori": 131, "mandatori": 131, "table_metadata": 131, "all_tabl": 131, "table_1": 131, "table_2": 131, "extended_map": 133, "current_col_to_origin": 133, "nan": 136, "point": [136, 152], "center": 136, "x1": 136, "y1": 136, "x2": 136, "y2": 136, "euclidean": 136, "x_rang": 136, "y_rang": 136, "toler": [136, 137], "category_block": 136, "top_limit": [136, 137], "category_to_textract_block": 136, "search_typ": 136, "table_metadata_block": 137, "category_to_textract_blocks_map": 137, "previous_table_metadata_block": 137, "tmp_model_generations_data": 137, "model_categori": 137, "height_toler": 137, "columns_already_ad": 138, "manual_transmission_keyword": 139, "manual": [139, 164], "smallest": 140, "table_id": 140, "versions_data": 140, "fuel_data": 140, "enhanc": 140, "input_model_nam": 140, "table_surrounding_data": 140, "surround": [140, 152, 154], "table_result": 141, "word_id": 143, "textract_fold": 144, "s3_kei": 144, "job": 144, "job_id": 144, "textract_s3_kei": 144, "_s3kei": 144, "page_w": 145, "isol": 145, "binaryio": 146, "sha256": 146, "binari": 146, "hexadecim": 146, "To": [147, 161, 165], "minim": [147, 152], "expens": [147, 152], "expect": [147, 152], "achiev": [147, 152], "adopt": [147, 152], "Its": [147, 152, 161], "complic": 147, "you": [147, 152, 159, 161], "want": [147, 152, 156, 159], "enlarg": 147, "At": [147, 152, 156], "narrow": 147, "down": [147, 152, 156, 161], "containin": 147, "might": [147, 152, 159, 161], "kind": 147, "weed": 147, "progress": [147, 152, 164], "covet": 147, "well": [147, 152, 154, 158], "road": 152, "step": [152, 154, 156, 158, 161], "yourself": 152, "tea": 152, "And": [150, 152, 160], "sandwich": 152, "section": [152, 164], "outlin": 152, "link": 152, "subsect": [150, 152, 159], "describ": [152, 159], "These": [152, 158], "through": 152, "graph": [152, 164], "connect": 152, "idea": 152, "refactor": [152, 156], "place": 152, "No": 152, "Will": 152, "stop": 152, "me": 152, "begin": [152, 158], "mundan": [152, 158], "yet": [152, 158], "fetch": [152, 158], "addition": [152, 158], "obtain": [152, 158], "supplementari": [152, 158], "re": [152, 158], "sold": [152, 158, 164], "adjust": 152, "open": [152, 161], "challeng": [150, 152, 161], "approach": [152, 154, 161], "directli": [152, 161], "ultim": [152, 161], "opt": [152, 161], "conjunct": [152, 161], "parti": 152, "howev": 152, "come": [152, 161], "cost": 152, "pinpoint": 152, "potenti": 152, "archiv": 152, "outcom": 152, "futur": 152, "possess": [152, 156], "crucial": [152, 156], "compon": [152, 156, 158], "care": [152, 156], "sever": [152, 156], "when": [152, 158], "stori": 152, "teeni": 152, "tini": 152, "problem": [152, 158], "dramat": 152, "denot": 152, "etc": [152, 158, 160, 164], "mental": 152, "discard": 152, "hope": [152, 164], "easi": 152, "solut": 152, "instead": 152, "emploi": [152, 159], "ai": 152, "algorithm": 152, "plethora": 152, "hand": [152, 160], "understand": [152, 154, 164], "what": [152, 154, 156, 160, 161, 164], "hors": [152, 154], "here": [152, 154, 164], "spit": [152, 154], "With": [152, 160], "tackl": [152, 160], "few": [152, 160, 161], "deduct": [152, 160], "bit": [152, 160], "eventu": [152, 160], "let": 152, "moment": 152, "pat": 152, "ourself": 152, "carefulli": [152, 159], "inspect": [152, 159], "seen": [152, 159], "our": [150, 152, 158, 159, 161], "t": [152, 159], "gdi": [152, 159], "120": [152, 159], "ibvm": [152, 159], "48v": [152, 159], "truli": [152, 159], "do": [152, 159], "e": [152, 159], "ie": [152, 159], "80": [152, 159], "everi": [150, 152], "own": [150, 152, 158, 161], "perk": [150, 152], "usual": [150, 152, 158], "celebr": [150, 152], "divers": [150, 152], "additon": [150, 152], "throughout": [150, 152, 158], "sprinkl": [150, 152], "meant": [150, 152], "handli": [150, 152], "don": 152, "forget": 152, "stretch": 152, "still": [152, 161], "everyth": 152, "organ": 152, "abstract": 152, "straightforward": 152, "go": [152, 161], "though": 152, "mani": 152, "discart": 152, "overload": 152, "correctli": [152, 161], "inevit": 152, "g0i": 152, "neither": 152, "turn": [152, 161], "french": 152, "imt": 152, "typo": 152, "ocr": [152, 161], "real": 152, "strength": [152, 161], "guess": 152, "honestli": 152, "hardli": 152, "surpris": 152, "click": 152, "reveal": 152, "hidden": 152, "slightli": 152, "tediou": 152, "narrat": 152, "weren": 152, "quit": 152, "eas": 152, "develop": [152, 164], "asses": 152, "known": 152, "measur": 152, "how": 152, "just": [152, 164], "dl": 152, "perfom": 152, "monitor": 152, "obbject": 152, "free": 152, "thing": 152, "crash": 152, "went": 152, "wrong": [152, 161], "mayb": 152, "restart": 152, "again": 152, "recur": 158, "275": 158, "below": [158, 161], "four": 158, "mixtur": 158, "illustr": 158, "flow": [158, 164], "project": [158, 164], "deploi": 158, "particular": [158, 164], "extern": 158, "serv": 158, "encount": 158, "behaviour": 158, "ovveridden": 158, "carri": 158, "wai": [158, 161], "supermodel": 158, "final": 158, "machin": 158, "term": 158, "activ": [161, 164], "work": [161, 164], "abandon": 164, "ye": 164, "who": 164, "continu": [161, 164], "goal": 164, "advertis": 164, "highlight": 164, "model_generations__model_generation__model__super_model__nam": 164, "i30": 164, "model_generations__model_generation__model__nam": 164, "sw": 164, "versions__version_nam": 164, "busi": 164, "engine_combinations__availability__price_gross": 164, "28800": 164, "engine_combinations__engine__nam": 164, "crdi": 164, "engine_combinations__transmission__typ": 164, "engine_combinations__drivetrain__typ": 164, "fwd": 164, "engine_combinations__engine__power_main_kw": 164, "84": 164, "58227": 164, "engine_combinations__engine__fuel_typ": 164, "diesel": 164, "engine_combinations__engine__id": 164, "1234": 164, "engine_combinations__trafsnsmission__id": 164, "42": 164, "engine_combinations__drivetrain__id": 164, "13": 164, "overview": 164, "high": 164, "descript": 164, "insight": 164, "cover": 164, "But": [156, 161, 164], "sinc": 164, "trade": 164, "kid": 164, "lazi": 164, "modul": 164, "lumach": 165, "pip": 165, "venv": 165, "def": 165, "hello_world": 165, "hello": 165, "world": 165, "intern": 161, "logic": 161, "someth": 161, "blocktyp": [156, 161], "98": [156, 161], "21517944335938": [156, 161], "pdeb5d21m67bz1": [156, 161], "boundingbox": [156, 161], "05293244868516922": [156, 161], "007043752353638411": [156, 161], "3939276337623596": [156, 161], "30503466725349426": [156, 161], "polygon": [156, 161], "44686007499694824": [156, 161], "31207841634750366": [156, 161], "434b6051": [156, 161], "ed59": [156, 161], "46a5": [156, 161], "b69f": [156, 161], "a28e6d6d860a": [156, 161], "af1d52fd": [156, 161], "0893": [156, 161], "48e8": [156, 161], "85a0": [156, 161], "064eb1317e68": [156, 161], "26": [156, 161], "visual": 161, "hei": 161, "One": 161, "inabl": 161, "upsid": 161, "foundament": 161, "weak": 161, "henc": 161, "why": 161, "would": [154, 161], "note": [156, 161], "ampl": 161, "opportun": 161, "shown": 156, "littl": 156, "hurt": 156, "ongo": 154, "good": 160, "luck": 160, "editor": 159}, "objects": {"": [[93, 0, 1, "", "ClassName"], [6, 0, 1, "", "TextractUtils"], [144, 2, 1, "init__", "__init__"], [119, 2, 1, "post_init__", "__post_init__"], [110, 2, 1, "repr__", "__repr__"], [101, 2, 1, "apply_tehnical_data_for_category", "_apply_tehnical_data_for_category"], [94, 2, 1, "assemble_string", "_assemble_string"], [74, 2, 1, "eq_type", "_eq_type"], [130, 2, 1, "extract_ks", "_extract_ks"], [130, 2, 1, "extract_kw", "_extract_kw"], [130, 2, 1, "extract_voltage", "_extract_voltage"], [12, 2, 1, "get_source_words_indices", "_get_source_words_indices"], [119, 2, 1, "prepare_path", "_prepare_path"], [30, 2, 1, "replace", "_replace"], [121, 2, 1, "", "add_availability"], [6, 2, 1, "", "add_bounding_size"], [107, 2, 1, "", "add_cells"], [138, 2, 1, "", "add_columns"], [95, 2, 1, "", "add_confidence_for_combination"], [95, 2, 1, "", "add_confidence_for_versions"], [17, 2, 1, "", "add_confidence_to_output"], [98, 2, 1, "", "add_electric_models"], [8, 2, 1, "", "add_element"], [51, 2, 1, "", "add_grand_tour_surcharge"], [55, 2, 1, "", "add_hybrid_data"], [24, 2, 1, "", "add_ids"], [117, 2, 1, "", "add_metadata"], [100, 2, 1, "", "add_model_generation_id"], [19, 2, 1, "", "add_numbers"], [67, 2, 1, "", "add_row_mapper"], [6, 2, 1, "", "add_sizes"], [134, 2, 1, "", "add_surcharge"], [29, 2, 1, "", "add_titles"], [133, 2, 1, "", "add_value_indices"], [94, 2, 1, "", "add_ve_ids"], [30, 2, 1, "", "add_versions"], [103, 1, 1, "", "adobe_pages"], [0, 2, 1, "", "adobe_to_textract"], [143, 2, 1, "", "all_lines"], [29, 2, 1, "", "analyze_lines"], [15, 2, 1, "", "any_in_row"], [10, 2, 1, "id0", "app"], [140, 2, 1, "", "apply_column_transformations"], [51, 2, 1, "", "apply_default"], [55, 2, 1, "", "apply_engine_name_corrections"], [101, 2, 1, "", "apply_mappings"], [68, 2, 1, "", "apply_middle_symetry"], [98, 2, 1, "", "apply_model_name_map"], [101, 2, 1, "", "apply_tehnical_data"], [51, 2, 1, "", "apply_to_engine_combinations"], [139, 2, 1, "", "apply_transmission_processing"], [107, 2, 1, "", "are_parallel"], [59, 2, 1, "", "are_tables_same"], [30, 2, 1, "", "assemble_version_lines"], [15, 2, 1, "", "average_transformation_confidence"], [19, 2, 1, "", "biggest_number"], [6, 2, 1, "", "block_position"], [26, 2, 1, "", "blocks_from_lines"], [91, 2, 1, "", "calculate_features"], [146, 2, 1, "", "calculate_file_hash"], [17, 2, 1, "", "calculate_no_version_confidence"], [17, 2, 1, "", "calculate_overall_confidence"], [100, 2, 1, "", "calculate_scores"], [12, 2, 1, "", "categorise_textract_words"], [4, 2, 1, "", "cell_matches"], [19, 2, 1, "", "char_index"], [19, 2, 1, "", "char_index_simple"], [98, 2, 1, "", "check_and_unify_minor_model_name_differences"], [131, 2, 1, "", "check_for_mandatory_categories"], [116, 2, 1, "", "check_for_two_price_lists"], [24, 2, 1, "", "check_if_table_already_parsed"], [28, 2, 1, "", "check_margins"], [94, 2, 1, "", "choose_categories"], [36, 2, 1, "", "classify_equipment_cell"], [29, 2, 1, "", "classify_lines"], [117, 2, 1, "", "classify_pages"], [128, 2, 1, "", "clean"], [67, 2, 1, "", "clean_table_mapper"], [120, 2, 1, "", "cleanup"], [79, 2, 1, "", "cleanup_logging"], [101, 2, 1, "", "cleanup_map"], [74, 2, 1, "", "color_equipment_long"], [74, 2, 1, "", "color_equipment_wide"], [110, 2, 1, "", "color_used_segments"], [68, 2, 1, "", "column_type"], [117, 2, 1, "", "combination_corrections"], [117, 2, 1, "", "combination_postprocessing"], [24, 2, 1, "", "combine"], [17, 2, 1, "", "combine_confidence"], [29, 2, 1, "", "combine_lines"], [90, 2, 1, "", "compare_and_add_engine"], [71, 2, 1, "", "compare_distance"], [66, 1, 1, "", "compose"], [41, 2, 1, "", "concat_header"], [17, 2, 1, "", "confidence_features"], [15, 2, 1, "", "confidence_table"], [69, 2, 1, "", "construct_merge_entry"], [66, 1, 1, "", "construct_table"], [110, 2, 1, "", "contains"], [138, 2, 1, "", "contains_gears"], [78, 2, 1, "", "contains_keyword"], [138, 2, 1, "", "contains_transmission"], [51, 2, 1, "", "convert_ks_to_kw"], [105, 2, 1, "", "convert_to_textract_format"], [127, 2, 1, "id5", "copy_to"], [4, 2, 1, "", "correct_cell"], [4, 2, 1, "", "correct_cells"], [114, 2, 1, "", "correct_field"], [98, 2, 1, "", "correct_models"], [98, 2, 1, "", "correct_models_after_match"], [114, 2, 1, "", "correct_predictions"], [5, 2, 1, "", "correct_zeros"], [136, 2, 1, "", "count_nans"], [110, 2, 1, "", "create_all_segments"], [2, 2, 1, "", "create_cell"], [73, 2, 1, "", "create_cell_annotations"], [19, 2, 1, "", "create_decuple_values"], [72, 2, 1, "", "create_eq_tables"], [31, 2, 1, "", "create_equipment"], [72, 2, 1, "", "create_excel"], [24, 2, 1, "", "create_extra_brand_versions"], [24, 2, 1, "", "create_extra_versions"], [104, 2, 1, "", "create_extracts"], [73, 2, 1, "", "create_layout_annotations"], [2, 2, 1, "", "create_line"], [74, 2, 1, "", "create_missing_equipment_df"], [37, 2, 1, "", "create_package"], [22, 2, 1, "", "create_package_item"], [2, 2, 1, "", "create_page"], [72, 2, 1, "", "create_pl_tables"], [19, 2, 1, "", "create_quintuple_values"], [110, 2, 1, "", "create_segment"], [2, 2, 1, "", "create_table"], [8, 2, 1, "", "create_table_block"], [72, 2, 1, "", "create_td_tables"], [24, 2, 1, "", "create_versions_map"], [2, 2, 1, "", "create_word"], [78, 2, 1, "", "csv_to_s3"], [98, 2, 1, "", "custom_scorer"], [116, 2, 1, "", "custom_sort_key"], [31, 2, 1, "", "dash_start"], [123, 2, 1, "", "decorator"], [12, 2, 1, "", "deduplicate_word_combinations"], [50, 2, 1, "", "default_transmission_for_electric_engines"], [127, 2, 1, "id4", "delete"], [70, 2, 1, "", "delete_no_engine_mapper"], [12, 2, 1, "", "delete_non_relevant_pages"], [127, 2, 1, "", "delete_prefix"], [109, 2, 1, "", "delimiters_count"], [116, 2, 1, "", "detect_and_split_multi_price_lists"], [43, 2, 1, "", "detect_code"], [108, 2, 1, "", "detect_language"], [44, 2, 1, "", "detect_possible_splitted_table"], [42, 2, 1, "", "detect_rows_with_repeating_state"], [45, 2, 1, "", "df_wide_to_long"], [115, 2, 1, "", "dict_to_color"], [74, 2, 1, "", "dict_to_dataframe"], [115, 2, 1, "", "dict_to_equipment"], [108, 2, 1, "", "display_bounding_boxes"], [21, 2, 1, "", "dl_cleanup"], [77, 2, 1, "", "draw_table"], [144, 2, 1, "", "empty_textract"], [140, 2, 1, "", "enhance_combination"], [22, 2, 1, "", "entry_properties"], [74, 2, 1, "", "entry_to_color"], [31, 2, 1, "", "equipment_count"], [74, 2, 1, "", "equipment_to_color"], [74, 2, 1, "", "equipment_to_symbol"], [74, 2, 1, "", "equipment_to_symbol_simple"], [136, 2, 1, "", "euclidean_distance"], [122, 2, 1, "", "evaluate_combinations"], [14, 2, 1, "", "exact_match"], [140, 2, 1, "", "extend_fuel_data"], [4, 2, 1, "", "extend_id"], [13, 2, 1, "", "extend_mapping_dict"], [140, 2, 1, "", "extend_model_data"], [81, 2, 1, "", "extend_tables"], [138, 2, 1, "", "extend_tables_and_extract_combinations"], [139, 2, 1, "", "extend_transmissions_data"], [74, 2, 1, "", "extend_values"], [19, 2, 1, "", "extended_text_to_matrix"], [7, 2, 1, "", "extract"], [128, 2, 1, "", "extract_all_numbers"], [91, 2, 1, "", "extract_bag"], [27, 2, 1, "", "extract_color_prop"], [133, 2, 1, "", "extract_data"], [24, 2, 1, "", "extract_equipment"], [40, 2, 1, "", "extract_equipment_pipeline"], [91, 2, 1, "", "extract_features"], [130, 2, 1, "", "extract_fuel"], [108, 2, 1, "", "extract_image"], [54, 2, 1, "", "extract_keyword_items"], [128, 2, 1, "", "extract_kw"], [130, 2, 1, "", "extract_kw_or_ks"], [73, 2, 1, "", "extract_labels"], [128, 2, 1, "", "extract_number"], [19, 2, 1, "", "extract_numbers"], [128, 2, 1, "", "extract_other_numbers"], [128, 2, 1, "", "extract_price"], [45, 2, 1, "", "extract_price_simplified"], [26, 2, 1, "", "extract_se"], [117, 2, 1, "", "extract_tables"], [133, 2, 1, "", "extract_td_fuel"], [133, 2, 1, "", "extract_td_transmission"], [108, 2, 1, "", "extract_text"], [78, 2, 1, "", "extract_value"], [54, 2, 1, "", "extract_versions"], [91, 2, 1, "", "feature_n_ccm"], [91, 2, 1, "", "feature_n_kw"], [91, 2, 1, "", "feature_n_other_numbers"], [91, 2, 1, "", "feature_n_prices"], [91, 2, 1, "", "feature_rims"], [54, 2, 1, "", "fill_dacia_drivetrain"], [1, 2, 1, "", "fill_dummy_blocks"], [74, 2, 1, "", "fill_overall_confidence"], [9, 2, 1, "", "fill_partial_table"], [9, 2, 1, "", "fill_table"], [9, 2, 1, "", "fill_tables"], [136, 2, 1, "", "find_block_by_range"], [136, 2, 1, "", "find_block_by_text"], [9, 2, 1, "", "find_blocks_for_text"], [136, 2, 1, "", "find_closest_block_by_euclid"], [68, 2, 1, "", "find_column"], [94, 2, 1, "", "find_drivetrain_id"], [68, 2, 1, "", "find_empty"], [68, 2, 1, "", "find_first_price"], [70, 2, 1, "", "find_gears_mapper"], [55, 2, 1, "", "find_hybrid"], [78, 2, 1, "", "find_largest_index"], [110, 2, 1, "", "find_lines"], [68, 2, 1, "", "find_missing_column"], [68, 2, 1, "", "find_number"], [110, 2, 1, "", "find_page_gaps"], [87, 2, 1, "", "find_pages"], [6, 2, 1, "", "find_pairs"], [68, 2, 1, "", "find_partial_versions"], [68, 2, 1, "", "find_previous_title"], [45, 2, 1, "", "find_price_column"], [110, 2, 1, "", "find_segment"], [110, 2, 1, "", "find_segments"], [68, 2, 1, "", "find_spliters"], [131, 2, 1, "", "find_table_above_current_one"], [73, 2, 1, "", "find_table_boundaries"], [110, 2, 1, "", "find_tables"], [68, 2, 1, "", "find_technical_spliters"], [68, 2, 1, "", "find_title"], [68, 2, 1, "", "find_to_merge"], [94, 2, 1, "", "find_transmission_id"], [30, 2, 1, "", "find_version_indices"], [68, 2, 1, "", "find_versions"], [126, 2, 1, "", "flatten"], [126, 2, 1, "", "flatten_json"], [49, 2, 1, "", "format_add_titles"], [49, 2, 1, "", "format_add_versions"], [49, 2, 1, "", "format_find_versions"], [49, 2, 1, "", "format_join_version_type_one"], [49, 2, 1, "", "format_join_version_type_two"], [55, 2, 1, "", "fuel_type_correction"], [39, 2, 1, "", "full_labeling"], [101, 2, 1, "", "fuzzy_match"], [91, 2, 1, "", "general_lemmatization"], [94, 2, 1, "", "generate_and_sort_possible_objects"], [34, 2, 1, "", "generate_cell_images"], [78, 2, 1, "", "generate_cell_labels"], [78, 2, 1, "", "generate_confidence_labels"], [78, 2, 1, "", "generate_confidence_logs_labels"], [117, 2, 1, "", "generate_extracts"], [117, 2, 1, "", "generate_general_attr"], [78, 2, 1, "", "generate_handwritten_labels"], [41, 2, 1, "", "generate_header"], [78, 2, 1, "", "generate_id_mapping_labels"], [78, 2, 1, "", "generate_mapping_labels"], [118, 2, 1, "", "generate_parser_input"], [127, 2, 1, "", "generate_presigned_url"], [141, 2, 1, "", "generate_table_csv"], [12, 2, 1, "", "generate_word_combinations"], [127, 2, 1, "id1", "get"], [137, 2, 1, "", "get_additional_data"], [126, 2, 1, "", "get_all_matching_local_price_lists"], [126, 2, 1, "", "get_all_processed_price_lists"], [129, 2, 1, "", "get_all_valid_prices"], [144, 2, 1, "", "get_and_store_blocks"], [125, 2, 1, "", "get_batteries"], [136, 2, 1, "", "get_bbox_center_point"], [12, 2, 1, "", "get_best_combination_from_duplicates"], [122, 2, 1, "", "get_best_match"], [6, 2, 1, "", "get_block_map"], [1, 2, 1, "", "get_bounding_box"], [82, 2, 1, "", "get_brand_from_file"], [82, 2, 1, "", "get_brand_from_json"], [81, 2, 1, "", "get_categories"], [13, 2, 1, "", "get_category_values_dict"], [34, 2, 1, "", "get_cell_as_image"], [34, 2, 1, "", "get_cell_image"], [36, 2, 1, "", "get_cell_state"], [143, 2, 1, "", "get_child_ids"], [5, 2, 1, "", "get_child_relationships"], [6, 2, 1, "", "get_childs"], [136, 2, 1, "", "get_closest_textract_block"], [43, 2, 1, "", "get_code_col"], [125, 2, 1, "", "get_colors"], [140, 2, 1, "", "get_column_semantics"], [68, 2, 1, "", "get_column_to_split"], [59, 2, 1, "", "get_combination_identifier"], [27, 2, 1, "", "get_cropped_image"], [102, 2, 1, "", "get_data_from_table"], [137, 2, 1, "", "get_data_from_table_surrounding"], [127, 2, 1, "id2", "get_dict_reader"], [127, 2, 1, "id3", "get_dict_streamer"], [125, 2, 1, "", "get_drivetrains"], [121, 2, 1, "", "get_engine_combination"], [125, 2, 1, "", "get_engine_combinations"], [51, 2, 1, "", "get_engine_surcharge"], [125, 2, 1, "", "get_engines"], [43, 2, 1, "", "get_eq_col"], [129, 2, 1, "", "get_eur_prices"], [127, 2, 1, "id0", "get_file_like"], [108, 2, 1, "", "get_file_like_pdf"], [86, 2, 1, "", "get_filled_columns"], [137, 2, 1, "", "get_fuel_data"], [63, 2, 1, "", "get_fuel_type_column"], [13, 2, 1, "", "get_full_mapping_dict"], [139, 2, 1, "", "get_gears"], [94, 2, 1, "", "get_id_to_count_map"], [108, 2, 1, "", "get_lang_detector"], [127, 2, 1, "", "get_last_modified"], [118, 2, 1, "", "get_latest_pl"], [13, 2, 1, "", "get_mapping_dict"], [13, 2, 1, "", "get_mappings"], [94, 2, 1, "", "get_matching_battery_id"], [1, 2, 1, "", "get_mean_bboxes"], [5, 2, 1, "", "get_merge_groups"], [105, 2, 1, "", "get_model_bboxes"], [121, 2, 1, "", "get_model_generation"], [100, 2, 1, "", "get_model_generation_id"], [125, 2, 1, "", "get_model_generations"], [137, 2, 1, "", "get_model_generations_data"], [100, 2, 1, "", "get_model_name"], [66, 1, 1, "", "get_origins"], [118, 2, 1, "", "get_parsed_eq_pages"], [94, 2, 1, "", "get_possible_engines"], [94, 2, 1, "", "get_possible_transmission_and_drivetrains"], [19, 2, 1, "", "get_prediction"], [19, 2, 1, "", "get_prediction_df"], [19, 2, 1, "", "get_prediction_list"], [19, 2, 1, "", "get_prediction_with_proba"], [82, 2, 1, "", "get_predictor_necessities"], [125, 2, 1, "", "get_preskok"], [125, 2, 1, "", "get_preskok_auth_token"], [40, 2, 1, "", "get_price_col_and_extract_prices"], [125, 2, 1, "", "get_price_list_details"], [125, 2, 1, "", "get_price_lists"], [126, 2, 1, "", "get_price_lists_by_filename"], [126, 2, 1, "", "get_price_lists_local"], [141, 2, 1, "", "get_rows_columns_map"], [143, 2, 1, "", "get_rows_to_blocks_map"], [118, 2, 1, "", "get_s3_url_from_price_list_details"], [123, 2, 1, "", "get_secret"], [33, 2, 1, "", "get_serial_eq_pages"], [51, 2, 1, "", "get_surcharge_for_text"], [141, 2, 1, "", "get_table_csv_results"], [107, 2, 1, "", "get_tables_to_merge"], [102, 2, 1, "", "get_tehnical_data"], [141, 2, 1, "", "get_text"], [143, 2, 1, "", "get_textract_blocks_map"], [144, 2, 1, "", "get_textract_result"], [43, 2, 1, "", "get_title_row"], [137, 2, 1, "", "get_top_limit"], [139, 2, 1, "", "get_transmission_type"], [125, 2, 1, "", "get_transmissions"], [63, 2, 1, "", "get_unique_transmissions"], [110, 2, 1, "", "get_used"], [125, 2, 1, "", "get_versions"], [137, 2, 1, "", "get_versions_data"], [30, 2, 1, "", "get_word_at_index"], [6, 2, 1, "", "get_word_to_cell"], [6, 2, 1, "", "get_word_to_line"], [135, 2, 1, "", "get_year"], [116, 2, 1, "", "group_page_numbers"], [42, 2, 1, "", "handle_merge_package_item_rows"], [117, 2, 1, "", "handle_multiple_price_lists"], [31, 2, 1, "", "has_version"], [127, 2, 1, "", "head"], [19, 2, 1, "", "hot_encode"], [114, 2, 1, "", "identify_missing_categories"], [12, 2, 1, "", "ignore_model_classification"], [19, 2, 1, "", "index_to_label"], [40, 2, 1, "", "infer_state_and_transform_to_VE"], [133, 2, 1, "", "infer_version_from_table_header"], [117, 2, 1, "", "initialize"], [79, 2, 1, "", "initialize_logging"], [123, 2, 1, "", "inner"], [66, 1, 1, "", "insert"], [129, 2, 1, "", "insert_cell_status"], [17, 2, 1, "", "insert_confidence"], [17, 2, 1, "", "insert_confidence_combinations"], [17, 2, 1, "", "insert_confidence_no_version"], [67, 2, 1, "", "insert_split_lines_mapper"], [88, 2, 1, "", "insert_status"], [8, 2, 1, "", "insert_table"], [30, 2, 1, "", "insert_versions"], [6, 2, 1, "", "interval_overlap_length"], [136, 2, 1, "", "is_bbox_within_range"], [31, 2, 1, "", "is_big"], [59, 2, 1, "", "is_bigger"], [31, 2, 1, "", "is_close"], [31, 2, 1, "", "is_colored"], [110, 2, 1, "", "is_contained_in"], [9, 2, 1, "", "is_end_block"], [109, 2, 1, "", "is_equipment"], [31, 2, 1, "", "is_far"], [5, 2, 1, "", "is_increasing"], [128, 2, 1, "", "is_it_price"], [9, 2, 1, "", "is_line_with_text"], [45, 2, 1, "", "is_main_pl"], [59, 2, 1, "", "is_new_price_better"], [31, 2, 1, "", "is_offset"], [91, 2, 1, "", "is_price_in_text"], [22, 2, 1, "", "is_serial"], [31, 2, 1, "", "is_special"], [95, 2, 1, "", "is_special_version_name"], [9, 2, 1, "", "is_start_block"], [131, 2, 1, "", "is_table_above_and_same_width"], [45, 2, 1, "", "is_table_content_reasonable"], [45, 2, 1, "", "is_tabular_equipment"], [115, 2, 1, "", "is_textract_valid"], [30, 2, 1, "", "is_upgrade"], [138, 2, 1, "", "is_valid"], [30, 2, 1, "", "is_version_part_of_word"], [6, 2, 1, "", "is_vertical"], [145, 2, 1, "", "isolate_page"], [4, 2, 1, "", "join_adobe_textract"], [96, 2, 1, "", "join_duplicate_engine_combinations"], [28, 2, 1, "", "join_empty_columns"], [4, 2, 1, "", "join_lines"], [96, 2, 1, "", "join_on_price"], [54, 2, 1, "", "join_rows"], [54, 2, 1, "", "join_rows_on_merged_cells"], [107, 2, 1, "", "join_table_blocks"], [4, 2, 1, "", "join_tables_lines"], [4, 2, 1, "", "join_text"], [28, 2, 1, "", "join_version_blocks"], [28, 2, 1, "", "join_version_type_one"], [28, 2, 1, "", "join_version_type_two"], [67, 2, 1, "", "join_versions_mapper"], [117, 2, 1, "", "json_postprocessing"], [138, 2, 1, "", "label_and_transform_table"], [34, 2, 1, "", "label_cells"], [39, 2, 1, "", "label_price_cells"], [40, 2, 1, "", "label_table"], [54, 2, 1, "", "label_transformation"], [78, 2, 1, "", "labels_from_values"], [78, 2, 1, "", "labels_from_values_extended"], [78, 2, 1, "", "labels_from_values_new"], [60, 2, 1, "", "largest_number"], [60, 2, 1, "", "largest_number_new"], [90, 2, 1, "", "largest_number_textract"], [117, 2, 1, "", "limit_combinations"], [94, 2, 1, "", "limit_engines_by_battery"], [117, 2, 1, "", "limit_model_data"], [27, 2, 1, "", "line_color_prop"], [6, 2, 1, "", "lines_by_page"], [94, 2, 1, "", "list_to_string"], [120, 2, 1, "", "load_brand_country_specific"], [120, 2, 1, "", "load_dl_model"], [94, 2, 1, "", "load_from_preskok"], [120, 2, 1, "", "load_huggingface_model"], [120, 2, 1, "", "load_line_resources"], [119, 2, 1, "", "load_models"], [120, 2, 1, "", "load_old_classifier"], [120, 2, 1, "", "load_page_classifiers"], [120, 2, 1, "", "load_yolo_fiat_model"], [94, 2, 1, "", "log_matching"], [79, 2, 1, "", "log_results"], [47, 2, 1, "", "log_to_pickle"], [31, 2, 1, "", "looks_big"], [127, 2, 1, "id7", "ls"], [127, 2, 1, "", "make_S3URL"], [77, 2, 1, "", "make_all_equipment_dataframe"], [77, 2, 1, "", "make_base_table"], [129, 2, 1, "", "make_cell_status"], [133, 2, 1, "", "make_column_index"], [28, 2, 1, "", "make_columns"], [78, 2, 1, "", "make_csvs"], [66, 2, 1, "", "make_df_numeric"], [77, 2, 1, "", "make_eq_legend"], [32, 2, 1, "", "make_equipment"], [74, 2, 1, "", "make_equipment_dataframe"], [74, 2, 1, "", "make_equipment_table_dataframe"], [77, 2, 1, "", "make_extended_table"], [77, 2, 1, "", "make_extended_td_table"], [67, 2, 1, "", "make_extra_columns"], [77, 2, 1, "", "make_generic_table"], [66, 2, 1, "", "make_identity_mapper"], [77, 2, 1, "", "make_is_copy_table"], [85, 2, 1, "", "make_labels"], [77, 2, 1, "", "make_legend"], [77, 2, 1, "", "make_legend_eq_diff"], [0, 2, 1, "", "make_line_reference"], [67, 2, 1, "", "make_long_mapper"], [77, 2, 1, "", "make_match_table"], [98, 2, 1, "", "make_new_models_from_versions"], [77, 2, 1, "", "make_not_matched_tables"], [76, 2, 1, "", "make_numeric"], [77, 2, 1, "", "make_original_table"], [62, 1, 1, "", "make_prompt"], [77, 2, 1, "", "make_serial_table"], [77, 2, 1, "", "make_serial_table_wide"], [77, 2, 1, "", "make_standard_table"], [77, 2, 1, "", "make_standard_td_table"], [15, 2, 1, "", "make_status"], [85, 2, 1, "", "make_tables"], [31, 2, 1, "", "make_template_availability"], [67, 2, 1, "", "make_title_mapper"], [67, 2, 1, "", "make_top_left_model_mapper"], [67, 2, 1, "", "make_transposed_mapper"], [69, 2, 1, "", "make_unmerge_default_mapper"], [69, 2, 1, "", "make_unmerge_symmetry_mapper"], [69, 2, 1, "", "make_unmerge_uniqueness_mapper"], [67, 2, 1, "", "make_unsplit_mapper"], [101, 2, 1, "", "map_to_default"], [114, 2, 1, "", "match"], [4, 2, 1, "", "match_by_location"], [122, 2, 1, "", "match_combination"], [122, 2, 1, "", "match_combinations"], [94, 2, 1, "", "match_drivetrain"], [74, 2, 1, "", "match_equipment"], [94, 2, 1, "", "match_transmission"], [46, 2, 1, "", "match_versions_to_ids_V3"], [143, 2, 1, "", "match_words_to_lines"], [14, 2, 1, "", "matching_words_ratio"], [5, 2, 1, "", "max_bbox"], [66, 1, 1, "", "max_dimension"], [1, 2, 1, "", "mean_horizontal_bbox"], [1, 2, 1, "", "mean_vertical_bbox"], [5, 2, 1, "", "merge_cells"], [45, 2, 1, "", "merge_columns_with_empty_header"], [5, 2, 1, "", "merge_geometry"], [70, 2, 1, "", "merge_header_mapper"], [145, 2, 1, "", "merge_pages"], [5, 2, 1, "", "merge_relationship"], [5, 2, 1, "", "merge_rows"], [5, 2, 1, "", "merge_rows_based_on_adobe"], [107, 2, 1, "", "merge_table_pair"], [107, 2, 1, "", "merge_tables"], [139, 2, 1, "", "merge_transmission_columns"], [5, 2, 1, "", "merge_two_rows"], [24, 2, 1, "", "merge_versions"], [74, 2, 1, "", "missed_entries"], [67, 2, 1, "", "mixed_column_mapper"], [55, 2, 1, "", "model_corrections"], [99, 2, 1, "", "model_generation_matching"], [100, 2, 1, "", "model_generation_matching_v2"], [68, 2, 1, "", "most_frequent"], [30, 2, 1, "", "move_match"], [127, 2, 1, "id6", "move_to"], [81, 2, 1, "", "multi_values_dataframe"], [81, 2, 1, "", "multi_values_splitter"], [12, 2, 1, "", "n_classified_tokens_on_page"], [54, 2, 1, "", "neo_patentati_exception"], [128, 2, 1, "", "normalize"], [129, 2, 1, "", "normalize_labels"], [129, 2, 1, "", "normalize_names"], [128, 2, 1, "", "normalize_version"], [94, 2, 1, "", "numeric_scorer"], [94, 2, 1, "", "object_to_string"], [60, 2, 1, "", "only_short_versions"], [74, 2, 1, "", "ordered_dict_to_dataframe"], [6, 2, 1, "", "overlap_percentage"], [31, 2, 1, "", "page"], [117, 2, 1, "", "parse"], [37, 2, 1, "", "parse_according_to_type"], [127, 2, 1, "", "parse_bucket"], [24, 2, 1, "", "parse_equipment_tables"], [127, 2, 1, "", "parse_filename"], [127, 2, 1, "", "parse_key"], [54, 2, 1, "", "parse_kw_for_electric_cars"], [24, 2, 1, "", "parse_optional"], [134, 2, 1, "", "parse_pl_tables"], [45, 2, 1, "", "parse_prices"], [24, 2, 1, "", "parse_serial"], [73, 2, 1, "", "parse_table"], [134, 2, 1, "", "parse_td_tables"], [127, 2, 1, "", "parse_url"], [117, 2, 1, "", "parse_with_call_back"], [122, 2, 1, "", "partial_match"], [122, 2, 1, "", "perfect_match"], [53, 2, 1, "", "post_cell_split_transform"], [101, 2, 1, "", "post_process_combination"], [98, 2, 1, "", "post_process_parsed_content"], [101, 2, 1, "", "postprocess_version"], [21, 1, 1, "id1", "predict"], [34, 2, 1, "", "predict_batch"], [12, 2, 1, "", "predict_categories"], [82, 2, 1, "", "predict_categories_dl"], [12, 2, 1, "", "predict_category"], [109, 2, 1, "", "predict_lines"], [21, 1, 1, "", "predict_lst"], [21, 1, 1, "", "predict_merge"], [21, 1, 1, "", "predict_proba"], [21, 1, 1, "", "predict_splitted"], [109, 2, 1, "", "predict_table"], [21, 1, 1, "", "predict_transposed"], [21, 1, 1, "", "predict_wide"], [14, 2, 1, "", "prediction_function"], [19, 2, 1, "", "prepare_df_layout"], [88, 2, 1, "", "prepare_tables"], [85, 2, 1, "", "prepare_tables_full"], [19, 2, 1, "", "prepare_text"], [129, 2, 1, "", "preprocess_data"], [117, 2, 1, "", "preprocess_pdf"], [44, 2, 1, "", "preprocess_splitted_tables"], [40, 2, 1, "", "preprocess_table"], [122, 2, 1, "", "price_match"], [122, 2, 1, "", "print_match_side_by_side"], [129, 2, 1, "", "prioritize_numerical_columns"], [135, 2, 1, "", "process_model_brand"], [89, 2, 1, "", "process_multiline_cell"], [90, 2, 1, "", "process_price_line"], [12, 2, 1, "", "process_price_list"], [89, 2, 1, "", "process_row"], [89, 2, 1, "", "process_row_values"], [89, 2, 1, "", "process_table"], [8, 2, 1, "", "process_table_element"], [67, 2, 1, "", "propagate_title"], [67, 2, 1, "", "propagate_version_mapper"], [67, 2, 1, "", "propagate_versions_mapper_v2"], [127, 2, 1, "", "put"], [127, 2, 1, "", "put_file_like"], [76, 2, 1, "", "read_cell"], [76, 2, 1, "", "read_errors"], [140, 2, 1, "", "read_rows"], [76, 2, 1, "", "read_td_stats"], [67, 2, 1, "", "reconstruct_table_mapper"], [41, 2, 1, "", "recycle_header"], [15, 2, 1, "", "remap_status"], [42, 2, 1, "", "remove_blanks"], [30, 2, 1, "", "remove_different_versions"], [30, 2, 1, "", "remove_displaced_model"], [6, 2, 1, "", "remove_duplicate_lines"], [67, 2, 1, "", "remove_duplicates_mapper"], [45, 2, 1, "", "remove_empty_rows"], [67, 2, 1, "", "remove_extra_version_mapper"], [30, 2, 1, "", "remove_lone_model"], [28, 2, 1, "", "remove_lone_versions"], [42, 2, 1, "", "remove_long_strings"], [5, 2, 1, "", "remove_merged_block_references"], [55, 2, 1, "", "remove_prefix"], [30, 2, 1, "", "remove_recursive_versions"], [30, 2, 1, "", "remove_small_bottom_margin_versions"], [67, 2, 1, "", "remove_split_version_mapper"], [70, 2, 1, "", "remove_superscript"], [30, 2, 1, "", "remove_too_big_versions"], [54, 2, 1, "", "repair_price"], [54, 2, 1, "", "repair_prices"], [54, 2, 1, "", "repair_versions"], [30, 2, 1, "", "replace_hyphen"], [4, 2, 1, "", "replace_lines"], [6, 2, 1, "", "replace_words_in_line"], [118, 2, 1, "", "restructure_model_generations"], [53, 2, 1, "", "restructure_versions"], [62, 1, 1, "", "retrieve_s3_answer"], [123, 2, 1, "", "retry"], [31, 2, 1, "", "return_extended"], [63, 2, 1, "", "return_unique"], [140, 2, 1, "", "row_to_dict"], [40, 2, 1, "", "run"], [62, 2, 1, "", "run_query"], [144, 2, 1, "", "run_textract"], [62, 1, 1, "", "save_s3_answer"], [94, 2, 1, "", "scorer"], [43, 2, 1, "", "search_for_code_header"], [91, 2, 1, "", "search_for_strings"], [70, 2, 1, "", "select_best_columns_mapper"], [140, 2, 1, "", "select_model_data"], [58, 2, 1, "", "select_settings"], [109, 2, 1, "", "sentence_to_mapping"], [109, 2, 1, "", "sentence_to_vector"], [109, 2, 1, "", "separate_packages"], [15, 2, 1, "", "set_in_column"], [15, 2, 1, "", "set_in_row"], [15, 2, 1, "", "set_in_table"], [58, 2, 1, "", "set_settings"], [112, 2, 1, "", "set_up_extractor"], [110, 2, 1, "", "set_used"], [73, 2, 1, "", "shuffle_table"], [122, 2, 1, "", "simplify_preskok_json_format"], [59, 2, 1, "", "solve_duplicates_by_price"], [59, 2, 1, "", "solve_duplicates_using_models"], [129, 2, 1, "", "sort_and_update"], [28, 2, 1, "", "sort_column"], [140, 2, 1, "", "sort_numerical_columns"], [116, 2, 1, "", "sort_pages"], [55, 2, 1, "", "special_version_name_corrections"], [81, 2, 1, "", "split_checker"], [81, 2, 1, "", "split_column"], [116, 2, 1, "", "split_list"], [81, 2, 1, "", "split_multi_values"], [91, 2, 1, "", "split_page_in_half"], [44, 2, 1, "", "split_table"], [81, 2, 1, "", "split_title_row"], [86, 2, 1, "", "stack_filled"], [86, 2, 1, "", "stack_tables"], [86, 2, 1, "", "stack_tables_all"], [31, 2, 1, "", "starts_lower"], [31, 2, 1, "", "starts_upper"], [127, 2, 1, "", "store_csv"], [94, 2, 1, "", "string_scorer"], [67, 2, 1, "", "strip_field"], [30, 2, 1, "", "strip_non_letters"], [118, 2, 1, "", "summarize_results"], [86, 2, 1, "", "table_metrics"], [43, 2, 1, "", "table_semantics"], [6, 2, 1, "", "tables_by_page"], [102, 2, 1, "", "tehnical_data_content_for_id_reduction"], [47, 2, 1, "", "test_pickle_content"], [110, 2, 1, "", "text_list"], [19, 2, 1, "", "text_to_index"], [19, 2, 1, "", "text_to_index10"], [19, 2, 1, "", "text_to_matrix"], [111, 2, 1, "", "textract_eq_pages"], [112, 2, 1, "", "textract_page"], [112, 2, 1, "", "textract_pages"], [111, 2, 1, "", "textract_pl_pages"], [111, 2, 1, "", "textract_td_pages"], [81, 2, 1, "", "title_splitter"], [32, 2, 1, "", "to_block_format"], [125, 2, 1, "", "transform_generations"], [140, 2, 1, "", "transform_kw_ks_column"], [121, 2, 1, "", "transform_to_json"], [57, 2, 1, "", "transform_values"], [54, 2, 1, "", "transform_values_labels"], [15, 2, 1, "", "transformation_confidence"], [41, 2, 1, "", "try_previous_header"], [98, 2, 1, "", "unify_version_names"], [59, 2, 1, "", "unite_combinations"], [103, 2, 1, "", "unpack_stream"], [5, 2, 1, "", "update_block_map"], [26, 2, 1, "", "update_blocks"], [100, 2, 1, "", "update_combo_with_model_generation_id"], [137, 2, 1, "", "update_model_generations_data"], [59, 2, 1, "", "update_model_name"], [98, 2, 1, "", "update_models_for_dacia_stepway"], [94, 2, 1, "", "update_object_id"], [107, 2, 1, "", "update_right_table_cells"], [131, 2, 1, "", "use_above_table_header"], [129, 2, 1, "", "use_original_values"], [47, 2, 1, "", "validate"], [16, 2, 1, "", "validate_transformation"], [57, 2, 1, "", "value_transformation"], [117, 2, 1, "", "ve_id_matching"], [54, 2, 1, "", "versions_to_one_cell"], [136, 2, 1, "", "vertical_distance"], [19, 2, 1, "", "word_to_index"], [47, 2, 1, "", "wrapper"], [77, 2, 1, "", "write_eq_statistics"], [72, 2, 1, "", "write_excel"], [117, 2, 1, "", "write_logs"], [77, 2, 1, "", "write_missing"], [121, 2, 1, "", "write_nested"], [77, 2, 1, "", "write_td_stats"], [73, 2, 1, "", "xls_for_annotation"]], "ClassName": [[93, 1, 1, "", "get_pages"], [93, 1, 1, "", "predict"]], "TextractUtils": [[6, 1, 1, "", "__init__"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "function", "Python function"]}, "titleterms": {"adobe_to_textract": 0, "adobe_to_textract_util": 1, "create_block": 2, "adobe_extract": 3, "join_adobe_textract": 4, "join_adobe_textract_structur": 5, "join_adobe_textract_util": 6, "main": [7, 12, 24, 26, 36, 40, 81, 85, 93, 98, 133, 144], "table_track": 8, "transform_t": 9, "app": 10, "classifi": [11, 147, 152, 159], "mapping_dict": 13, "metric": 14, "cell_statu": 15, "check_transform": 16, "combined_confid": 17, "confid": 18, "dl_tool": 19, "dl_classif": 20, "table_classifi": 21, "data_model": 22, "equipment_pars": 23, "serial": 25, "process_color": 27, "process_layout": 28, "process_lin": 29, "process_vers": 30, "serial_data_model": 31, "util": [32, 45, 60, 82, 90, 140, 146], "share": [33, 124], "cell_classif": 34, "cell_stat": 35, "eq_packag": 37, "tabl": [38, 152, 160, 164], "label": [39, 150, 152, 154], "recycle_head": 41, "row_filt": 42, "table_semant": 43, "table_split": 44, "versions_match": 46, "gener": 47, "test": 48, "output_formatt": 49, "after_match": 50, "before_match": 51, "expert_rul": 52, "post_cell_split_transform": 53, "post_labeling_transform": 54, "post_table_pars": 55, "pre_labeling_td_transform": 56, "pre_labeling_transform": 57, "select_set": 58, "unite_combin": 59, "gpt_process": 61, "querier": 62, "td_gpt_data": 63, "function": 64, "document": [64, 164], "layout_transform": 65, "index_mapp": 66, "layout_mapp": 67, "layout_pars": 68, "merge_mapp": 69, "technical_mapp": 70, "local_test": 71, "create_excel": 72, "excel_extractor": 73, "excel_util": 74, "make_log": 75, "read_excel": 76, "table_mak": 77, "write_label": 78, "write_log": 79, "multi_values_split": 80, "page_select": 83, "pl_finder": 84, "merge_t": [86, 107], "price_page_find": 87, "price_table_find": 88, "table_prepar": 89, "featur": [91, 147], "td_finder": 92, "add_ve_id": 94, "add_confid": 95, "duplic": 96, "parser_postprocess": [97, 101], "model_generation_match": 99, "model_generation_matching_v2": 100, "reduce_possible_id": 102, "adobe_pars": 103, "assemble_extract": 104, "improve_textract": 105, "preprocess_pdf": 106, "pdf_parser": 108, "segment_classifi": 109, "segment_divid": 110, "textract_gener": 111, "textract_pars": 112, "price_list_pars": 113, "match": 114, "model": [115, 147, 154], "multiple_price_lists_util": 116, "parser_v2": 117, "parser_v2_util": 118, "set": [119, 152, 158], "storag": [120, 158], "transform_to_json": 121, "eval": 122, "helper": [123, 131, 141], "preskok_api": 125, "price_list": 126, "s3": 127, "string_util": 128, "abstract_pars": 129, "column_extract": 130, "table_parser_v2": 132, "parse_t": 134, "pl_summari": 135, "search_textract_block": 136, "table_surround": 137, "td_parser": 138, "transmiss": 139, "textract": [142, 161], "line_split": 143, "textract_divid": 145, "page": [147, 152], "knn": 147, "rf": 147, "fallback": 147, "code": [147, 154, 156, 159, 160, 161], "flow": [147, 154, 156, 159, 160, 161], "output": [147, 156, 159, 160], "clean": [148, 152], "up": [148, 152, 158], "collect": [149, 152], "combin": [149, 152], "appli": [150, 151, 152], "expert": [150, 151, 152], "rule": [150, 151, 152, 156], "round": [151, 152], "2": [151, 152], "overview": 152, "inform": 152, "structur": [152, 161, 164], "pdf": [152, 158, 161], "creat": [152, 156], "panda": [152, 156], "datafram": [152, 156], "process": 152, "transform": [152, 153, 160], "standard": [152, 160], "form": [152, 160], "split": [150, 152, 159, 160], "add": [152, 163], "vehicl": [152, 158, 163], "editor": [152, 158, 163], "id": [152, 163], "wrap": 152, "correct": [152, 153], "format": [152, 153], "valid": [152, 162], "result": [152, 161, 162], "log": [152, 155], "web": 158, "servic": 158, "amazon": 158, "kei": 158, "object": 158, "being": 158, "summari": 158, "price": 164, "list": 164, "parser": 164, "": 164, "indic": 164, "usag": 165, "instal": 165, "hash": 161, "adob": 161, "sampl": [156, 159, 160], "input": [156, 159, 160], "cell": 156, "merg": [156, 160], "The": 154, "deep": 154, "learn": 154, "dataset": 154, "train": 154, "exampl": 154, "represent": 154, "mapper": 160, "idea": 160, "encyclopedia": 160, "wide": 160, "old": 159, "mechan": 159, "extract": 159, "kw": 159, "k": 159, "fuel": 159, "adjust": 159, "befor": 150, "after": 150}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 60}, "alltitles": {"adobe_to_textract": [[0, "adobe-to-textract"]], "adobe_to_textract_utility": [[1, "adobe-to-textract-utility"]], "create_blocks": [[2, "create-blocks"]], "adobe_extract": [[3, "adobe-extract"]], "join_adobe_textract": [[4, "join-adobe-textract"]], "join_adobe_textract_structure": [[5, "join-adobe-textract-structure"]], "join_adobe_textract_utility": [[6, "join-adobe-textract-utility"]], "main": [[7, "main"], [12, "main"], [24, "main"], [26, "main"], [36, "main"], [40, "main"], [81, "main"], [85, "main"], [93, "main"], [98, "main"], [133, "main"], [144, "main"]], "table_tracking": [[8, "table-tracking"]], "transform_tables": [[9, "transform-tables"]], "app": [[10, "app"]], "classifier": [[11, "classifier"]], "mapping_dict": [[13, "mapping-dict"]], "metrics": [[14, "metrics"]], "cell_status": [[15, "cell-status"]], "check_transformation": [[16, "check-transformation"]], "combined_confidence": [[17, "combined-confidence"]], "confidence": [[18, "confidence"]], "dl_tools": [[19, "dl-tools"]], "dl_classification": [[20, "dl-classification"]], "table_classifier": [[21, "table-classifier"]], "data_models": [[22, "data-models"]], "equipment_parser": [[23, "equipment-parser"]], "serial": [[25, "serial"]], "process_color": [[27, "process-color"]], "process_layout": [[28, "process-layout"]], "process_line": [[29, "process-line"]], "process_version": [[30, "process-version"]], "serial_data_models": [[31, "serial-data-models"]], "utils": [[32, "utils"], [45, "utils"], [60, "utils"], [82, "utils"], [90, "utils"], [140, "utils"], [146, "utils"]], "shared": [[33, "shared"], [124, "shared"]], "cell_classification": [[34, "cell-classification"]], "cell_state": [[35, "cell-state"]], "eq_packages": [[37, "eq-packages"]], "table": [[38, "table"]], "labeling": [[39, "labeling"]], "recycle_headers": [[41, "recycle-headers"]], "row_filtering": [[42, "row-filtering"]], "table_semantics": [[43, "table-semantics"]], "table_split": [[44, "table-split"]], "versions_matching": [[46, "versions-matching"]], "generator": [[47, "generator"]], "testing": [[48, "testing"]], "output_formatters": [[49, "output-formatters"]], "after_matching": [[50, "after-matching"]], "before_matching": [[51, "before-matching"]], "expert_rules": [[52, "expert-rules"]], "post_cell_split_transform": [[53, "post-cell-split-transform"]], "post_labeling_transform": [[54, "post-labeling-transform"]], "post_table_parser": [[55, "post-table-parser"]], "pre_labeling_td_transform": [[56, "pre-labeling-td-transform"]], "pre_labeling_transform": [[57, "pre-labeling-transform"]], "select_settings": [[58, "select-settings"]], "unite_combinations": [[59, "unite-combinations"]], "gpt_processing": [[61, "gpt-processing"]], "querier": [[62, "querier"]], "td_gpt_data": [[63, "td-gpt-data"]], "Function documentation": [[64, "function-documentation"]], "layout_transforming": [[65, "layout-transforming"]], "index_mapper": [[66, "index-mapper"]], "layout_mapper": [[67, "layout-mapper"]], "layout_parser": [[68, "layout-parser"]], "merge_mappers": [[69, "merge-mappers"]], "technical_mappers": [[70, "technical-mappers"]], "local_test": [[71, "local-test"]], "create_excels": [[72, "create-excels"]], "excel_extractor": [[73, "excel-extractor"]], "excel_utility": [[74, "excel-utility"]], "make_logs": [[75, "make-logs"]], "read_excel": [[76, "read-excel"]], "table_makers": [[77, "table-makers"]], "write_labels": [[78, "write-labels"]], "write_logs": [[79, "write-logs"]], "multi_values_split": [[80, "multi-values-split"]], "page_selection": [[83, "page-selection"]], "pl_finder": [[84, "pl-finder"]], "merge_tables": [[86, "merge-tables"], [107, "merge-tables"]], "price_page_finder": [[87, "price-page-finder"]], "price_table_finder": [[88, "price-table-finder"]], "table_preparator": [[89, "table-preparator"]], "features": [[91, "features"]], "td_finder": [[92, "td-finder"]], "add_VE_id": [[94, "add-ve-id"]], "add_confidence": [[95, "add-confidence"]], "duplicates": [[96, "duplicates"]], "parser_postprocessing": [[97, "parser-postprocessing"], [101, "parser-postprocessing"]], "model_generation_matching": [[99, "model-generation-matching"]], "model_generation_matching_v2": [[100, "model-generation-matching-v2"]], "reduce_possible_ids": [[102, "reduce-possible-ids"]], "adobe_parser": [[103, "adobe-parser"]], "assemble_extracts": [[104, "assemble-extracts"]], "improve_textract": [[105, "improve-textract"]], "preprocess_pdf": [[106, "preprocess-pdf"]], "pdf_parser": [[108, "pdf-parser"]], "segment_classifiers": [[109, "segment-classifiers"]], "segment_divider": [[110, "segment-divider"]], "textract_generator": [[111, "textract-generator"]], "textract_parser": [[112, "textract-parser"]], "price_list_parser": [[113, "price-list-parser"]], "matching": [[114, "matching"]], "models": [[115, "models"]], "multiple_price_lists_utils": [[116, "multiple-price-lists-utils"]], "parser_v2": [[117, "parser-v2"]], "parser_v2_utils": [[118, "parser-v2-utils"]], "settings": [[119, "settings"]], "storage": [[120, "storage"]], "transform_to_json": [[121, "transform-to-json"]], "eval": [[122, "eval"]], "helpers": [[123, "helpers"], [131, "helpers"], [141, "helpers"]], "preskok_api": [[125, "preskok-api"]], "price_lists": [[126, "price-lists"]], "s3": [[127, "s3"]], "string_utils": [[128, "string-utils"]], "abstract_parser": [[129, "abstract-parser"]], "column_extraction": [[130, "column-extraction"]], "table_parser_v2": [[132, "table-parser-v2"]], "parse_tables": [[134, "parse-tables"]], "pl_summary": [[135, "pl-summary"]], "search_textract_blocks": [[136, "search-textract-blocks"]], "table_surrounding": [[137, "table-surrounding"]], "td_parser": [[138, "td-parser"]], "transmission": [[139, "transmission"]], "textract": [[142, "textract"]], "line_splitting": [[143, "line-splitting"]], "textract_divider": [[145, "textract-divider"]], "Classifying pages": [[147, "classifying-pages"], [152, "classifying-pages"]], "Features": [[147, "features"]], "Model (kNN +RF)": [[147, "model-knn-rf"]], "Fallbacks": [[147, "fallbacks"]], "Code flow": [[147, "code-flow"], [161, "code-flow"], [161, "id1"], [160, "code-flow"], [159, "code-flow"]], "Code output": [[147, "code-output"]], "Clean-up": [[148, "clean-up"], [152, "clean-up"]], "Collect combinations": [[149, "collect-combinations"], [152, "collect-combinations"]], "Applying expert rules, round #2": [[151, "applying-expert-rules-round-2"], [152, "applying-expert-rules-round-2"]], "Overview": [[152, "overview"]], "Collecting information": [[152, "collecting-information"]], "Setting up": [[152, "setting-up"], [158, "setting-up"]], "Structuring PDF": [[152, "structuring-pdf"]], "Creating Pandas Dataframes": [[152, "creating-pandas-dataframes"], [156, "creating-pandas-dataframes"]], "Processing table": [[152, "processing-table"]], "Labeling": [[152, "labeling"], [154, "labeling"]], "Transforming to the standard table form": [[152, "transforming-to-the-standard-table-form"], [160, "transforming-to-the-standard-table-form"]], "Splitting": [[152, "splitting"], [159, "splitting"]], "Applying expert rules": [[152, "applying-expert-rules"], [150, "applying-expert-rules"]], "Create combinations": [[152, "create-combinations"]], "Add Vehicle Editor IDs": [[152, "add-vehicle-editor-ids"], [163, "add-vehicle-editor-ids"]], "Wrap up": [[152, "wrap-up"]], "Transforming to the correct format": [[152, "transforming-to-the-correct-format"], [153, "transforming-to-the-correct-format"]], "Validating the results": [[152, "validating-the-results"], [162, "validating-the-results"]], "Logging": [[152, "logging"], [155, "logging"]], "PDF": [[158, "pdf"]], "Web services": [[158, "web-services"]], "Amazon Web Services": [[158, "amazon-web-services"]], "Vehicle Editor": [[158, "vehicle-editor"]], "Key object being set up": [[158, "key-object-being-set-up"]], "Settings": [[158, "settings"]], "Summary": [[158, "summary"]], "Storage": [[158, "storage"]], "Price list parser\u2019s documentation!": [[164, "price-list-parser-s-documentation"]], "Structure": [[164, "structure"]], "Indices and tables": [[164, "indices-and-tables"]], "Usage": [[165, "usage"]], "Installation": [[165, "installation"]], "Sample input": [[156, "sample-input"], [160, "sample-input"], [159, "sample-input"]], "Sample output": [[156, "sample-output"], [160, "sample-output"], [159, "sample-output"]], "Rules for cell merging": [[156, "rules-for-cell-merging"]], "Code Flow": [[156, "code-flow"], [154, "code-flow"]], "Structuring the PDF": [[161, "structuring-the-pdf"]], "Textract": [[161, "textract"]], "Hashing results": [[161, "hashing-results"]], "Adobe": [[161, "adobe"]], "The deep-learning model": [[154, "the-deep-learning-model"]], "Dataset": [[154, "dataset"]], "Training example representation": [[154, "training-example-representation"]], "Mapper idea": [[160, "mapper-idea"]], "Mapper encyclopedia": [[160, "mapper-encyclopedia"]], "Merge mapper": [[160, "merge-mapper"]], "Split mapper": [[160, "split-mapper"]], "Wide mapper": [[160, "wide-mapper"]], "Old classifier": [[159, "old-classifier"]], "Splitting mechanic": [[159, "splitting-mechanic"]], "Extracting kw and ks": [[159, "extracting-kw-and-ks"]], "Fuel adjustments": [[159, "fuel-adjustments"]], "Before labeling": [[150, "before-labeling"]], "After labeling": [[150, "after-labeling"]], "After splitting": [[150, "after-splitting"]]}, "indexentries": {}})