From 8e1b82f389c069ab877f961d450d4849e9ea9111 Mon Sep 17 00:00:00 2001 From: scanossmining Date: Wed, 31 Jan 2024 02:01:29 +0000 Subject: [PATCH] Solve bug of best match selection using an SBOM list. Improve logs to detect memory errors during snippet scanning. Add scan type 'failed'. Solve minor bugs --- inc/scanoss.h | 4 +- src/binary_scan.c | 4 + src/main.c | 1 - src/match.c | 14 +- src/match_list.c | 4 +- src/report.c | 4 +- src/scan.c | 15 +- src/snippets.c | 340 ++++++++++++++++++++++++---------------------- 8 files changed, 206 insertions(+), 180 deletions(-) diff --git a/inc/scanoss.h b/inc/scanoss.h index 86552f1..b67513b 100644 --- a/inc/scanoss.h +++ b/inc/scanoss.h @@ -44,7 +44,7 @@ #define WFP_REC_LN 18 /* Log files */ -#define SCANOSS_VERSION "5.3.5" +#define SCANOSS_VERSION "5.3.6" #define SCAN_LOG "/tmp/scanoss_scan.log" #define MAP_DUMP "/tmp/scanoss_map.dump" #define SLOW_QUERY_LOG "/tmp/scanoss_slow_query.log" @@ -86,7 +86,7 @@ extern const char *vulnerability_sources[]; extern const char *quality_sources[]; extern const char *dependency_sources[]; -typedef enum {MATCH_NONE, MATCH_FILE, MATCH_SNIPPET, MATCH_BINARY} match_t; +typedef enum {MATCH_NONE, MATCH_FILE, MATCH_SNIPPET, MATCH_BINARY, MATCH_FAILED} match_t; typedef struct keywords { diff --git a/src/binary_scan.c b/src/binary_scan.c index a8c2ce7..82dad4b 100644 --- a/src/binary_scan.c +++ b/src/binary_scan.c @@ -162,6 +162,8 @@ static bool get_all_file_ids(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8 static void fhash_process(char * hash, component_list_t * comp_list) { struct ldb_table oss_fhash = {.db = "oss", .table = "fhashes", .key_ln = 16, .rec_ln = 0, .ts_ln = 2, .tmp = false}; + if (!ldb_table_exists(oss_fhash.db, oss_fhash.table)) // skip purl if the table is not present + return; uint8_t fhash[16]; ldb_hex_to_bin(hash, 32, fhash); /* Get all file IDs for given wfp */ @@ -304,7 +306,9 @@ int binary_scan(char * input) break; component_list_destroy(result.components); 
free(result.file); + result.file = NULL; free(result.md5); + result.md5 = NULL; sensibility++; }; diff --git a/src/main.c b/src/main.c index fd2f498..1c4e029 100644 --- a/src/main.c +++ b/src/main.c @@ -512,7 +512,6 @@ int main(int argc, char **argv) /* Scan file directly */ else { - scanlog("Scanning file %s\n", target); scan_data_t * scan = scan_data_init(target, scan_max_snippets, scan_max_components); ldb_scan(scan); } diff --git a/src/match.c b/src/match.c index ee63e80..339496f 100644 --- a/src/match.c +++ b/src/match.c @@ -48,7 +48,7 @@ #include "dependency.h" #include "ignorelist.h" -const char *matchtypes[] = {"none", "file", "snippet", "binary"}; /** describe the availables kinds of match */ +const char *matchtypes[] = {"none", "file", "snippet", "binary", "failed"}; /** describe the availables kinds of match */ bool match_extensions = false; /** global match extension flag */ char *component_hint = NULL; @@ -107,12 +107,13 @@ static int hint_eval(component_data_t *a, component_data_t *b) /*Check for component hint in purl, select components matching with the hint */ if (a->purls[0] && strstr(a->purls[0], component_hint) && !(b->purls[0] && strstr(b->purls[0], component_hint))) { - scanlog("Reject component %s by hint: %s\n", b->purls[0], component_hint); + scanlog("Reject component %s by purl hint: %s\n", b->purls[0], component_hint); return -1; } if (b->purls[0] && strstr(b->purls[0], component_hint) && !(a->purls[0] && strstr(a->purls[0], component_hint))) { - scanlog("Accept component %s by hint: %s\n", b->purls[0], component_hint); + scanlog("Accept component %s by purl hint: %s\n", b->purls[0], component_hint); + b->identified = 1; return 1; } @@ -125,6 +126,7 @@ static int hint_eval(component_data_t *a, component_data_t *b) if (b->component && strstr(b->component, component_hint) && !(a->component && strstr(a->purls[0], component_hint))) { scanlog("Accept component %s by hint: %s\n", b->component, component_hint); + b->identified = 1; return 1; } 
@@ -365,7 +367,8 @@ bool load_matches(match_data_t *match) if (!item->entries.le_next || !item->entries.le_next->component) break; /* if the date of two components it's the same */ - if(!strcmp(item->component->release_date, item->entries.le_next->component->release_date)) + if(!strcmp(item->component->release_date, item->entries.le_next->component->release_date) && + item->component->identified <= item->entries.le_next->component->identified) { /* If item has no dependencies or depencencies are empty I must check the next one */ if(!item->component->dependency_text || strlen(item->component->dependency_text) < 4) @@ -376,6 +379,7 @@ bool load_matches(match_data_t *match) /*if the next component has dependencies, permute */ else if (print_dependencies(item->entries.le_next->component)) { + scanlog("Best match replaced due to dependencies"); struct comp_entry *aux = item->entries.le_next->entries.le_next; LIST_INSERT_HEAD(&match->component_list.headp, item->entries.le_next, entries); item->entries.le_next = aux; @@ -580,7 +584,7 @@ void match_select_best(scan_data_t *scan) if (!scan->best_match || !scan->best_match->component_list.items || ((engine_flags & DISABLE_REPORT_IDENTIFIED) && scan->best_match->component_list.headp.lh_first->component->identified)) { scan->match_type = MATCH_NONE; - scanlog("Match without components or declared in sbom"); + scanlog("Match without components or declared in sbom\n"); } } diff --git a/src/match_list.c b/src/match_list.c index e39f52d..ad92348 100644 --- a/src/match_list.c +++ b/src/match_list.c @@ -80,13 +80,13 @@ bool component_list_add(component_list_t *list, component_data_t *new_comp, bool if (!list->headp.lh_first) { - scanlog("first component in list\n"); struct comp_entry *nn = calloc(1, sizeof(struct comp_entry)); /* Insert at the head. 
*/ LIST_INSERT_HEAD(&list->headp, nn, entries); nn->component = new_comp; list->items++; list->last_element = nn; list->last_element_aux = NULL; + scanlog("first component in list: %s\n", list->last_element->component->purls[0]); return true; } else if (val) @@ -117,7 +117,7 @@ bool component_list_add(component_list_t *list, component_data_t *new_comp, bool } struct comp_entry *nn = calloc(1, sizeof(struct comp_entry)); /* Insert after. */ - nn->component = new_comp; + nn->component = new_comp; LIST_INSERT_BEFORE(np, nn, entries); if (!np->entries.le_next) diff --git a/src/report.c b/src/report.c index 8d724b4..c0cf547 100644 --- a/src/report.c +++ b/src/report.c @@ -161,13 +161,13 @@ void print_server_stats(scan_data_t *scan) * @brief Return a match=none result * @param scan scan data pointer */ -void print_json_nomatch() +void print_json_nomatch(match_t m) { if (quiet) return; if (engine_flags & DISABLE_BEST_MATCH) printf("{"); - printf("\"id\": \"none\""); + printf("\"id\":\"%s\"",matchtypes[m]); //print_server_stats(scan); if (engine_flags & DISABLE_BEST_MATCH) printf("}"); diff --git a/src/scan.c b/src/scan.c index 0a4ac23..d9f5da9 100644 --- a/src/scan.c +++ b/src/scan.c @@ -55,15 +55,14 @@ char *ignored_assets = NULL; */ scan_data_t * scan_data_init(char *target, int max_snippets, int max_components) { - scanlog("Scan Init\n"); scan_data_t * scan = calloc(1, sizeof(*scan)); scan->file_path = strdup(target); scan->file_size = malloc(32); scan->hashes = malloc(MAX_FILE_SIZE); scan->lines = malloc(MAX_FILE_SIZE); scan->match_type = MATCH_NONE; - scan->max_components_to_process = max_components; + scanlog("Scan Init - path: %s\n", scan->file_path); scan->max_snippets_to_process = max_snippets > MAX_MULTIPLE_COMPONENTS ? MAX_MULTIPLE_COMPONENTS : max_snippets; scan->max_snippets_to_process = scan->max_snippets_to_process == 0 ? 
1 : scan->max_snippets_to_process; @@ -281,7 +280,6 @@ int wfp_scan(char * path, int scan_max_snippets, int scan_max_components) extract_csv(scan->file_size, (char *)rec, 1, LDB_MAX_REC_LN); scan->preload = true; free(rec); - scanlog("File md5 to be scanned: %s\n", hexmd5); ldb_hex_to_bin(hexmd5, MD5_LEN * 2, scan->md5); free(hexmd5); } @@ -358,7 +356,7 @@ void output_matches_json(scan_data_t *scan) match_list_t *best_list = match_select_m_component_best(scan); scanlog("<<>>\n", best_list->items); if(!match_list_print(best_list, print_json_match, ",")) - print_json_nomatch(); + print_json_nomatch(scan->match_type); match_list_destroy(best_list); } @@ -375,15 +373,15 @@ void output_matches_json(scan_data_t *scan) } if (first) { - print_json_nomatch(); + print_json_nomatch(MATCH_NONE); } scan->printed_succed = !first; } /* prinf no match if the scan was evaluated as none */ // TODO must be unified with the "else" clause - else if (scan->match_type == MATCH_NONE) + else if (scan->match_type == MATCH_NONE || scan->match_type == MATCH_FAILED) { printf("\"%s\": [{", scan->file_path); - print_json_nomatch(); + print_json_nomatch(scan->match_type); } else if (scan->best_match && scan->best_match->component_list.items) { @@ -393,7 +391,7 @@ void output_matches_json(scan_data_t *scan) else { printf("\"%s\": [{", scan->file_path); - print_json_nomatch(); + print_json_nomatch(scan->match_type); } json_close_file(scan); @@ -462,6 +460,7 @@ void ldb_scan(scan_data_t * scan) char *tmp_md5_hex = md5_hex(scan->md5); strcpy(scan->source_md5, tmp_md5_hex); free(tmp_md5_hex); + scanlog("File MD5: %s\n", scan->source_md5); /* Look for full file match or url match in ldb */ scan->match_type = ldb_scan_file(scan); diff --git a/src/snippets.c b/src/snippets.c index ecbf7ec..3d64700 100644 --- a/src/snippets.c +++ b/src/snippets.c @@ -119,7 +119,7 @@ void biggest_snippet(scan_data_t *scan) memcpy(match_new->file_md5, scan->matchmap[j].md5, MD5_LEN); match_new->hits = 
scan->matchmap[j].hits; match_new->matchmap_reg = scan->matchmap[j].md5; - match_new->type = scan->match_type; + match_new->type = MATCH_SNIPPET; match_new->from = scan->matchmap[j].range->from; strcpy(match_new->source_md5, scan->source_md5); match_new->scan_ower = scan; @@ -190,6 +190,13 @@ void biggest_snippet(scan_data_t *scan) static bool get_all_file_ids(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr) { uint8_t *record = (uint8_t *)ptr; + // Manage memory issue or problem quering the WFP table. + if (data == NULL && datalen > 0) + { + scanlog("Error quering WFP table. datalen=%u but data is NULL\n", datalen); + uint32_write(record,0); + return true; + } if (datalen) { @@ -721,16 +728,16 @@ int add_file_to_matchmap(scan_data_t *scan, matchmap_entry_t *item, uint8_t *md5 match_t ldb_scan_snippets(scan_data_t *scan) { - + match_t result = MATCH_NONE; scanlog("ldb_scan_snippets\n"); if (!scan->hash_count) { scanlog("No hashes return NONE\n"); - return MATCH_NONE; + return result; } if (engine_flags & DISABLE_SNIPPET_MATCHING) - return MATCH_NONE; + return result; if (trace_on) scanlog("Checking snippets. Traced (-qi) matches marked with *\n"); @@ -747,6 +754,14 @@ match_t ldb_scan_snippets(scan_data_t *scan) { /* Get all file IDs for given wfp */ map[i].md5_set = malloc(WFP_REC_LN * MATCHMAP_ITEM_SIZE); + //Exit if there is not RAM memory available + if (!map[i].md5_set) + { + scanlog("Error: memory allocation failed, aborting scan\n"); + result = MATCH_FAILED; + break; + } + wfp_invert(scan->hashes[i], map[i].wfp); //scanlog(" Add wfp %02x%02x%02x%02x to map\n",map[i].wfp[0], map[i].wfp[1],map[i].wfp[2],map[i].wfp[3]); uint32_write(map[i].md5_set, 0); @@ -757,210 +772,215 @@ match_t ldb_scan_snippets(scan_data_t *scan) map_max_size = map[i].size; } + //Manage unexpected condition, some WFP should have a hit. 
+ if (map_max_size <= 0) + { + scanlog("Warning no WFP with hits, returning failed\n"); + result = MATCH_FAILED; + } - /* Classify the WFPs in cathegories depending on popularity - Each cathegoy will contain a sub set of index refered to map rows*/ - #define MAP_INDIRECTION_CAT_NUMBER 1000 - #define MAP_INDIRECTION_CAT_SIZE (map_max_size / (MAP_INDIRECTION_CAT_NUMBER-1)) - int map_indedirection_items_size = scan->hash_count / (MAP_INDIRECTION_CAT_NUMBER / 10) < 10 ? - scan->hash_count : - scan->hash_count / (MAP_INDIRECTION_CAT_NUMBER / 10); - - int map_indirection[MAP_INDIRECTION_CAT_NUMBER][map_indedirection_items_size]; //define the cathegories - int map_indirection_index[MAP_INDIRECTION_CAT_NUMBER]; //index for each cathegory - - memset(map_indirection, 0, sizeof(map_indirection)); - memset(map_indirection_index, 0, sizeof(map_indirection_index)); + if (map_max_size > 0 && result != MATCH_FAILED) + { + /* Classify the WFPs in cathegories depending on popularity + Each cathegoy will contain a sub set of index refered to map rows*/ + #define MAP_INDIRECTION_CAT_NUMBER 1000 + #define MAP_INDIRECTION_CAT_SIZE (map_max_size / (MAP_INDIRECTION_CAT_NUMBER-1)) + int map_indedirection_items_size = scan->hash_count / (MAP_INDIRECTION_CAT_NUMBER / 10) < 10 ? 
+ scan->hash_count : + scan->hash_count / (MAP_INDIRECTION_CAT_NUMBER / 10); + + int map_indirection[MAP_INDIRECTION_CAT_NUMBER][map_indedirection_items_size]; //define the cathegories + int map_indirection_index[MAP_INDIRECTION_CAT_NUMBER]; //index for each cathegory + + memset(map_indirection, 0, sizeof(map_indirection)); + memset(map_indirection_index, 0, sizeof(map_indirection_index)); - scanlog ("< Snippet scan setup: Min hits: %d, Min lines: %d, Map max size = %d, Cat N = %d, Cat size = %d >\n", - min_match_hits, min_match_lines, map_max_size, MAP_INDIRECTION_CAT_NUMBER, MAP_INDIRECTION_CAT_SIZE); + scanlog ("< Snippet scan setup: Min hits: %d, Min lines: %d, Map max size = %d, Cat N = %d, Cat size = %d >\n", + min_match_hits, min_match_lines, map_max_size, MAP_INDIRECTION_CAT_NUMBER, MAP_INDIRECTION_CAT_SIZE); - for (int i =0; i < scan->hash_count; i++) - { - int cat = map[i].size / (MAP_INDIRECTION_CAT_SIZE+1); - - if (map_indirection_index[cat] >= map_indedirection_items_size) + for (int i =0; i < scan->hash_count; i++) { - scanlog("Cat %d is full, skiping...\n", cat); - continue; - } + int cat = map[i].size / (MAP_INDIRECTION_CAT_SIZE+1); + + if (map_indirection_index[cat] >= map_indedirection_items_size) + { + scanlog("Cat %d is full, skiping...\n", cat); + continue; + } - map_indirection[cat][map_indirection_index[cat]] = i; - map_indirection_index[cat]++; - } + map_indirection[cat][map_indirection_index[cat]] = i; + map_indirection_index[cat]++; + } - /* Calculate a limit to the quantity of cathegories to be processed, - the cathegoies with less quantity of MD5s (less popular) will be prioritased*/ - int cat_limit = 0; - int cat_limit_index=0; - int hashes_to_process = 0; - for (int i = 0; i < MAP_INDIRECTION_CAT_NUMBER; i++) - { - bool exit = false; - for (int j=0; j < map_indirection_index[i]; j++) + /* Calculate a limit to the quantity of cathegories to be processed, + the cathegoies with less quantity of MD5s (less popular) will be prioritased*/ + 
int cat_limit = 0; + int cat_limit_index=0; + int hashes_to_process = 0; + for (int i = 0; i < MAP_INDIRECTION_CAT_NUMBER; i++) { - if (map[map_indirection[i][j]].size <= 0) - continue; - hashes_to_process++; - cat_limit += map[map_indirection[i][j]].size; - if (cat_limit > matchmap_max_files) + bool exit = false; + for (int j=0; j < map_indirection_index[i]; j++) { - if (hashes_to_process < scan->hash_count / 10 && cat_limit < MAX_MATCHMAP_FILES) - { - matchmap_max_files += map[map_indirection[i][j]].size; - } - else + if (map[map_indirection[i][j]].size <= 0) + continue; + hashes_to_process++; + cat_limit += map[map_indirection[i][j]].size; + if (cat_limit > matchmap_max_files) { - cat_limit_index = i; - exit = true; - break; + if (hashes_to_process < scan->hash_count / 10 && cat_limit < MAX_MATCHMAP_FILES) + { + matchmap_max_files += map[map_indirection[i][j]].size; + } + else + { + cat_limit_index = i; + exit = true; + break; + } } } + if (exit) + break; + else + cat_limit_index = i; } - if (exit) - break; - else - cat_limit_index = i; - } - if (debug_on) - { - scanlog("Cathegories result:\n"); - for (int i = 0; i < MAP_INDIRECTION_CAT_NUMBER; i++) + if (debug_on) { - for (int j=0; j < map_indirection_index[i]; j++) + scanlog("Cathegories result:\n"); + for (int i = 0; i < MAP_INDIRECTION_CAT_NUMBER; i++) { - uint8_t * wfp = map[map_indirection[i][j]].wfp; - scanlog("Cat :%d.%d - line %d - %02x%02x%02x%02x - size %d\n",i,j, - map[map_indirection[i][j]].line, wfp[0], wfp[1],wfp[2],wfp[3], map[map_indirection[i][j]].size); + for (int j=0; j < map_indirection_index[i]; j++) + { + uint8_t * wfp = map[map_indirection[i][j]].wfp; + scanlog("Cat :%d.%d - line %d - %02x%02x%02x%02x - size %d\n",i,j, + map[map_indirection[i][j]].line, wfp[0], wfp[1],wfp[2],wfp[3], map[map_indirection[i][j]].size); + } } } - } - matchmap_max_files = cat_limit; - scanlog("Map limit on %d MD5s at %d of %d. 
Selected hashed: %d/%d - cat_limit_files = %d\n",matchmap_max_files, cat_limit_index, MAP_INDIRECTION_CAT_NUMBER, hashes_to_process, scan->hash_count, cat_limit); - scan->matchmap = calloc(matchmap_max_files, sizeof(matchmap_entry)); + if (matchmap_max_files < cat_limit) + matchmap_max_files = cat_limit; + scanlog("Map limit on %d MD5s at %d of %d. Selected hashed: %d/%d - cat_limit_files = %d\n",matchmap_max_files, cat_limit_index, MAP_INDIRECTION_CAT_NUMBER, hashes_to_process, scan->hash_count, cat_limit); + scan->matchmap = calloc(matchmap_max_files, sizeof(matchmap_entry)); - int map_indexes[scan->hash_count]; - memset(map_indexes, 0, sizeof(map_indexes)); + int map_indexes[scan->hash_count]; + memset(map_indexes, 0, sizeof(map_indexes)); - /*Add MD5s to the matchmap, sorting by sector. First add the MD5s starting with 00, then with 01 and so on*/ - int last_sector_aux = 0; - for (int sector = 0; sector < 256; sector++) - { - scan->matchmap_rank_by_sector[sector] = -1; - int sector_max = min_match_hits; - for (int cat = 0; cat < cat_limit_index; cat++) + /*Add MD5s to the matchmap, sorting by sector. 
First add the MD5s starting with 00, then with 01 and so on*/ + int last_sector_aux = 0; + for (int sector = 0; sector < 256; sector++) { - /* travel the cathegories map*/ - for (int item_in_cat = 0; item_in_cat < map_indirection_index[cat]; item_in_cat++) + scan->matchmap_rank_by_sector[sector] = -1; + int sector_max = min_match_hits; + for (int cat = 0; cat < cat_limit_index; cat++) { - int i = map_indirection[cat][item_in_cat]; - uint8_t *md5s = map[i].md5_set + 4; - /* Add each item to the matchmap*/ - for (int wfp_index = map_indexes[i]; wfp_index < map[i].size; wfp_index++) + /* travel the cathegories map*/ + for (int item_in_cat = 0; item_in_cat < map_indirection_index[cat]; item_in_cat++) { - int wfp_p = wfp_index * WFP_REC_LN; - /*Stop when a new sector appers*/ - if (md5s[wfp_p] != sector) + int i = map_indirection[cat][item_in_cat]; + uint8_t *md5s = map[i].md5_set + 4; + /* Add each item to the matchmap*/ + for (int wfp_index = map_indexes[i]; wfp_index < map[i].size; wfp_index++) { - map_indexes[i] = wfp_index; - break; + int wfp_p = wfp_index * WFP_REC_LN; + /*Stop when a new sector appers*/ + if (md5s[wfp_p] != sector) + { + map_indexes[i] = wfp_index; + break; + } + + add_file_to_matchmap(scan, &map[i], &md5s[wfp_p], last_sector_aux, §or_max, &scan->matchmap_rank_by_sector[sector]); } - - add_file_to_matchmap(scan, &map[i], &md5s[wfp_p], last_sector_aux, §or_max, &scan->matchmap_rank_by_sector[sector]); - } - } + } + } + /*start to look from the last added md5*/ + last_sector_aux = scan->matchmap_size - 1; } - /*start to look from the last added md5*/ - last_sector_aux = scan->matchmap_size - 1; - } - - /* Check if we have at least one possible match*/ - bool at_least_one_possible_match = false; - for (int sector = 0; sector < 256; sector++) - { - if (scan->matchmap_rank_by_sector[sector] > -1) + + /* Check if we have at least one possible match*/ + bool at_least_one_possible_match = false; + for (int sector = 0; sector < 256; sector++) { - if 
(scan->matchmap[scan->matchmap_rank_by_sector[sector]].hits > 0) + if (scan->matchmap_rank_by_sector[sector] > -1) { - at_least_one_possible_match = true; + if (scan->matchmap[scan->matchmap_rank_by_sector[sector]].hits > 0) + { + at_least_one_possible_match = true; + } } } - } - if (debug_on) - { - scanlog("First Stage - Max hits by sector\n"); - for (int sector = 0; sector < 256; sector++) + if (debug_on) { - if (scan->matchmap_rank_by_sector[sector] >= 0) - scanlog("Sector %02x, Max at %d with %d\n", sector, scan->matchmap_rank_by_sector[sector], scan->matchmap[scan->matchmap_rank_by_sector[sector]].hits); + scanlog("First Stage - Max hits by sector\n"); + for (int sector = 0; sector < 256; sector++) + { + if (scan->matchmap_rank_by_sector[sector] >= 0) + scanlog("Sector %02x, Max at %d with %d\n", sector, scan->matchmap_rank_by_sector[sector], scan->matchmap[scan->matchmap_rank_by_sector[sector]].hits); + } } - } - if (!at_least_one_possible_match) - { - scanlog("No sector with hits, no match\n"); - } - /* Second state scan, using the rest of the availbles MD5s from the map*/ - else - { - int md5_proceced = 0; - scanlog("-- Second Stage: Looking on the rest of the cathegories -- \n"); - for (int cat = cat_limit_index; cat < MAP_INDIRECTION_CAT_NUMBER ; cat++) + if (!at_least_one_possible_match) + { + scanlog("No sector with hits, no match\n"); + result = MATCH_NONE; + } + /* Second state scan, using the rest of the availbles MD5s from the map*/ + else { - /* travel the cathegories map*/ - for (int item_in_cat = 0; item_in_cat < map_indirection_index[cat]; item_in_cat++) + int md5_proceced = 0; + scanlog("-- Second Stage: Looking on the rest of the cathegories -- \n"); + for (int cat = cat_limit_index; cat < MAP_INDIRECTION_CAT_NUMBER ; cat++) { - int i = map_indirection[cat][item_in_cat]; - uint8_t *md5s = map[i].md5_set + 4; - /* Add each item to the matchmap*/ - for (int wfp_index = map_indexes[i]; wfp_index < map[i].size; wfp_index++) + /* travel the 
cathegories map*/ + for (int item_in_cat = 0; item_in_cat < map_indirection_index[cat]; item_in_cat++) { - int wfp_p = wfp_index * WFP_REC_LN; - int sector = md5s[wfp_p]; - int sector_max = min_match_hits; - - if (scan->matchmap_rank_by_sector[sector] < 0) - continue; - else - sector_max = scan->matchmap[scan->matchmap_rank_by_sector[sector]].hits; - - if (md5cmp(&md5s[wfp_p], scan->matchmap[scan->matchmap_rank_by_sector[sector]].md5)) - { - add_file_to_matchmap(scan, &map[i], &md5s[wfp_p], 0, §or_max, &scan->matchmap_rank_by_sector[sector]); - md5_proceced++; + int i = map_indirection[cat][item_in_cat]; + uint8_t *md5s = map[i].md5_set + 4; + /* Add each item to the matchmap*/ + for (int wfp_index = map_indexes[i]; wfp_index < map[i].size; wfp_index++) + { + int wfp_p = wfp_index * WFP_REC_LN; + int sector = md5s[wfp_p]; + int sector_max = min_match_hits; + + if (scan->matchmap_rank_by_sector[sector] < 0) + continue; + else + sector_max = scan->matchmap[scan->matchmap_rank_by_sector[sector]].hits; + + if (md5cmp(&md5s[wfp_p], scan->matchmap[scan->matchmap_rank_by_sector[sector]].md5)) + { + add_file_to_matchmap(scan, &map[i], &md5s[wfp_p], 0, §or_max, &scan->matchmap_rank_by_sector[sector]); + md5_proceced++; + } } } + //limit the quantity of iterations to prevent performance issues. + if (md5_proceced > DEFAULT_MATCHMAP_FILES) + break; } - //limit the quantity of iterations to prevent performance issues. 
- if (md5_proceced > DEFAULT_MATCHMAP_FILES) - break; + result = MATCH_SNIPPET; } - } - //for debuging - if (debug_on) - { - scanlog("Max hits by sector\n"); - for (int sector = 0; sector < 256; sector++) + //for debuging + if (debug_on) { - if (scan->matchmap_rank_by_sector[sector] >= 0) - scanlog("Sector %02x, Max at %d with %d\n", sector, scan->matchmap_rank_by_sector[sector], scan->matchmap[scan->matchmap_rank_by_sector[sector]].hits); + scanlog("Max hits by sector\n"); + for (int sector = 0; sector < 256; sector++) + { + if (scan->matchmap_rank_by_sector[sector] >= 0) + scanlog("Sector %02x, Max at %d with %d\n", sector, scan->matchmap_rank_by_sector[sector], scan->matchmap[scan->matchmap_rank_by_sector[sector]].hits); + } } } - //Free memory for (int i = 0; i < scan->hash_count; i++) { free(map[i].md5_set); } - if (scan->matchmap_size) - return MATCH_SNIPPET; - - scanlog("Snippet scan has no matches\n"); - return MATCH_NONE; - - + return result; }