Skip to content

Commit

Permalink
Make matchmap size dynamic depending on available information (#61)
Browse files Browse the repository at this point in the history
* Make matchmap size dynamic depending on the available information.
* Improve error message if shared lib load fails
  • Loading branch information
mscasso-scanoss authored Dec 15, 2023
1 parent 90ab681 commit b7c721c
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 22 deletions.
3 changes: 2 additions & 1 deletion inc/limits.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
#define MAX_JSON_VALUE_LEN 4096

/* Snippets */
#define MAX_MATCHMAP_FILES 10000 // Max number of files evaluated in snippet matching
#define DEFAULT_MATCHMAP_FILES 10000 // Default number of files evaluated in snippet matching
#define MAX_MATCHMAP_FILES (DEFAULT_MATCHMAP_FILES * 3) // Max number of files evaluated in snippet matching to prevent performance issues
#define SKIP_SNIPPETS_IF_FILE_BIGGER (1024 * 1024 * 4)
#define SKIP_SNIPPETS_IF_STARTS_WITH (const char*[3]) {"{", "<?xml", "<html"}
#define MAX_SNIPPETS_SCANNED 2500
Expand Down
2 changes: 1 addition & 1 deletion inc/scanoss.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
#define WFP_REC_LN 18

/* Log files */
#define SCANOSS_VERSION "5.3.3"
#define SCANOSS_VERSION "5.3.4"
#define SCAN_LOG "/tmp/scanoss_scan.log"
#define MAP_DUMP "/tmp/scanoss_map.dump"
#define SLOW_QUERY_LOG "/tmp/scanoss_slow_query.log"
Expand Down
2 changes: 1 addition & 1 deletion src/help.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,6 @@ Alternatively, these value can be written in %s\n\
+-------+-------------------------------------------------------+\n\
Example: scanoss -F 12 DIRECTORY (scans DIRECTORY disabling license and dependency data)\n\
\n\
Copyright (C) 2018-2022 SCANOSS.COM\n", MAX_MATCHMAP_FILES, API_URL, ENGINE_FLAGS_FILE);
Copyright (C) 2018-2022 SCANOSS.COM\n", DEFAULT_MATCHMAP_FILES, API_URL, ENGINE_FLAGS_FILE);

}
7 changes: 6 additions & 1 deletion src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,12 @@ bool lib_encoder_load()
/*set decode funtion pointer to NULL*/
lib_encoder_handle = dlopen("libscanoss_encoder.so", RTLD_NOW);
char * err;
if (lib_encoder_handle)
if ((err = dlerror()))
{
scanlog("Lib scanoss-enocder was not detected. %s\n", err);
}

if (lib_encoder_handle)
{
scanlog("Lib scanoss-enocder present\n");
decrypt_data = dlsym(lib_encoder_handle, "scanoss_decode_table");
Expand Down
7 changes: 3 additions & 4 deletions src/report.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,14 +240,13 @@ bool print_json_component(component_data_t * component)
printf("\"vendor\": \"%s\",", component->vendor);
printf("\"component\": \"%s\",", component->component);

char * version_clean = NULL;
version_clean = version_cleanup(component->version, component->component);
char * version_clean = version_cleanup(component->version, component->component);
printf("\"version\": \"%s\",", version_clean ? version_clean : "");
free(version_clean);

version_clean = version_cleanup(component->latest_version, component->component);
char * lastest_clean = version_cleanup(component->latest_version, component->component);
printf("\"latest\": \"%s\",", version_clean ? version_clean : "");
free(version_clean);
free(lastest_clean);

printf("\"url\": \"%s\",", component->main_url ? component->main_url : component->url);

Expand Down
43 changes: 29 additions & 14 deletions src/snippets.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
#include "match_list.h"
#include "stdlib.h"
int map_rec_len;
int matchmap_max_files = MAX_MATCHMAP_FILES;
int matchmap_max_files = DEFAULT_MATCHMAP_FILES;

/**
* @brief If the extension of the matched file does not match the extension of the scanned file
Expand Down Expand Up @@ -186,7 +186,7 @@ void biggest_snippet(scan_data_t *scan)
* @param ptr //TODO
* @return //TODO
*/
#define MATCHMAP_ITEM_SIZE (matchmap_max_files)
#define MATCHMAP_ITEM_SIZE (matchmap_max_files * 2)
static bool get_all_file_ids(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *data, uint32_t datalen, int iteration, void *ptr)
{
uint8_t *record = (uint8_t *)ptr;
Expand Down Expand Up @@ -575,7 +575,7 @@ static void matchmap_setup(scan_data_t * scan)
if (matchmap_env)
{
int matchmap_max_files_aux = atoi(matchmap_env);
if (matchmap_max_files_aux > MAX_MATCHMAP_FILES / 4 && matchmap_max_files_aux < MAX_MATCHMAP_FILES * 20)
if (matchmap_max_files_aux > DEFAULT_MATCHMAP_FILES / 4 && matchmap_max_files_aux < DEFAULT_MATCHMAP_FILES * 20)
{
scanlog("matchmap size changed by env variable to: %d\n", matchmap_max_files_aux);
matchmap_max_files = matchmap_max_files_aux;
Expand All @@ -589,7 +589,6 @@ static void matchmap_setup(scan_data_t * scan)
matchmap_max_files *=5;
scanlog("matchmap size changed by high accuracy analisys to: %d\n", matchmap_max_files);
}
scan->matchmap = calloc(matchmap_max_files, sizeof(matchmap_entry));
}

typedef struct matchmap_entry_t
Expand Down Expand Up @@ -794,26 +793,36 @@ match_t ldb_scan_snippets(scan_data_t *scan)
the cathegoies with less quantity of MD5s (less popular) will be prioritased*/
int cat_limit = 0;
int cat_limit_index=0;

int hashes_to_process = 0;
for (int i = 0; i < MAP_INDIRECTION_CAT_NUMBER; i++)
{
bool exit = false;
for (int j=0; j < map_indirection_index[i]; j++)
{
if (map[map_indirection[i][j]].size <= 0)
continue;
hashes_to_process++;
cat_limit += map[map_indirection[i][j]].size;
if (cat_limit > matchmap_max_files)
{
cat_limit_index = i;
exit = true;
break;
if (hashes_to_process < scan->hash_count / 10 && cat_limit < MAX_MATCHMAP_FILES)
{
matchmap_max_files += map[map_indirection[i][j]].size;
}
else
{
cat_limit_index = i;
exit = true;
break;
}
}
}
if (exit)
break;
else
cat_limit_index = i+1;
cat_limit_index = i;
}

if (debug_on)
{
scanlog("Cathegories result:\n");
Expand All @@ -822,13 +831,14 @@ match_t ldb_scan_snippets(scan_data_t *scan)
for (int j=0; j < map_indirection_index[i]; j++)
{
uint8_t * wfp = map[map_indirection[i][j]].wfp;
scanlog("Cat :%d - item %d line %d - %02x%02x%02x%02x - size %d\n",i,j,
scanlog("Cat :%d.%d - line %d - %02x%02x%02x%02x - size %d\n",i,j,
map[map_indirection[i][j]].line, wfp[0], wfp[1],wfp[2],wfp[3], map[map_indirection[i][j]].size);
}
}
}

scanlog("Map limit on %d MD5s at %d of %d\n",cat_limit, cat_limit_index, MAP_INDIRECTION_CAT_NUMBER);
matchmap_max_files = cat_limit;
scanlog("Map limit on %d MD5s at %d of %d. Selected hashed: %d/%d - cat_limit_files = %d\n",matchmap_max_files, cat_limit_index, MAP_INDIRECTION_CAT_NUMBER, hashes_to_process, scan->hash_count, cat_limit);
scan->matchmap = calloc(matchmap_max_files, sizeof(matchmap_entry));

int map_indexes[scan->hash_count];
memset(map_indexes, 0, sizeof(map_indexes));
Expand Down Expand Up @@ -895,6 +905,7 @@ match_t ldb_scan_snippets(scan_data_t *scan)
/* Second state scan, using the rest of the availbles MD5s from the map*/
else
{
int md5_proceced = 0;
scanlog("-- Second Stage: Looking on the rest of the cathegories -- \n");
for (int cat = cat_limit_index; cat < MAP_INDIRECTION_CAT_NUMBER ; cat++)
{
Expand All @@ -918,9 +929,13 @@ match_t ldb_scan_snippets(scan_data_t *scan)
if (md5cmp(&md5s[wfp_p], scan->matchmap[scan->matchmap_rank_by_sector[sector]].md5))
{
add_file_to_matchmap(scan, &map[i], &md5s[wfp_p], 0, &sector_max, &scan->matchmap_rank_by_sector[sector]);
md5_proceced++;
}
}
}
}
//limit the quantity of iterations to prevent performance issues.
if (md5_proceced > DEFAULT_MATCHMAP_FILES)
break;
}
}

Expand Down
3 changes: 3 additions & 0 deletions src/url.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@ bool handle_purl_record(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *d
}

char * c = strchr(purl, '/');
if (!c)
return false;

char purl_type[MAX_FIELD_LN] = "\0";
strncpy(purl_type, purl, c - purl);
uint32_t CRC = string_crc32c(purl_type);
Expand Down

0 comments on commit b7c721c

Please sign in to comment.