From a42a8caad5d375810bf9dd36b9252ee835ad245a Mon Sep 17 00:00:00 2001 From: vsedov Date: Sun, 10 Mar 2024 15:42:07 +0000 Subject: [PATCH] Feat: update db_checker + gen_database.py --- db_checker.py | 18 ------------------ generate_database.py | 44 +++++++++++++++++++++++++++++--------------- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/db_checker.py b/db_checker.py index 06081f0d..a3fec17c 100644 --- a/db_checker.py +++ b/db_checker.py @@ -62,24 +62,6 @@ def get_topics_distribution(self): return topics_series.head(self.lengthB) # Only the top 5 topics def print_summary(self): - # self.console.print( - # f"Total plugins: {len(self.plugins_df)}", style="bold green") - # - # stats_df = self.calculate_statistics() - # self.console.print("\nStatistical Summary:", style="bold underline") - # self.print_table(stats_df, ['Metric', 'Mean', 'Standard Deviation']) - # - # lang_dist = self.get_language_distribution() - # self.console.print("\nLanguage distribution:", style="bold underline") - # self.print_table(lang_dist, ['Language', 'Count']) - - # avg_activity_score_by_lang = self.get_average_activity_score_by_language( - # ) - # self.console.print("\nAverage Activity Score by Language:", - # style="bold underline") - # self.print_table(avg_activity_score_by_lang, - # ['Language', 'Average Activity Score']) - topics_distribution = self.get_topics_distribution() self.console.print("\nTop Topics distribution:", style="bold underline") diff --git a/generate_database.py b/generate_database.py index 9d8d3dcb..47144ab5 100644 --- a/generate_database.py +++ b/generate_database.py @@ -134,7 +134,12 @@ def __init__(self): "id_env_var": "CLIENT_ID3", "secret_env_var": "SECRET_ID3" }, + { + "id_env_var": "CLIENT_ID4", + "secret_env_var": "SECRET_ID4" + }, ] + self.current_index = random.randint(0, len(self.client_keys) - 1) self.client_id, self.client_secret = self.get_key() @@ -143,11 +148,11 @@ def get_key(self): self.current_index = (self.current_index + 1) % 
len(self.client_keys) client_id = os.environ.get(current_keys["id_env_var"]) client_secret = os.environ.get(current_keys["secret_env_var"]) - logging.info(ic.format(f"Using Keys: {current_keys}")) + logging.critical( + ic.format(f"Switching API Key -> Using Keys: {current_keys}")) return client_id, client_secret def switch_api_key(self): - logging.critical(f"\n\nSwitching API Key\n\n") self.client_id, self.client_secret = self.get_key() @@ -219,6 +224,7 @@ def __init__(self, user: str = "budswa", batch_size: int = -1) -> None: self.unwanted_config = [ "lvim", "dotfiles", + "dotfile", "dots", "nvim-dotfiles", "nvim-qt", @@ -471,18 +477,17 @@ def make_jobs(self, base: BaseRequestResponse) -> None: description_mapper = key_mapper("description") # uses d['description'] language_mapper = key_mapper("language") archive_mapper = key_mapper("archived") - # checks if d['full_name'] ends with .nvim, -nvim, .vim - # check if d['name'] starts with '.' + # checks if d['full_name'] ends with .nvim, -nvim, .vim + # check if d['name'] starts with '.' 
fixed_plugin_conds = [] - fixed_dotfile_conds = [ - ] + fixed_dotfile_conds = [] both_conditions = [ language_mapper( # lambda x, y: x.lower() == "lua", ["lua"] lambda x, y: self.debug_print(x, y), - ["lua"], + ["lua", "vim"], ), # sourcery skip: swap-if-expression archive_mapper(lambda x, _: 1 if not x else 0, ["_"]), @@ -490,14 +495,12 @@ def make_jobs(self, base: BaseRequestResponse) -> None: optional_plugin_conds = [ fullname_mapper( - lambda x, y: x.lower().endswith(y.lower()), - [".nvim", "-nvim", ".vim"], - ), - + lambda x, y: x.lower().endswith(y.lower()), + [".nvim", "-nvim", ".vim"], + ), ] optional_dotfile_conds = [ - name_mapper(lambda x, y: x.lower().startswith(y.lower()), - "."), + name_mapper(lambda x, y: x.lower().startswith(y.lower()), "."), fullname_mapper( lambda x, y: y.lower() in x.lower(), self.unwanted_config ), # checks if any of the unwanted config names are in d['full_name'] @@ -505,7 +508,6 @@ def make_jobs(self, base: BaseRequestResponse) -> None: lambda x, y: y.lower() in x.lower() if x is not None else 0, self.unwanted_config, ), # check if any of the unwanted config names are in d['description'] - ] cases = { # mappings for conditions based on conditions @@ -518,7 +520,10 @@ def custom_case(x): is_plugin = sum(cn(x) for cn in fixed_plugin_conds) optional_plugin = sum(cn(x) for cn in optional_plugin_conds) optional_dotfile = sum(cn(x) for cn in optional_dotfile_conds) - return (0, 1) if optional_dotfile > 0 else (1, 0) if is_plugin + optional_plugin > 1 else (0, 1) + return (0, 1) if optional_dotfile > 0 else ( + 1, 0) if is_plugin + optional_plugin > 1 else (0, 1) + # return (1, 0) if is_plugin + optional_plugin > 1 else (0, 1) + else: return (0, 0) @@ -549,7 +554,16 @@ def make_jobtype(response): filetrees = [x for x in filetrees if x[-1] is not None] for res in filetrees: tree = res[-1] + + # Secondary level filter if "lua" in tree: + # Tertiary level filter + for x in self.unwanted_config: + description = res[0].get("description", 
"") + if res[0]["name"] in x or (description and description in x): + continue + + # Quaternary level filter if any("init" in item and ( item.endswith("lua") or item.endswith("vim")) for item in tree):