From 8c74816c240635e6a06ce8d751d0e505064c2d43 Mon Sep 17 00:00:00 2001
From: k9845
Date: Wed, 9 Aug 2023 11:35:43 +0545
Subject: [PATCH] Add flake8 in CI

---
 .flake8                                      |   8 +-
 .github/pull_request_template.md             |  25 ++
 .github/workflows/flake8.yml                 |  25 ++
 analysis_module/mockserver.py                | 363 +++++++++---------
 analysis_module/tests/test_apis.py           |   4 +-
 analysis_module/tests/test_tasks.py          |   7 -
 core/tasks/get_data_old.py                   |   1 -
 core/tasks/nlp_mapping.py                    |  11 +-
 core/tasks/queries.py                        |   1 -
 core/tests/test_apis.py                      |   1 -
 core_server/settings.py                      |   1 -
 docker-compose.yml                           |   1 -
 nlp_scripts/model_monitoring/__init__.py     |   4 +-
 nlp_scripts/model_monitoring/featuredrift.py |   2 +-
 .../model_monitoring/generate_outputs.py     |   7 +-
 .../model_monitoring/model_performance.py    |  87 ++++-
 16 files changed, 333 insertions(+), 215 deletions(-)
 create mode 100644 .github/pull_request_template.md
 create mode 100644 .github/workflows/flake8.yml

diff --git a/.flake8 b/.flake8
index 7da1f96..5ea7c75 100644
--- a/.flake8
+++ b/.flake8
@@ -1,2 +1,8 @@
 [flake8]
-max-line-length = 100
+ignore = C901, W504
+max-line-length = 125
+exclude = .git,__pycache__,old,build,dist,*/migrations/*.py
+max-complexity = 10
+per-file-ignores =
+    /**/tests/*_mock_data.py: E501
+    **/snap_test_*.py: E501
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 0000000..7e078fe
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,25 @@
+Addresses xxxxxx (e.g. #1 the-deep-nlp/core-server#1) \
+Depends on xxxxxx (e.g. #1 the-deep-nlp/core-server#1)
+
+## Changes
+
+* Detailed list or prose of changes
+* Breaking changes
+* Changes to configurations
+
+Mention related users here if any.
+
+## This PR doesn't introduce any:
+
+- [ ] temporary files, auto-generated files or secret keys
+- [ ] n+1 queries
+- [ ] flake8 issues
+- [ ] `print`
+- [ ] typos
+- [ ] unwanted comments
+
+## This PR contains valid:
+
+- [ ] tests
+- [ ] permission checks (tests here too)
+- [ ] translations
diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml
new file mode 100644
index 0000000..b5951fc
--- /dev/null
+++ b/.github/workflows/flake8.yml
@@ -0,0 +1,25 @@
+name: Flake8 check
+ +on: + pull_request: + push: + branches: + - main + +jobs: + check: + name: Flake8 check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + # Flake8 check + - name: Install flake8 + run: pip install flake8 + - name: Run flake8 + uses: suo/flake8-github-action@releases/v1 + with: + # NOTE: this needs to be the same as the job name + checkName: 'Flake8 check' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/analysis_module/mockserver.py b/analysis_module/mockserver.py index 2a1e6c4..2aea71b 100644 --- a/analysis_module/mockserver.py +++ b/analysis_module/mockserver.py @@ -5,6 +5,7 @@ import logging import random import numpy as np + from random import shuffle from math import ceil from celery import shared_task @@ -19,7 +20,7 @@ from .utils import send_callback_url_request -logger = logging.getLogger('__name__') +logger = logging.getLogger("__name__") logger.setLevel(logging.INFO) @@ -87,183 +88,183 @@ ] MOCK_ENTRY_CLASSIFICATION = { - "classifications": [ - { - "client_id": "5", - "model_preds": { - "2": { - "204": { - "2402": { - "prediction": 0.4069949281240046, - "threshold": 0.489, - "is_selected": False - }, - "2401": { - "prediction": 0.27091098129102825, - "threshold": 0.461, - "is_selected": False - } - } - } - } - }, - { - "client_id": "7", - "model_preds": { - "2": { - "204": { - "2402": { - "prediction": 0.5442236220665992, - "threshold": 0.489, - "is_selected": True - }, - "2401": { - "prediction": 0.4262570897824335, - "threshold": 0.461, - "is_selected": False - } - }, - "202": { - "2206": { - "prediction": 0.25068859880169236, - "threshold": 0.576, - "is_selected": False + "classifications": [ + { + "client_id": "5", + "model_preds": { + "2": { + "204": { + "2402": { + "prediction": 0.4069949281240046, + "threshold": 0.489, + "is_selected": False, + }, + "2401": { + "prediction": 0.27091098129102825, + "threshold": 0.461, + "is_selected": False, + }, + } + } }, - "2201": { - "prediction": 0.5456802809044823, - "threshold": 0.431, - "is_selected": True - } - } }, - "5": { - "503": { - "5303": { - "prediction": 0.12105567270217965, - "threshold": 0.438, - "is_selected": False - }, - "5306": { - "prediction": 0.0934217669913229, - "threshold": 0.424, - "is_selected": False - }, - "5310": { - "prediction": 0.2706523782039786, - "threshold": 0.478, - "is_selected": False - }, - "5302": { - "prediction": 0.10373815047470006, - "threshold": 0.44, - "is_selected": False - }, - "5307": { - "prediction": 0.10675184680643865, - "threshold": 0.414, - "is_selected": False - }, - "5309": { - "prediction": 0.15713495668023825, - "threshold": 0.512, - "is_selected": False - }, - "5308": { - "prediction": 0.2450807941587348, - "threshold": 0.475, - "is_selected": False - }, - "5301": { - "prediction": 0.16692731163052263, - "threshold": 0.488, - "is_selected": False - }, - "5305": { - "prediction": 0.09886651321893601, - "threshold": 0.508, - "is_selected": False + { + "client_id": "7", + "model_preds": { + "2": { + "204": { + "2402": { + "prediction": 0.5442236220665992, + "threshold": 0.489, + "is_selected": True, + }, + "2401": { + "prediction": 0.4262570897824335, + "threshold": 0.461, + "is_selected": False, + }, + }, + "202": { + "2206": { + "prediction": 0.25068859880169236, + "threshold": 0.576, + "is_selected": False, + }, + "2201": { + "prediction": 0.5456802809044823, + "threshold": 0.431, + "is_selected": True, + }, + }, + }, + "5": { + "503": { + "5303": { + "prediction": 0.12105567270217965, + "threshold": 0.438, + "is_selected": False, + }, + 
"5306": { + "prediction": 0.0934217669913229, + "threshold": 0.424, + "is_selected": False, + }, + "5310": { + "prediction": 0.2706523782039786, + "threshold": 0.478, + "is_selected": False, + }, + "5302": { + "prediction": 0.10373815047470006, + "threshold": 0.44, + "is_selected": False, + }, + "5307": { + "prediction": 0.10675184680643865, + "threshold": 0.414, + "is_selected": False, + }, + "5309": { + "prediction": 0.15713495668023825, + "threshold": 0.512, + "is_selected": False, + }, + "5308": { + "prediction": 0.2450807941587348, + "threshold": 0.475, + "is_selected": False, + }, + "5301": { + "prediction": 0.16692731163052263, + "threshold": 0.488, + "is_selected": False, + }, + "5305": { + "prediction": 0.09886651321893601, + "threshold": 0.508, + "is_selected": False, + }, + "5304": { + "prediction": 0.18824445637496742, + "threshold": 0.444, + "is_selected": False, + }, + }, + "501": { + "5102": { + "prediction": 0.21789910171917756, + "threshold": 0.541, + "is_selected": False, + }, + "5109": { + "prediction": 0.3480727123794051, + "threshold": 0.454, + "is_selected": False, + }, + "5106": { + "prediction": 0.23486564947864202, + "threshold": 0.381, + "is_selected": False, + }, + "5108": { + "prediction": 0.05966722541108756, + "threshold": 0.527, + "is_selected": False, + }, + "5111": { + "prediction": 0.46915922655621894, + "threshold": 0.447, + "is_selected": True, + }, + "5107": { + "prediction": 0.3090465321041693, + "threshold": 0.449, + "is_selected": False, + }, + "5101": { + "prediction": 0.015221919587000888, + "threshold": 0.47, + "is_selected": False, + }, + "5103": { + "prediction": 0.3523940058170018, + "threshold": 0.482, + "is_selected": False, + }, + "5104": { + "prediction": 0.003284739766450025, + "threshold": 0.786, + "is_selected": False, + }, + "5105": { + "prediction": 0.22805604930227613, + "threshold": 0.534, + "is_selected": False, + }, + "5110": { + "prediction": 0.20070979371666908, + "threshold": 0.05, + "is_selected": True, + }, + }, + }, + "4": { + "401": { + "4102": { + "prediction": 0.004212768160319299, + "threshold": 0.814, + "is_selected": False, + }, + "4101": { + "prediction": 0.4228575605351778, + "threshold": 0.422, + "is_selected": True, + }, + } + }, }, - "5304": { - "prediction": 0.18824445637496742, - "threshold": 0.444, - "is_selected": False - } - }, - "501": { - "5102": { - "prediction": 0.21789910171917756, - "threshold": 0.541, - "is_selected": False - }, - "5109": { - "prediction": 0.3480727123794051, - "threshold": 0.454, - "is_selected": False - }, - "5106": { - "prediction": 0.23486564947864202, - "threshold": 0.381, - "is_selected": False - }, - "5108": { - "prediction": 0.05966722541108756, - "threshold": 0.527, - "is_selected": False - }, - "5111": { - "prediction": 0.46915922655621894, - "threshold": 0.447, - "is_selected": True - }, - "5107": { - "prediction": 0.3090465321041693, - "threshold": 0.449, - "is_selected": False - }, - "5101": { - "prediction": 0.015221919587000888, - "threshold": 0.47, - "is_selected": False - }, - "5103": { - "prediction": 0.3523940058170018, - "threshold": 0.482, - "is_selected": False - }, - "5104": { - "prediction": 0.003284739766450025, - "threshold": 0.786, - "is_selected": False - }, - "5105": { - "prediction": 0.22805604930227613, - "threshold": 0.534, - "is_selected": False - }, - "5110": { - "prediction": 0.20070979371666908, - "threshold": 0.05, - "is_selected": True - } - } }, - "4": { - "401": { - "4102": { - "prediction": 0.004212768160319299, - "threshold": 0.814, - 
"is_selected": False - }, - "4101": { - "prediction": 0.4228575605351778, - "threshold": 0.422, - "is_selected": True - } - } - } - } - } - ] + ] } @@ -427,7 +428,7 @@ def process_topicmodeling(body) -> Any: shuffle(excerpt_ids) data = [ - excerpt_ids[x: x + ceil(len(excerpt_ids) / clusters)] + excerpt_ids[x:x + ceil(len(excerpt_ids) / clusters)] for x in range(0, len(excerpt_ids), ceil(len(excerpt_ids) / clusters)) ] @@ -490,7 +491,9 @@ def shape_geo_entities(entity: dict, excerpt: str): data.append({"entry_id": int(entry_id), "entities": entities}) filepath = save_data_local_and_get_url( - dir_name="geolocation", client_id=client_id, data=data + dir_name="geolocation", + client_id=client_id, + data=data ) send_callback_url_request( @@ -507,7 +510,9 @@ def geolocation_mock_model(body) -> Any: def text_extraction_mock(body) -> Any: - process_extraction_mock.apply_async(args=(body,), countdown=2) # Trigger task after 2 seconds + process_extraction_mock.apply_async( + args=(body,), countdown=2 + ) # Trigger task after 2 seconds return json.dumps({"status": "Successfully received the request."}), 200 @@ -521,7 +526,9 @@ def process_extraction_mock(body) -> Any: for document in documents: client_id = document["client_id"] random_extracted_text = "This is some random extracted text" - filepath = save_data_local_and_get_url("extraction", client_id, random_extracted_text) + filepath = save_data_local_and_get_url( + "extraction", client_id, random_extracted_text + ) callback_data = { "text_path": filepath, "images_path": [], diff --git a/analysis_module/tests/test_apis.py b/analysis_module/tests/test_apis.py index f4c3e31..e2ac4f5 100644 --- a/analysis_module/tests/test_apis.py +++ b/analysis_module/tests/test_apis.py @@ -492,7 +492,7 @@ def test_extraction_system_request(self, requests_mock): """ documents = [ {"url": "someurl", "client_id": self.CLIENT_ID}, - {"url": "anothersomeurl", "client_id": self.CLIENT_ID+"1"}, + {"url": "anothersomeurl", "client_id": self.CLIENT_ID + "1"}, ] data = { "documents": documents, @@ -529,7 +529,7 @@ def test_extraction_user_request(self, requests_mock): """ documents = [ {"url": "someurl", "client_id": self.CLIENT_ID}, - {"url": "anothersomeurl", "client_id": self.CLIENT_ID+"1"}, + {"url": "anothersomeurl", "client_id": self.CLIENT_ID + "1"}, ] data = { "documents": documents, diff --git a/analysis_module/tests/test_tasks.py b/analysis_module/tests/test_tasks.py index 7770e19..50bacb9 100644 --- a/analysis_module/tests/test_tasks.py +++ b/analysis_module/tests/test_tasks.py @@ -51,14 +51,7 @@ def test_resend_callback_request_no_callback_url(self, requests_patch): assert failed_callback.status == FailedCallback.Status.FAILED def test_get_failed_callbacks(self): - successful_callback = self.create_failed_callback( - status=FailedCallback.Status.SUCCESS - ) now = datetime.now() - retry_maxed_callback = self.create_failed_callback( - status=FailedCallback.Status.RETRY_MAXED_OUT, - created_at=now - timedelta(hours=20), - ) old_callback = self.create_failed_callback( status=FailedCallback.Status.RETRYING, ) diff --git a/core/tasks/get_data_old.py b/core/tasks/get_data_old.py index e850893..cfa038a 100644 --- a/core/tasks/get_data_old.py +++ b/core/tasks/get_data_old.py @@ -145,7 +145,6 @@ def _get_id2label(frame): return t - def _reshape_report(a): a = a.split("-") real = [] diff --git a/core/tasks/nlp_mapping.py b/core/tasks/nlp_mapping.py index 8657bcd..c5d2bda 100644 --- a/core/tasks/nlp_mapping.py +++ b/core/tasks/nlp_mapping.py @@ -146,9 +146,8 @@ def 
get_nlp_outputs( second_last_item = all_items[-2] mapping_row = hum_mapping_sheet[ hum_mapping_sheet.apply( - lambda x: second_last_item - == x["Original first level"] - and last_item == x["Original second level"], + lambda x: second_last_item == x["Original first level"] and + last_item == x["Original second level"], axis=1, ) ].copy() @@ -175,10 +174,8 @@ def get_nlp_outputs( else: first_level_mapped_row = hum_mapping_sheet[ hum_mapping_sheet.apply( - lambda x: x["Original first level"] - == last_item - and str(x["Original second level"]) - == "nan", + lambda x: x["Original first level"] == last_item and + str(x["Original second level"]) == "nan", axis=1, ) ].copy() diff --git a/core/tasks/queries.py b/core/tasks/queries.py index 81d64e9..a420322 100644 --- a/core/tasks/queries.py +++ b/core/tasks/queries.py @@ -175,7 +175,6 @@ e.created_at >= '{}' and e.excerpt is not NULL and e.excerpt <> '' - ) res group by res.id diff --git a/core/tests/test_apis.py b/core/tests/test_apis.py index 8b8b518..5eac313 100644 --- a/core/tests/test_apis.py +++ b/core/tests/test_apis.py @@ -1,5 +1,4 @@ from core_server.base_test import BaseTestCase -from rest_framework.authtoken.models import Token class TestApiAuth(BaseTestCase): diff --git a/core_server/settings.py b/core_server/settings.py index e8d5592..b8e1bb0 100644 --- a/core_server/settings.py +++ b/core_server/settings.py @@ -17,7 +17,6 @@ from sentry_sdk.integrations.celery import CeleryIntegration from sentry_sdk.integrations.redis import RedisIntegration -from core_server.tasks_settings import CELERY_BEAT_SCHEDULE from core_server.env import env # Build paths inside the project like this: BASE_DIR / 'subdir'. diff --git a/docker-compose.yml b/docker-compose.yml index cb79368..5469e42 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,7 +29,6 @@ x-server: &base-server-config AWS_S3_SECRET_ACCESS_KEY: ${AWS_S3_SECRET_ACCESS_KEY:-} AWS_S3_BUCKET_NAME: ${AWS_S3_BUCKET_NAME} AWS_S3_REGION_NAME: ${AWS_S3_REGION_NAME} - USE_S3: ${USE_S3:-} # Celery CELERY_BROKER_URL: ${CELERY_BROKER_URL:-redis://redis:6379} CELERY_RESULT_BACKEND: ${CELERY_RESULT_BACKEND:-redis://redis:6379} diff --git a/nlp_scripts/model_monitoring/__init__.py b/nlp_scripts/model_monitoring/__init__.py index abeb9e7..9214196 100644 --- a/nlp_scripts/model_monitoring/__init__.py +++ b/nlp_scripts/model_monitoring/__init__.py @@ -1,8 +1,6 @@ import pandas as pd -from .generate_outputs import ClassificationModelOutput - -from core.models import Entry, ClassificationModel +from core.models import Entry from utils.transformations import batched diff --git a/nlp_scripts/model_monitoring/featuredrift.py b/nlp_scripts/model_monitoring/featuredrift.py index 9ae0a18..b0737d1 100644 --- a/nlp_scripts/model_monitoring/featuredrift.py +++ b/nlp_scripts/model_monitoring/featuredrift.py @@ -1,7 +1,7 @@ import datetime import warnings -from typing import List, Optional, Any +from typing import List, Optional from tqdm import tqdm import numpy as np diff --git a/nlp_scripts/model_monitoring/generate_outputs.py b/nlp_scripts/model_monitoring/generate_outputs.py index 7e7579f..4897ed0 100644 --- a/nlp_scripts/model_monitoring/generate_outputs.py +++ b/nlp_scripts/model_monitoring/generate_outputs.py @@ -1,4 +1,3 @@ -import json import logging from typing import Dict, Optional, Tuple, Any @@ -93,9 +92,9 @@ def get_model_outputs(self, excerpt: list) -> Tuple[list, list]: endpoint_name=self.endpoint_name, ) if ( - self.prediction_generation - and "raw_predictions" in outputs - and "thresholds" 
in outputs + self.prediction_generation and + "raw_predictions" in outputs and + "thresholds" in outputs ): predictions = outputs["raw_predictions"] thresholds = outputs["thresholds"] diff --git a/nlp_scripts/model_monitoring/model_performance.py b/nlp_scripts/model_monitoring/model_performance.py index 1f2082e..bbb62b7 100644 --- a/nlp_scripts/model_monitoring/model_performance.py +++ b/nlp_scripts/model_monitoring/model_performance.py @@ -42,7 +42,19 @@ class ModelPerformance: def __init__(self, dataframe: pl.DataFrame): """ - Categories: ["sectors", "pillars_1d", "pillars_2d", "subpillars_1d", "subpillars_2d", "age", "displaced", "gender", "non displaced", "severity", "specific_needs_groups"] + Categories: [ + "sectors", + "pillars_1d", + "pillars_2d", + "subpillars_1d", + "subpillars_2d", + "age", + "displaced", + "gender", + "non displaced", + "severity", + "specific_needs_groups" + ] Input Dataframe: project_id, {category} """ self.dataframe = dataframe @@ -178,7 +190,19 @@ def projectwise_perf_metrics( ) -> pl.DataFrame: """ Calculates the project wise performance metrics - Categories: ["sectors", "pillars_1d", "pillars_2d", "subpillars_1d", "subpillars_2d", "age", "displaced", "gender", "non displaced", "severity", "specific_needs_groups"] + Categories: [ + "sectors", + "pillars_1d", + "pillars_2d", + "subpillars_1d", + "subpillars_2d", + "age", + "displaced", + "gender", + "non displaced", + "severity", + "specific_needs_groups" + ] Input Dataframe: project_id, {category}_transformed, {category}_pred_transformed Output Dataframe: project_id, {category}_precision, {category}_recall, {category}_f1score, generated_at """ @@ -224,7 +248,19 @@ def overall_projects_perf_metrics( ) -> Optional[pl.DataFrame]: """ Calculates the overall performance metrics irrespective of the projects - Categories: ["sectors", "pillars_1d", "pillars_2d", "subpillars_1d", "subpillars_2d", "age", "displaced", "gender", "non displaced", "severity", "specific_needs_groups"] + Categories: [ + "sectors", + "pillars_1d", + "pillars_2d", + "subpillars_1d", + "subpillars_2d", + "age", + "displaced", + "gender", + "non displaced", + "severity", + "specific_needs_groups" + ] Input Dataframe: {category}_transformed, {category}_pred_transformed Output Dataframe: category, precision, recall, f1score, generated_at category: List of group as mentioned above @@ -258,7 +294,19 @@ def overall_projects_perf_metrics( def per_tag_perf_metrics(self) -> Optional[pl.DataFrame]: """ Calculates the performance metrics per tag - Categories: ["sectors", "pillars_1d", "pillars_2d", "subpillars_1d", "subpillars_2d", "age", "displaced", "gender", "non displaced", "severity", "specific_needs_groups"] + Categories: [ + "sectors", + "pillars_1d", + "pillars_2d", + "subpillars_1d", + "subpillars_2d", + "age", + "displaced", + "gender", + "non displaced", + "severity", + "specific_needs_groups" + ] Input Dataframe: {category}_transformed, {category}_pred_transformed Output Dataframe: tags, scores, metrics(type), generated_at tags refer to list of tags @@ -331,7 +379,19 @@ def _tags_matching_ratios( def calculate_ratios(self) -> Optional[pl.DataFrame]: """ Calculate the ratios of the categories - Categories: ["sectors", "pillars_1d", "pillars_2d", "subpillars_1d", "subpillars_2d", "age", "displaced", "gender", "non displaced", "severity", "specific_needs_groups"] + Categories: [ + "sectors", + "pillars_1d", + "pillars_2d", + "subpillars_1d", + "subpillars_2d", + "age", + "displaced", + "gender", + "non displaced", + "severity", + 
"specific_needs_groups" + ] Input Dataframe: project_id, {category}_transformed, {category}_pred_transformed Output Dataframe: project_id, completely_matched_{category}, missing_{category}, wrong_{category}, generated_at """ @@ -361,9 +421,22 @@ def calculate_ratios(self) -> Optional[pl.DataFrame]: def per_project_calc_ratios(self) -> Optional[pl.DataFrame]: """ Calculates the per project ratio of the categories - Categories: ["sectors", "pillars_1d", "pillars_2d", "subpillars_1d", "subpillars_2d", "age", "displaced", "gender", "non displaced", "severity", "specific_needs_groups"] + Categories: [ + "sectors", + "pillars_1d", + "pillars_2d", + "subpillars_1d", + "subpillars_2d", + "age", + "displaced", + "gender", + "non displaced", + "severity", + "specific_needs_groups" + ] Input Dataframe: project_id, completely_matched_{category}, missing_{category}, wrong_{category}, - Output Dataframe: project_id, completely_matched_{category}_mean, missing_{category}_mean, wrong_{category}_mean, generated_at + Output Dataframe: project_id, completely_matched_{category}_mean,\ + missing_{category}_mean, wrong_{category}_mean, generated_at """ ratios_df = self.calculate_ratios() if ratios_df is None: