From 84155fe737f2cf95e58cc3727cc6cd610bab1d72 Mon Sep 17 00:00:00 2001
From: extreme4all <>
Date: Wed, 7 Feb 2024 00:34:28 +0100
Subject: [PATCH] add retry limit

---
 .vscode/settings.json       |  10 +-
 api/MachineLearning/data.py | 558 ++++++++++++++++++------------------
 api/app.py                  | 416 +++++++++++++--------------
 api/cogs/predict.py         | 222 +++++++-------
 api/cogs/requests.py        | 303 ++++++++++----------
 notes.md                    | 106 +++----
 6 files changed, 810 insertions(+), 805 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 154b205..3c73b4d 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,6 +1,6 @@
-{
-    "[python]": {
-      "editor.formatOnSave": true,
-      "editor.defaultFormatter": "charliermarsh.ruff"
-    }
+{
+    "[python]": {
+      "editor.formatOnSave": true,
+      "editor.defaultFormatter": "charliermarsh.ruff"
+    }
   }
\ No newline at end of file
diff --git a/api/MachineLearning/data.py b/api/MachineLearning/data.py
index a8b34f6..d71f4a2 100644
--- a/api/MachineLearning/data.py
+++ b/api/MachineLearning/data.py
@@ -1,279 +1,279 @@
-import logging
-from typing import List
-
-import numpy as np
-import pandas as pd
-
-logger = logging.getLogger(__name__)
-
-skills = [
-    "attack",
-    "defence",
-    "strength",
-    "hitpoints",
-    "ranged",
-    "prayer",
-    "magic",
-    "cooking",
-    "woodcutting",
-    "fletching",
-    "fishing",
-    "firemaking",
-    "crafting",
-    "smithing",
-    "mining",
-    "herblore",
-    "agility",
-    "thieving",
-    "slayer",
-    "farming",
-    "runecraft",
-    "hunter",
-    "construction",
-]
-minigames = [
-    "league",
-    "bounty_hunter_hunter",
-    "bounty_hunter_rogue",
-    "cs_all",
-    "cs_beginner",
-    "cs_easy",
-    "cs_medium",
-    "cs_hard",
-    "cs_elite",
-    "cs_master",
-    "lms_rank",
-    "soul_wars_zeal",
-]
-
-
-class hiscoreData:
-    """
-    This class is responsible for cleaning data & creating features.
-    """
-
-    def __init__(self, data: List[dict]) -> None:
-        self.df = pd.DataFrame(data)
-        self.df_clean = self.df.copy()
-
-        self.skills = skills
-        self.minigames = minigames
-
-        self.__clean()
-        self.__skill_ratio()
-        self.__boss_ratio()
-
-    def __clean(self) -> None:
-        """
-        Cleanup the dataframe.
-
-        This method will:
-            - drop unnecessary columns
-            - set the index to the player id
-            - replace -1 with 0
-            - create a list of bosses (not skills or minigames)
-            - create a total xp column
-            - create a total boss kc column
-            - reduces memory of dataframe
-            - fill na with 0
-            - create a dataframe with only low level players (total level < 1_000_000)
-        """
-        col_to_drop = ["id", "timestamp", "ts_date", "name"]
-        col_to_drop = [c for c in self.df_clean.columns if c in col_to_drop]
-        logger.info(f"dropping: {col_to_drop}")
-        self.df_clean.drop(columns=col_to_drop, inplace=True)
-
-        # set index to player id
-        self.df_clean.set_index(["Player_id"], inplace=True)
-
-        # if not on the hiscores it shows -1, replace with 0
-        self.df_clean = self.df_clean.replace(-1, 0)
-
-        # bosses
-        self.bosses = [
-            c for c in self.df_clean.columns if c not in ["total"] + skills + minigames
-        ]
-        # total is not always on hiscores, create a total xp column
-        self.df_clean["total"] = self.df_clean[self.skills].sum(axis=1)
-
-        # create a total boss kc column
-        self.df_clean["boss_total"] = (
-            self.df_clean[self.bosses].sum(axis=1).astype(np.int32)
-        )
-
-        # fillna
-        self.df_clean.fillna(0, inplace=True)
-
-        # apply smaller data types to reduce memory usage
-        non_total_features = [
-            col for col in self.df_clean.columns if "total" not in col
-        ]
-        self.df_clean[non_total_features] = self.df_clean[non_total_features].astype(
-            np.int32
-        )
-
-        # get low lvl players
-        mask = self.df_clean["total"] < 1_000_000
-        self.df_low = self.df_clean[mask].copy()
-
-    def __skill_ratio(self):
-        """
-        Create a dataframe with the ratio of each skill to the total level.
-
-        This method will:
-            - create a dataframe with the index of the original dataframe
-            - create a column for each skill with the ratio of the skill to the total level
-            - fill na with 0
-        """
-        self.skill_ratio = pd.DataFrame(index=self.df_clean.index)
-
-        total = self.df_clean["total"]
-
-        for skill in self.skills:
-            self.skill_ratio[f"{skill}/total"] = (self.df_clean[skill] / total).astype(
-                np.float16
-            )
-
-        self.skill_ratio.fillna(0, inplace=True)
-
-    def __boss_ratio(self):
-        """
-        Create a dataframe with the ratio of each boss to the total boss level.
-
-        This method will:
-            - create a dataframe with the index of the original dataframe
-            - create a column for each boss with the ratio of the boss to the total boss level
-            - fill na with 0
-        """
-        self.boss_ratio = pd.DataFrame(index=self.df_clean.index)
-
-        total = self.df_clean["boss_total"]
-        for boss in self.bosses:
-            self.boss_ratio[f"{boss}/total"] = (self.df_clean[boss] / total).astype(
-                np.float16
-            )
-
-        self.boss_ratio.fillna(0, inplace=True)
-
-    def features(
-        self, base: bool = True, skill_ratio: bool = True, boss_ratio: bool = True
-    ):
-        """
-        Create a dataframe with the features.
-
-        This method will:
-            - create a dataframe with the index of the original dataframe
-            - merge the original dataframe, the skill ratio dataframe and the boss ratio dataframe
-
-        Parameters
-        ----------
-        base : bool, optional
-            Whether to include the original dataframe, by default True
-        skill_ratio : bool, optional
-            Whether to include the skill ratio dataframe, by default True
-        boss_ratio : bool, optional
-            Whether to include the boss ratio dataframe, by default True
-
-        Returns
-        -------
-        pd.DataFrame
-            Dataframe containing the features
-        """
-        features = pd.DataFrame(index=self.df_clean.index)
-        if base:
-            features = features.merge(self.df_clean, left_index=True, right_index=True)
-        if skill_ratio:
-            features = features.merge(
-                self.skill_ratio, left_index=True, right_index=True
-            )
-        if boss_ratio:
-            features = features.merge(
-                self.boss_ratio, left_index=True, right_index=True
-            )
-        return features
-
-
-class playerData:
-    """
-    Class to handle the data from the json files.
-    """
-
-    def __init__(self, player_data: List[dict], label_data: List[dict]) -> None:
-        """
-        Initialize the class.
-
-        Parameters
-        ----------
-        player_data : List[dict]
-            List of dictionaries containing the player data
-        label_data : List[dict]
-            List of dictionaries containing the label data
-        """
-        self.df_players = pd.DataFrame(player_data)
-        self.df_labels = pd.DataFrame(label_data)
-        self.__clean()
-
-    def __clean(self):
-        """
-        Clean the data.
-
-        This method will:
-            - set the index of the player dataframe to the player id
-            - set the index of the label dataframe to the label id
-            - merge the two dataframes
-            - create a binary label column
-        """
-        # clean players
-        self.df_players.set_index("id", inplace=True)
-
-        # reduce memory of player dataframe
-        small_size_columns = [
-            "possible_ban",
-            "confirmed_ban",
-            "confirmed_player",
-            "label_id",
-            "label_jagex",
-        ]
-        self.df_players[small_size_columns] = self.df_players[
-            small_size_columns
-        ].astype(np.int8)
-
-        # clean labels
-        self.df_labels.set_index("id", inplace=True)
-
-        # merge
-        self.df_players = self.df_players.merge(
-            self.df_labels, left_on="label_id", right_index=True
-        )
-        self.df_players.drop(columns=["label_id"], inplace=True)
-
-        # binary label, 1 = bot, 0 = not bot
-        self.df_players["binary_label"] = np.where(
-            self.df_players["label_jagex"] == 2, 1, 0
-        )
-
-    def get(self, binary: bool = False):
-        """
-        Get the target data.
-
-        This method will:
-            - return the binary label or the label column
-
-        Parameters
-        ----------
-        binary : bool, optional
-            Whether to return the binary label or not, by default False
-
-        Returns
-        -------
-        pd.DataFrame
-            Dataframe containing the target data
-        """
-        if binary:
-            out = self.df_players.loc[:, ["binary_label"]].astype(np.int8)
-            out.rename(columns={"binary_label": "target"}, inplace=True)
-        else:
-            out = self.df_players.loc[:, ["label"]].astype("category")
-            out.rename(columns={"label": "target"}, inplace=True)
-
-        return out
+import logging
+from typing import List
+
+import numpy as np
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+skills = [
+    "attack",
+    "defence",
+    "strength",
+    "hitpoints",
+    "ranged",
+    "prayer",
+    "magic",
+    "cooking",
+    "woodcutting",
+    "fletching",
+    "fishing",
+    "firemaking",
+    "crafting",
+    "smithing",
+    "mining",
+    "herblore",
+    "agility",
+    "thieving",
+    "slayer",
+    "farming",
+    "runecraft",
+    "hunter",
+    "construction",
+]
+minigames = [
+    "league",
+    "bounty_hunter_hunter",
+    "bounty_hunter_rogue",
+    "cs_all",
+    "cs_beginner",
+    "cs_easy",
+    "cs_medium",
+    "cs_hard",
+    "cs_elite",
+    "cs_master",
+    "lms_rank",
+    "soul_wars_zeal",
+]
+
+
+class hiscoreData:
+    """
+    This class is responsible for cleaning data & creating features.
+    """
+
+    def __init__(self, data: List[dict]) -> None:
+        self.df = pd.DataFrame(data)
+        self.df_clean = self.df.copy()
+
+        self.skills = skills
+        self.minigames = minigames
+
+        self.__clean()
+        self.__skill_ratio()
+        self.__boss_ratio()
+
+    def __clean(self) -> None:
+        """
+        Cleanup the dataframe.
+
+        This method will:
+            - drop unnecessary columns
+            - set the index to the player id
+            - replace -1 with 0
+            - create a list of bosses (not skills or minigames)
+            - create a total xp column
+            - create a total boss kc column
+            - reduces memory of dataframe
+            - fill na with 0
+            - create a dataframe with only low level players (total level < 1_000_000)
+        """
+        col_to_drop = ["id", "timestamp", "ts_date", "name"]
+        col_to_drop = [c for c in self.df_clean.columns if c in col_to_drop]
+        logger.info(f"dropping: {col_to_drop}")
+        self.df_clean.drop(columns=col_to_drop, inplace=True)
+
+        # set index to player id
+        self.df_clean.set_index(["Player_id"], inplace=True)
+
+        # if not on the hiscores it shows -1, replace with 0
+        self.df_clean = self.df_clean.replace(-1, 0)
+
+        # bosses
+        self.bosses = [
+            c for c in self.df_clean.columns if c not in ["total"] + skills + minigames
+        ]
+        # total is not always on hiscores, create a total xp column
+        self.df_clean["total"] = self.df_clean[self.skills].sum(axis=1)
+
+        # create a total boss kc column
+        self.df_clean["boss_total"] = (
+            self.df_clean[self.bosses].sum(axis=1).astype(np.int32)
+        )
+
+        # fillna
+        self.df_clean.fillna(0, inplace=True)
+
+        # apply smaller data types to reduce memory usage
+        non_total_features = [
+            col for col in self.df_clean.columns if "total" not in col
+        ]
+        self.df_clean[non_total_features] = self.df_clean[non_total_features].astype(
+            np.int32
+        )
+
+        # get low lvl players
+        mask = self.df_clean["total"] < 1_000_000
+        self.df_low = self.df_clean[mask].copy()
+
+    def __skill_ratio(self):
+        """
+        Create a dataframe with the ratio of each skill to the total level.
+
+        This method will:
+            - create a dataframe with the index of the original dataframe
+            - create a column for each skill with the ratio of the skill to the total level
+            - fill na with 0
+        """
+        self.skill_ratio = pd.DataFrame(index=self.df_clean.index)
+
+        total = self.df_clean["total"]
+
+        for skill in self.skills:
+            self.skill_ratio[f"{skill}/total"] = (self.df_clean[skill] / total).astype(
+                np.float16
+            )
+
+        self.skill_ratio.fillna(0, inplace=True)
+
+    def __boss_ratio(self):
+        """
+        Create a dataframe with the ratio of each boss to the total boss level.
+
+        This method will:
+            - create a dataframe with the index of the original dataframe
+            - create a column for each boss with the ratio of the boss to the total boss level
+            - fill na with 0
+        """
+        self.boss_ratio = pd.DataFrame(index=self.df_clean.index)
+
+        total = self.df_clean["boss_total"]
+        for boss in self.bosses:
+            self.boss_ratio[f"{boss}/total"] = (self.df_clean[boss] / total).astype(
+                np.float16
+            )
+
+        self.boss_ratio.fillna(0, inplace=True)
+
+    def features(
+        self, base: bool = True, skill_ratio: bool = True, boss_ratio: bool = True
+    ):
+        """
+        Create a dataframe with the features.
+
+        This method will:
+            - create a dataframe with the index of the original dataframe
+            - merge the original dataframe, the skill ratio dataframe and the boss ratio dataframe
+
+        Parameters
+        ----------
+        base : bool, optional
+            Whether to include the original dataframe, by default True
+        skill_ratio : bool, optional
+            Whether to include the skill ratio dataframe, by default True
+        boss_ratio : bool, optional
+            Whether to include the boss ratio dataframe, by default True
+
+        Returns
+        -------
+        pd.DataFrame
+            Dataframe containing the features
+        """
+        features = pd.DataFrame(index=self.df_clean.index)
+        if base:
+            features = features.merge(self.df_clean, left_index=True, right_index=True)
+        if skill_ratio:
+            features = features.merge(
+                self.skill_ratio, left_index=True, right_index=True
+            )
+        if boss_ratio:
+            features = features.merge(
+                self.boss_ratio, left_index=True, right_index=True
+            )
+        return features
+
+
+class playerData:
+    """
+    Class to handle the data from the json files.
+    """
+
+    def __init__(self, player_data: List[dict], label_data: List[dict]) -> None:
+        """
+        Initialize the class.
+
+        Parameters
+        ----------
+        player_data : List[dict]
+            List of dictionaries containing the player data
+        label_data : List[dict]
+            List of dictionaries containing the label data
+        """
+        self.df_players = pd.DataFrame(player_data)
+        self.df_labels = pd.DataFrame(label_data)
+        self.__clean()
+
+    def __clean(self):
+        """
+        Clean the data.
+
+        This method will:
+            - set the index of the player dataframe to the player id
+            - set the index of the label dataframe to the label id
+            - merge the two dataframes
+            - create a binary label column
+        """
+        # clean players
+        self.df_players.set_index("id", inplace=True)
+
+        # reduce memory of player dataframe
+        small_size_columns = [
+            "possible_ban",
+            "confirmed_ban",
+            "confirmed_player",
+            "label_id",
+            "label_jagex",
+        ]
+        self.df_players[small_size_columns] = self.df_players[
+            small_size_columns
+        ].astype(np.int8)
+
+        # clean labels
+        self.df_labels.set_index("id", inplace=True)
+
+        # merge
+        self.df_players = self.df_players.merge(
+            self.df_labels, left_on="label_id", right_index=True
+        )
+        self.df_players.drop(columns=["label_id"], inplace=True)
+
+        # binary label, 1 = bot, 0 = not bot
+        self.df_players["binary_label"] = np.where(
+            self.df_players["label_jagex"] == 2, 1, 0
+        )
+
+    def get(self, binary: bool = False):
+        """
+        Get the target data.
+
+        This method will:
+            - return the binary label or the label column
+
+        Parameters
+        ----------
+        binary : bool, optional
+            Whether to return the binary label or not, by default False
+
+        Returns
+        -------
+        pd.DataFrame
+            Dataframe containing the target data
+        """
+        if binary:
+            out = self.df_players.loc[:, ["binary_label"]].astype(np.int8)
+            out.rename(columns={"binary_label": "target"}, inplace=True)
+        else:
+            out = self.df_players.loc[:, ["label"]].astype("category")
+            out.rename(columns={"label": "target"}, inplace=True)
+
+        return out
diff --git a/api/app.py b/api/app.py
index b195147..307cdc3 100644
--- a/api/app.py
+++ b/api/app.py
@@ -1,208 +1,208 @@
-import asyncio
-import logging
-from typing import List
-
-import pandas as pd
-from fastapi import HTTPException
-from pydantic import BaseModel
-from sklearn.model_selection import train_test_split
-
-from api import config
-from api.cogs import predict
-from api.cogs import requests as req
-from api.MachineLearning import classifier, data
-from datetime import date
-
-app = config.app
-
-binary_classifier = classifier.classifier("binaryClassifier").load()
-multi_classifier = classifier.classifier("multiClassifier").load()
-
-
-class name(BaseModel):
-    id: int
-    name: str
-
-
-logger = logging.getLogger(__name__)
-
-
-@app.on_event("startup")
-async def initial_task():
-    """
-    This function is called when the api starts up.
-    It will load the latest model and start the prediction process.
-    """
-    global binary_classifier, multi_classifier
-    if binary_classifier is None or multi_classifier is None:
-        binary_classifier = classifier.classifier("binaryClassifier")
-        multi_classifier = classifier.classifier("multiClassifier")
-        await train(config.secret_token)
-    await manual_startup(config.secret_token)
-    return
-
-
-@app.get("/")
-async def root():
-    """
-    This endpoint is used to check if the api is running.
-    """
-    return {"detail": "hello worldz"}
-
-
-@app.get("/startup")
-async def manual_startup(secret: str):
-    logger.debug("manual startup")
-    """
-        This endpoint is used to manually start the prediction process.
-        It is used by the detector api to start the prediction process.
-    """
-    # secret token for api's to talk to eachother
-    if secret != config.secret_token:
-        raise HTTPException(status_code=404, detail="insufficient permissions")
-
-    id = 0
-    today = date.today()
-    while True:
-        if today != date.today():
-            logger.info("new day")
-            id, today = 0, date.today()
-
-        hiscores = await req.get_prediction_data(
-            player_id=id, limit=config.BATCH_AMOUNT
-        )
-        _highscores = hiscores[-1]
-        logger.info(_highscores)
-        id = _highscores.get("Player_id")
-        hiscores = pd.DataFrame(hiscores)
-
-        if len(hiscores) == 0:
-            logger.debug("No data: sleeping")
-            await asyncio.sleep(60)
-            continue
-
-        names = hiscores[["Player_id", "name"]]
-        names = names.rename(columns={"Player_id": "id"})
-        hiscores = hiscores[[c for c in hiscores.columns if c != "name"]]
-
-        output = predict.predict(hiscores, names, binary_classifier, multi_classifier)
-
-        logger.debug("Sending response")
-        await req.post_prediction(output)
-
-        if len(hiscores) < config.BATCH_AMOUNT:
-            sleep = 60
-            logger.info(f"{len(hiscores)=} < {config.BATCH_AMOUNT=}, sleeping: {sleep}")
-            await asyncio.sleep(sleep)
-    return {"detail": "ok"}
-
-
-@app.get("/load")
-async def load(secret: str):
-    logger.debug("loading model")
-    global binary_classifier, multi_classifier
-    """
-        load the latest model.
-        This endpoint is used by the detector api to load the latest model.
-    """
-    if secret != config.secret_token:
-        raise HTTPException(status_code=404, detail="insufficient permissions")
-
-    binary_classifier = binary_classifier.load()
-    multi_classifier = multi_classifier.load()
-    return {"detail": "ok"}
-
-
-@app.get("/predict")
-async def predict_player(secret: str, hiscores, name: name) -> List[dict]:
-    """
-    predict one player.
-    This endpoint is used by the detector api to predict one player.
-    """
-    logger.debug(f"predicting player {name}")
-    if secret != config.secret_token:
-        raise HTTPException(status_code=404, detail="insufficient permissions")
-    name = pd.DataFrame(name.dict())
-    output = predict.predict(hiscores, name, binary_classifier, multi_classifier)
-    return output
-
-
-@app.get("/train")
-async def train(secret: str):
-    """
-    train a new model.
-    This endpoint is used by the detector api to train a new model.
-    """
-    logger.debug("training model")
-    if secret != config.secret_token:
-        raise HTTPException(status_code=404, detail="insufficient permissions")
-
-    labels = await req.get_labels()
-    players = []
-    hiscores = []
-
-    for label in labels:
-        if label["label"] not in config.LABELS:
-            continue
-
-        player_data = await req.get_player_data(label_id=label["id"])
-        players.extend(player_data)
-
-        hiscore_data = await req.get_hiscore_data(label_id=label["id"])
-        hiscores.extend(hiscore_data)
-
-    # parse hiscoreData
-    hiscoredata = data.hiscoreData(hiscores)
-    del hiscores
-
-    # get the desired features
-    features = hiscoredata.features()
-    del hiscoredata
-
-    # get players with binary target
-    player_data = data.playerData(players, labels).get(binary=True)
-
-    # merge features with target
-    features_labeled = features.merge(player_data, left_index=True, right_index=True)
-
-    # create train test data
-    x, y = features_labeled.iloc[:, :-1], features_labeled.iloc[:, -1]
-    train_x, test_x, train_y, test_y = train_test_split(
-        x, y, test_size=0.2, random_state=42, stratify=y
-    )
-
-    # train & score the model
-    binary_classifier.fit(train_x, train_y)
-    binary_classifier.score(test_y, test_x)
-
-    # save the model
-    binary_classifier.save()
-
-    # get players with multi target
-    player_data = data.playerData(players, labels).get(binary=False)
-
-    # merge features with target
-    features_labeled = features.merge(player_data, left_index=True, right_index=True)
-
-    # we need at least 100 users
-    to_little_data_labels = (
-        pd.DataFrame(features_labeled.iloc[:, -1].value_counts())
-        .query("target < 100")
-        .index
-    )
-    mask = ~(features_labeled["target"].isin(to_little_data_labels))
-    features_labeled = features_labeled[mask]
-
-    # create train test data
-    x, y = features_labeled.iloc[:, :-1], features_labeled.iloc[:, -1]
-    train_x, test_x, train_y, test_y = train_test_split(
-        x, y, test_size=0.2, random_state=42, stratify=y
-    )
-
-    # train & score the model
-    multi_classifier.fit(train_x, train_y)
-    multi_classifier.score(test_y, test_x)
-
-    # save the model
-    multi_classifier.save()
-    return {"detail": "ok"}
+import asyncio
+import logging
+from typing import List
+
+import pandas as pd
+from fastapi import HTTPException
+from pydantic import BaseModel
+from sklearn.model_selection import train_test_split
+
+from api import config
+from api.cogs import predict
+from api.cogs import requests as req
+from api.MachineLearning import classifier, data
+from datetime import date
+
+app = config.app
+
+binary_classifier = classifier.classifier("binaryClassifier").load()
+multi_classifier = classifier.classifier("multiClassifier").load()
+
+
+class name(BaseModel):
+    id: int
+    name: str
+
+
+logger = logging.getLogger(__name__)
+
+
+@app.on_event("startup")
+async def initial_task():
+    """
+    This function is called when the api starts up.
+    It will load the latest model and start the prediction process.
+    """
+    global binary_classifier, multi_classifier
+    if binary_classifier is None or multi_classifier is None:
+        binary_classifier = classifier.classifier("binaryClassifier")
+        multi_classifier = classifier.classifier("multiClassifier")
+        await train(config.secret_token)
+    await manual_startup(config.secret_token)
+    return
+
+
+@app.get("/")
+async def root():
+    """
+    This endpoint is used to check if the api is running.
+    """
+    return {"detail": "hello worldz"}
+
+
+@app.get("/startup")
+async def manual_startup(secret: str):
+    logger.debug("manual startup")
+    """
+        This endpoint is used to manually start the prediction process.
+        It is used by the detector api to start the prediction process.
+    """
+    # secret token for api's to talk to eachother
+    if secret != config.secret_token:
+        raise HTTPException(status_code=404, detail="insufficient permissions")
+
+    id = 0
+    today = date.today()
+    while True:
+        if today != date.today():
+            logger.info("new day")
+            id, today = 0, date.today()
+
+        hiscores = await req.get_prediction_data(
+            player_id=id, limit=config.BATCH_AMOUNT
+        )
+        _highscores = hiscores[-1]
+        logger.info(_highscores)
+        id = _highscores.get("Player_id")
+        hiscores = pd.DataFrame(hiscores)
+
+        if len(hiscores) == 0:
+            logger.debug("No data: sleeping")
+            await asyncio.sleep(60)
+            continue
+
+        names = hiscores[["Player_id", "name"]]
+        names = names.rename(columns={"Player_id": "id"})
+        hiscores = hiscores[[c for c in hiscores.columns if c != "name"]]
+
+        output = predict.predict(hiscores, names, binary_classifier, multi_classifier)
+
+        logger.debug("Sending response")
+        await req.post_prediction(output)
+
+        if len(hiscores) < config.BATCH_AMOUNT:
+            sleep = 60
+            logger.info(f"{len(hiscores)=} < {config.BATCH_AMOUNT=}, sleeping: {sleep}")
+            await asyncio.sleep(sleep)
+    return {"detail": "ok"}
+
+
+@app.get("/load")
+async def load(secret: str):
+    logger.debug("loading model")
+    global binary_classifier, multi_classifier
+    """
+        load the latest model.
+        This endpoint is used by the detector api to load the latest model.
+    """
+    if secret != config.secret_token:
+        raise HTTPException(status_code=404, detail="insufficient permissions")
+
+    binary_classifier = binary_classifier.load()
+    multi_classifier = multi_classifier.load()
+    return {"detail": "ok"}
+
+
+@app.get("/predict")
+async def predict_player(secret: str, hiscores, name: name) -> List[dict]:
+    """
+    predict one player.
+    This endpoint is used by the detector api to predict one player.
+    """
+    logger.debug(f"predicting player {name}")
+    if secret != config.secret_token:
+        raise HTTPException(status_code=404, detail="insufficient permissions")
+    name = pd.DataFrame(name.dict())
+    output = predict.predict(hiscores, name, binary_classifier, multi_classifier)
+    return output
+
+
+@app.get("/train")
+async def train(secret: str):
+    """
+    train a new model.
+    This endpoint is used by the detector api to train a new model.
+    """
+    logger.debug("training model")
+    if secret != config.secret_token:
+        raise HTTPException(status_code=404, detail="insufficient permissions")
+
+    labels = await req.get_labels()
+    players = []
+    hiscores = []
+
+    for label in labels:
+        if label["label"] not in config.LABELS:
+            continue
+
+        player_data = await req.get_player_data(label_id=label["id"])
+        players.extend(player_data)
+
+        hiscore_data = await req.get_hiscore_data(label_id=label["id"])
+        hiscores.extend(hiscore_data)
+
+    # parse hiscoreData
+    hiscoredata = data.hiscoreData(hiscores)
+    del hiscores
+
+    # get the desired features
+    features = hiscoredata.features()
+    del hiscoredata
+
+    # get players with binary target
+    player_data = data.playerData(players, labels).get(binary=True)
+
+    # merge features with target
+    features_labeled = features.merge(player_data, left_index=True, right_index=True)
+
+    # create train test data
+    x, y = features_labeled.iloc[:, :-1], features_labeled.iloc[:, -1]
+    train_x, test_x, train_y, test_y = train_test_split(
+        x, y, test_size=0.2, random_state=42, stratify=y
+    )
+
+    # train & score the model
+    binary_classifier.fit(train_x, train_y)
+    binary_classifier.score(test_y, test_x)
+
+    # save the model
+    binary_classifier.save()
+
+    # get players with multi target
+    player_data = data.playerData(players, labels).get(binary=False)
+
+    # merge features with target
+    features_labeled = features.merge(player_data, left_index=True, right_index=True)
+
+    # we need at least 100 users
+    to_little_data_labels = (
+        pd.DataFrame(features_labeled.iloc[:, -1].value_counts())
+        .query("target < 100")
+        .index
+    )
+    mask = ~(features_labeled["target"].isin(to_little_data_labels))
+    features_labeled = features_labeled[mask]
+
+    # create train test data
+    x, y = features_labeled.iloc[:, :-1], features_labeled.iloc[:, -1]
+    train_x, test_x, train_y, test_y = train_test_split(
+        x, y, test_size=0.2, random_state=42, stratify=y
+    )
+
+    # train & score the model
+    multi_classifier.fit(train_x, train_y)
+    multi_classifier.score(test_y, test_x)
+
+    # save the model
+    multi_classifier.save()
+    return {"detail": "ok"}
diff --git a/api/cogs/predict.py b/api/cogs/predict.py
index 5dc5f56..f7e8117 100644
--- a/api/cogs/predict.py
+++ b/api/cogs/predict.py
@@ -1,111 +1,111 @@
-import time
-from typing import List
-
-import numpy as np
-import pandas as pd
-from api import config
-from api.MachineLearning import data
-from api.MachineLearning.classifier import classifier
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-def predict(
-    hiscores,
-    names,
-    binary_classifier: classifier,
-    multi_classifier: classifier,
-) -> List[dict]:
-    """
-    This function takes in a list of hiscores, a list of names, and two classifiers.
-    It then predicts the probability of each hiscore being a bot or a real player.
-    It then returns a list of dictionaries with the predictions.
-    The predictions are based on the binary classifier, and the multi classifier.
-    The binary classifier is used to predict if the player is a real player or a bot.
-    The multi classifier is used to predict the type of bot.
-    If the binary classifier predicts that the player is a real player, then the multi classifier is not used.
-    If the binary classifier predicts that the player is a bot, then the multi classifier is used to predict the type of bot.
-    The output is a list of dictionaries with the predictions.
-    """
-    logger.debug("Predicting hiscores for players")
-    hiscores = data.hiscoreData(hiscores)
-    low_level = hiscores.df_low.index
-    hiscores = hiscores.features()
-
-    logger.debug("Predicting binary for players")
-    # binary prediction
-    binary_pred = binary_classifier.predict_proba(hiscores)
-    binary_pred = pd.DataFrame(
-        binary_pred, index=hiscores.index, columns=["Real_Player", "Unknown_bot"]
-    )
-
-    # multi prediction
-    logger.debug("Predicting multi for players")
-    multi_pred = multi_classifier.predict_proba(hiscores)
-    multi_pred = pd.DataFrame(
-        multi_pred, index=hiscores.index, columns=np.unique(config.LABELS)
-    )
-
-    # remove real players from multi
-    logger.debug("Removing real players from multi for players")
-    real_players = binary_pred.query("Real_Player > 0.5").index
-    mask = ~(multi_pred.index.isin(real_players))
-    multi_pred = multi_pred[mask]
-
-    # remove bots from real players
-    logger.debug("Removing bots from binary for players")
-    bots = multi_pred.index
-    mask = ~(binary_pred.index.isin(bots))
-    binary_pred = binary_pred[mask]
-
-    # combine binary & player_pred
-    logger.debug("Combining binary and multi for players")
-    output = pd.DataFrame(names).set_index("id")
-    output = output.merge(binary_pred, left_index=True, right_index=True, how="left")
-
-    output = output.merge(
-        multi_pred,
-        left_index=True,
-        right_index=True,
-        suffixes=["", "_multi"],
-        how="left",
-    )
-
-    # cleanup predictions
-    logger.debug("Cleaning up predictions for players")
-    mask = output["Real_Player"].isna()  # all multi class predictions
-
-    # cleanup multi suffixes
-    output.loc[mask, "Unknown_bot"] = output[mask]["Unknown_bot_multi"]
-    output.loc[mask, "Real_Player"] = output[mask]["Real_Player_multi"]
-
-    output.drop(columns=["Real_Player_multi", "Unknown_bot_multi"], inplace=True)
-    output.fillna(0, inplace=True)
-
-    # add Predictions, Predicted_confidence, created
-    logger.debug("Adding Predictions, Predicted_confidence, created for players")
-    columns = [c for c in output.columns if c != "name"]
-    output["Predicted_confidence"] = round(output[columns].max(axis=1) * 100, 2)
-    output["Prediction"] = output[columns].idxmax(axis=1)
-    output["created"] = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
-    output.reset_index(inplace=True)
-
-    # low level player predictions are not accurate
-    logger.debug("Removing low level players for players")
-
-    mask = output["id"].isin(low_level)
-    output.loc[mask, "Prediction"] = "Stats_Too_Low"
-
-    len_too_low_players = len(output[output["Prediction"] == "Stats_Too_Low"])
-    logger.debug(f"Len low level players {len_too_low_players}")
-
-    # cut off name
-    output["name"] = output["name"].astype(str).str[:12]
-
-    # parsing values
-    output[columns] = round(output[columns] * 100, 2)
-
-    # convert output to dict
-    output = output.to_dict(orient="records")
-    return output
+import time
+from typing import List
+
+import numpy as np
+import pandas as pd
+from api import config
+from api.MachineLearning import data
+from api.MachineLearning.classifier import classifier
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def predict(
+    hiscores,
+    names,
+    binary_classifier: classifier,
+    multi_classifier: classifier,
+) -> List[dict]:
+    """
+    Predict, for every player in `hiscores`, whether it is a real player or a bot.
+
+    Both classifiers are run on the features built by data.hiscoreData.
+    Players whose binary Real_Player probability is above 0.5 keep only their
+    binary probabilities; every other player keeps only the multi classifier
+    probabilities (one column per unique label in config.LABELS).
+    Players listed in hiscoreData.df_low get the Prediction "Stats_Too_Low".
+    `names` must be convertible to a DataFrame with "id" and "name" columns.
+    Returns a list of record dicts; probabilities are percentages rounded to 2 decimals.
+    """
+    logger.debug("Predicting hiscores for players")
+    hiscores = data.hiscoreData(hiscores)
+    low_level = hiscores.df_low.index
+    hiscores = hiscores.features()
+
+    logger.debug("Predicting binary for players")
+    # binary prediction: column 0 is labelled Real_Player, column 1 Unknown_bot
+    binary_pred = binary_classifier.predict_proba(hiscores)
+    binary_pred = pd.DataFrame(
+        binary_pred, index=hiscores.index, columns=["Real_Player", "Unknown_bot"]
+    )
+
+    # multi prediction: columns are labelled with the sorted unique config.LABELS
+    logger.debug("Predicting multi for players")
+    multi_pred = multi_classifier.predict_proba(hiscores)
+    multi_pred = pd.DataFrame(
+        multi_pred, index=hiscores.index, columns=np.unique(config.LABELS)
+    )
+
+    # remove real players (binary Real_Player > 0.5) from multi
+    logger.debug("Removing real players from multi for players")
+    real_players = binary_pred.query("Real_Player > 0.5").index
+    mask = ~(multi_pred.index.isin(real_players))
+    multi_pred = multi_pred[mask]
+
+    # remove bots (everything still left in multi) from binary
+    logger.debug("Removing bots from binary for players")
+    bots = multi_pred.index
+    mask = ~(binary_pred.index.isin(bots))
+    binary_pred = binary_pred[mask]
+
+    # left-join both prediction frames onto the names, keyed by player id
+    logger.debug("Combining binary and multi for players")
+    output = pd.DataFrame(names).set_index("id")
+    output = output.merge(binary_pred, left_index=True, right_index=True, how="left")
+
+    output = output.merge(
+        multi_pred,
+        left_index=True,
+        right_index=True,
+        suffixes=["", "_multi"],
+        how="left",
+    )
+
+    # cleanup predictions
+    logger.debug("Cleaning up predictions for players")
+    mask = output["Real_Player"].isna()  # all multi class predictions
+
+    # for multi rows, take Real_Player/Unknown_bot from the "_multi" columns, then drop those
+    output.loc[mask, "Unknown_bot"] = output[mask]["Unknown_bot_multi"]
+    output.loc[mask, "Real_Player"] = output[mask]["Real_Player_multi"]
+
+    output.drop(columns=["Real_Player_multi", "Unknown_bot_multi"], inplace=True)
+    output.fillna(0, inplace=True)
+
+    # add Prediction, Predicted_confidence, created (created is the current UTC time)
+    logger.debug("Adding Predictions, Predicted_confidence, created for players")
+    columns = [c for c in output.columns if c != "name"]
+    output["Predicted_confidence"] = round(output[columns].max(axis=1) * 100, 2)
+    output["Prediction"] = output[columns].idxmax(axis=1)
+    output["created"] = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
+    output.reset_index(inplace=True)
+
+    # low level player predictions are not accurate
+    logger.debug("Removing low level players for players")
+
+    mask = output["id"].isin(low_level)
+    output.loc[mask, "Prediction"] = "Stats_Too_Low"
+
+    len_too_low_players = len(output[output["Prediction"] == "Stats_Too_Low"])
+    logger.debug(f"Len low level players {len_too_low_players}")
+
+    # cut off name at 12 characters
+    output["name"] = output["name"].astype(str).str[:12]
+
+    # convert the probability columns to percentages
+    output[columns] = round(output[columns] * 100, 2)
+
+    # convert output to a list of dicts (one per row)
+    output = output.to_dict(orient="records")
+    return output
diff --git a/api/cogs/requests.py b/api/cogs/requests.py
index 44f8869..b8e7d97 100644
--- a/api/cogs/requests.py
+++ b/api/cogs/requests.py
@@ -1,149 +1,154 @@
-import logging
-import api.config as config
-import aiohttp
-import asyncio
-
-logger = logging.getLogger(__name__)
-
-
-# Define an asynchronous function to make a secure HTTP GET request
-async def make_request(url: str, params: dict, headers: dict = {}) -> list[dict]:
-    # Create a secure copy of the parameters by adding a placeholder for the token
-    _secure_params = params.copy()
-    _secure_params["token"] = "***"
-
-    # Log the URL and secure parameters for debugging
-    logger.info({"url": url.split("/v")[-1], "params": _secure_params})
-
-    # Use aiohttp to make an asynchronous GET request
-    async with aiohttp.ClientSession() as session:
-        async with session.get(url=url, params=params, headers=headers) as resp:
-            # Check if the response status is OK (200)
-            if not resp.ok:
-                error_message = (
-                    f"response status {resp.status} "
-                    f"response body: {await resp.text()}"
-                )
-                # Log the error message and raise a ValueError
-                logger.error(error_message)
-                raise ValueError(error_message)
-
-            # Parse the response JSON and return the data
-            data = await resp.json()
-            return data
-
-
-# Define an asynchronous function to retry a request until it succeeds or raise an exception on failure
-async def retry_request(url: str, params: dict) -> list[dict]:
-    while True:
-        try:
-            # Attempt to make the request
-            data = await make_request(url, params)
-
-            # If data is received, return it
-            if data:
-                return data
-        except Exception as e:
-            # Log the error and wait for 15 seconds before retrying
-            _secure_params = params.copy()
-            _secure_params["token"] = "***"
-            logger.error({"url": url, "params": _secure_params, "error": str(e)})
-            await asyncio.sleep(15)
-
-
-# Define an asynchronous function to get labels from an API
-async def get_labels():
-    # Construct the URL and parameters for the request
-    url = f"{config.detector_api}/v1/label"
-    params = {
-        "token": config.token,
-    }
-
-    # Retry the request until it succeeds and return the data
-    data = await retry_request(url=url, params=params)
-    return data
-
-
-async def get_player_data(label_id: int, limit: int = 5000):
-    url = "http://private-api-svc.bd-prd.svc:5000/v2/player"
-
-    params = {
-        "player_id": 1,
-        "label_id": label_id,
-        "greater_than": 1,
-        "limit": limit,
-    }
-
-    # Initialize a list to store player data
-    players = []
-
-    # Continue making requests until all data is retrieved
-    while True:
-        data = await retry_request(url=url, params=params)
-        players.extend(data)
-
-        logger.info(f"received: {len(data)}, in total {len(players)}")
-
-        # Check if the received data is less than the row count, indicating the end of data
-        if len(data) < limit:
-            break
-
-        # Increment the page parameter for the next request
-        params["player_id"] = data[-1]["id"]
-
-    return players
-
-
-async def get_hiscore_data(label_id: int, limit: int = 5000):
-    url = "http://private-api-svc.bd-prd.svc:5000/v2/highscore/latest"  # TODO: fix hardcoded
-    params = {"player_id": 1, "label_id": label_id, "many": 1, "limit": limit}
-
-    # Initialize a list to store hiscore data
-    hiscores = []
-
-    # Continue making requests until all data is retrieved
-    while True:
-        data = await retry_request(url=url, params=params)
-        hiscores.extend(data)
-
-        logger.info(f"received: {len(data)}, in total {len(hiscores)}")
-
-        # Check if the received data is less than the row count, indicating the end of data
-        if len(data) < limit:
-            break
-
-        # Increment the page parameter for the next request
-        params["player_id"] = data[-1]["Player_id"]
-
-    return hiscores
-
-
-async def get_prediction_data(player_id: int = 0, limit: int = 0):
-    url = "http://private-api-svc.bd-prd.svc:5000/v2/highscore/latest"  # TODO: fix hardcoded
-    params = {"player_id": player_id, "many": 1, "limit": limit}
-
-    data = await retry_request(url=url, params=params)
-    return data
-
-
-async def post_prediction(data: list[dict]):
-    url = f"{config.detector_api}/v1/prediction"
-    params = {"token": config.token}
-
-    while True:
-        try:
-            async with aiohttp.ClientSession() as session:
-                async with session.post(url=url, params=params, json=data) as resp:
-                    if not resp.ok:
-                        error_message = (
-                            f"response status {resp.status} "
-                            f"response body: {await resp.text()}"
-                        )
-                        # Log the error message and raise a ValueError
-                        logger.error(error_message)
-                        await asyncio.sleep(15)
-                        continue
-                    break
-        except Exception as e:
-            logger.error(str(e))
-            await asyncio.sleep(60)
+import logging
+import api.config as config
+import aiohttp
+import asyncio
+
+logger = logging.getLogger(__name__)
+
+
+# Perform one asynchronous HTTP GET and return the decoded JSON body; raises ValueError on a non-OK status
+async def make_request(url: str, params: dict, headers: dict = {}) -> list[dict]:
+    # Copy the parameters for logging and set "token" to a placeholder so a real token is never logged
+    _secure_params = params.copy()
+    _secure_params["token"] = "***"
+
+    # Log only the part of the URL after the last "/v", together with the masked parameters
+    logger.info({"url": url.split("/v")[-1], "params": _secure_params})
+
+    # A new aiohttp session is opened for every call and closed when the block exits
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url=url, params=params, headers=headers) as resp:
+            # resp.ok is False for HTTP status codes of 400 and above
+            if not resp.ok:
+                error_message = (
+                    f"response status {resp.status} "
+                    f"response body: {await resp.text()}"
+                )
+                # Log the error message and raise a ValueError
+                logger.error(error_message)
+                raise ValueError(error_message)
+
+            # Parse the response JSON and return the data
+            data = await resp.json()
+            return data
+
+
+# Retry a GET request at most max_retry times; exceptions and empty payloads both count as failed attempts
+async def retry_request(url: str, params: dict) -> list[dict]:
+    """Return the first non-empty payload; otherwise the last empty payload received, or None if every attempt raised."""
+    max_retry = 3
+    data = None
+    # Mask the token once so it never ends up in the logs
+    _secure_params = {**params, "token": "***"}
+    for retry in range(max_retry):
+        try:
+            data = await make_request(url, params)
+            # If data is received, return it
+            if data:
+                return data
+            error = "empty response"
+        except Exception as e:
+            error = str(e)
+        # Failed attempt: log it, then wait 15 seconds unless this was the last try
+        logger.error({"url": url, "params": _secure_params, "error": error})
+        if retry < max_retry - 1:
+            await asyncio.sleep(15)
+    return data
+
+
+# Fetch the labels from the detector API's /v1/label endpoint, authenticating with config.token
+async def get_labels():
+    # Construct the URL and parameters for the request
+    url = f"{config.detector_api}/v1/label"
+    params = {
+        "token": config.token,
+    }
+
+    # Delegate to retry_request (bounded retries) and return whatever it yields, which may be None
+    data = await retry_request(url=url, params=params)
+    return data
+
+
+async def get_player_data(label_id: int, limit: int = 5000):
+    url = "http://private-api-svc.bd-prd.svc:5000/v2/player"  # TODO: fix hardcoded
+    # Collect all player rows for label_id, requesting `limit` rows per page
+    params = {
+        "player_id": 1,
+        "label_id": label_id,
+        "greater_than": 1,
+        "limit": limit,
+    }
+
+    # Accumulates the rows of every page
+    players = []
+
+    # Keep requesting pages until a short page signals the end of the data
+    while True:
+        data = await retry_request(url=url, params=params)
+        players.extend(data)  # NOTE(review): raises TypeError if retry_request gave up and returned None
+
+        logger.info(f"received: {len(data)}, in total {len(players)}")
+
+        # A page with fewer than `limit` rows means nothing is left to fetch
+        if len(data) < limit:
+            break
+
+        # Continue from the id of the last row received (presumably rows are ordered by id; confirm)
+        params["player_id"] = data[-1]["id"]
+
+    return players
+
+
+async def get_hiscore_data(label_id: int, limit: int = 5000):
+    url = "http://private-api-svc.bd-prd.svc:5000/v2/highscore/latest"  # TODO: fix hardcoded
+    params = {"player_id": 1, "label_id": label_id, "many": 1, "limit": limit}
+
+    # Accumulates the hiscore rows of every page for label_id
+    hiscores = []
+
+    # Keep requesting pages of `limit` rows until a short page signals the end of the data
+    while True:
+        data = await retry_request(url=url, params=params)
+        hiscores.extend(data)  # NOTE(review): raises TypeError if retry_request gave up and returned None
+
+        logger.info(f"received: {len(data)}, in total {len(hiscores)}")
+
+        # A page with fewer than `limit` rows means nothing is left to fetch
+        if len(data) < limit:
+            break
+
+        # Continue from the Player_id of the last row received (presumably rows are ordered by it; confirm)
+        params["player_id"] = data[-1]["Player_id"]
+
+    return hiscores
+
+
+async def get_prediction_data(player_id: int = 0, limit: int = 0):
+    url = "http://private-api-svc.bd-prd.svc:5000/v2/highscore/latest"  # TODO: fix hardcoded
+    params = {"player_id": player_id, "many": 1, "limit": limit}
+    # One bounded-retry GET; the result is passed through unchanged (None when retry_request gives up)
+    data = await retry_request(url=url, params=params)
+    return data
+
+
+async def post_prediction(data: list[dict]):
+    url = f"{config.detector_api}/v1/prediction"
+    params = {"token": config.token}
+    # POST `data` as JSON, retrying without limit: 15 s pause after a non-OK response, 60 s after an exception
+    while True:
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(url=url, params=params, json=data) as resp:
+                    if not resp.ok:
+                        error_message = (
+                            f"response status {resp.status} "
+                            f"response body: {await resp.text()}"
+                        )
+                        # Log the error message, wait 15 seconds and try again (nothing is raised)
+                        logger.error(error_message)
+                        await asyncio.sleep(15)
+                        continue
+                    break
+        except Exception as e:
+            logger.error(str(e))
+            await asyncio.sleep(60)
diff --git a/notes.md b/notes.md
index 338539f..819573f 100644
--- a/notes.md
+++ b/notes.md
@@ -1,54 +1,54 @@
-
-# api documentation
-```sh
-http://127.0.0.1:8000/docs
-http://127.0.0.1:8000/redoc
-```
-# extra info
-```sh
-POST: to create data.
-GET: to read data.
-PUT: to update data.
-DELETE: to delete data.
-```
-# keeping fork up to date
-```sh
-git checkout develop
-git pull --rebase upstream develop
-git push
-```
-# setup
-## windows
-creating a python venv to work in and install the project requirements
-```sh
-python -m venv .venv
-.venv\Scripts\activate
-python -m pip install --upgrade pip
-pip install -r requirements.txt
-```
-## linux
-```sh
-python3 -m venv .venv
-source .venv/bin/activate
-python -m pip install --upgrade pip
-pip install -r requirements.txt
-```
-# for admin purposes saving & upgrading
-when you added some dependancies update the requirements
-```sh
-venv\Scripts\activate
-call pip freeze > requirements.txt
-```
-when you want to upgrade the dependancies
-```sh
-venv\Scripts\activate
-powershell "(Get-Content requirements.txt) | ForEach-Object { $_ -replace '==', '>=' } | Set-Content requirements.txt"
-call pip install -r requirements.txt --upgrade
-call pip freeze > requirements.txt
-powershell "(Get-Content requirements.txt) | ForEach-Object { $_ -replace '>=', '==' } | Set-Content requirements.txt"
-```
-# branch cleanup
-if your branch gets out of sync and for some reason you have many pushes and pulls, to become insync without pushing some random changes do this
-```sh
-git fetch https://github.com/Bot-detector/bot-detector-ML.git
+
+# api documentation
+```sh
+http://127.0.0.1:8000/docs
+http://127.0.0.1:8000/redoc
+```
+# extra info
+```sh
+POST: to create data.
+GET: to read data.
+PUT: to update data.
+DELETE: to delete data.
+```
+# keeping fork up to date
+```sh
+git checkout develop
+git pull --rebase upstream develop
+git push
+```
+# setup
+## windows
+creating a python venv to work in and install the project requirements
+```sh
+python -m venv .venv
+.venv\Scripts\activate
+python -m pip install --upgrade pip
+pip install -r requirements.txt
+```
+## linux
+```sh
+python3 -m venv .venv
+source .venv/bin/activate
+python -m pip install --upgrade pip
+pip install -r requirements.txt
+```
+# for admin purposes saving & upgrading
+when you have added some dependencies, update the requirements
+```sh
+venv\Scripts\activate
+call pip freeze > requirements.txt
+```
+when you want to upgrade the dependencies
+```sh
+venv\Scripts\activate
+powershell "(Get-Content requirements.txt) | ForEach-Object { $_ -replace '==', '>=' } | Set-Content requirements.txt"
+call pip install -r requirements.txt --upgrade
+call pip freeze > requirements.txt
+powershell "(Get-Content requirements.txt) | ForEach-Object { $_ -replace '>=', '==' } | Set-Content requirements.txt"
+```
+# branch cleanup
+if your branch gets out of sync and for some reason you have many pushes and pulls, do this to get back in sync without pushing random changes
+```sh
+git fetch https://github.com/Bot-detector/bot-detector-ML.git
 ``` 
\ No newline at end of file