Skip to content

Commit

Permalink
progress
Browse files Browse the repository at this point in the history
  • Loading branch information
extreme4all committed Apr 9, 2024
1 parent f429011 commit 6ff660a
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 47 deletions.
13 changes: 8 additions & 5 deletions api/MachineLearning/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,21 @@
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (balanced_accuracy_score, classification_report,
roc_auc_score)
from sklearn.metrics import (
balanced_accuracy_score,
classification_report,
roc_auc_score,
)

logger = logging.getLogger(__name__)


class classifier(RandomForestClassifier):
"""
This class is a wrapper for RandomForestClassifier.
It adds the ability to save and load the model.
"""

working_directory = os.path.dirname(os.path.realpath(__file__))
path = os.path.join(working_directory, "models")
if not os.path.exists(path):
Expand Down Expand Up @@ -56,7 +61,6 @@ def __best_file_path(self, startwith: str):
# add dict to array
files.append(d)


if not files:
return None

Expand Down Expand Up @@ -96,7 +100,6 @@ def save(self):
compress=3,
)


def score(self, test_y, test_x):
"""
Calculate the accuracy and roc_auc score for the classifier.
Expand All @@ -121,4 +124,4 @@ def score(self, test_y, test_x):
labels = ["Not bot", "bot"] if len(labels) == 2 else labels

logger.info(classification_report(test_y, pred_y, target_names=labels))
return self.accuracy, self.roc_auc
return self.accuracy, self.roc_auc
4 changes: 2 additions & 2 deletions api/app.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import asyncio
import logging
from datetime import date
from typing import List

import pandas as pd
Expand All @@ -11,7 +12,6 @@
from api.cogs import predict
from api.cogs import requests as req
from api.MachineLearning import classifier, data
from datetime import date

app = config.app

Expand Down Expand Up @@ -47,7 +47,7 @@ async def root():
"""
This endpoint is used to check if the api is running.
"""
return {"detail": "hello worldz"}
return {"detail": "hello world"}


@app.get("/startup")
Expand Down
3 changes: 2 additions & 1 deletion api/cogs/predict.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import logging
import time
from typing import List

import numpy as np
import pandas as pd

from api import config
from api.MachineLearning import data
from api.MachineLearning.classifier import classifier
import logging

logger = logging.getLogger(__name__)

Expand Down
18 changes: 8 additions & 10 deletions api/cogs/requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging

import aiohttp

import api.config as config

logger = logging.getLogger(__name__)
Expand All @@ -14,17 +15,14 @@ async def make_request(url: str, params: dict, headers: dict = {}) -> list[dict]
_secure_params["token"] = "***"

# Log the URL and secure parameters for debugging
logger.info({"url": url.split("/v")[-1], "params": _secure_params})
logger.info({"url": f"v{url.split('/v')[-1]}", "params": _secure_params})

# Use aiohttp to make an asynchronous GET request
async with aiohttp.ClientSession() as session:
async with session.get(url=url, params=params, headers=headers) as resp:
# Check if the response status is OK (200)
if not resp.ok:
error_message = (
f"response status {resp.status} "
f"response body: {await resp.text()}"
)
error_message = {"status": resp.status, "body": await resp.text()}
# Log the error message and raise a ValueError
logger.error(error_message)
raise ValueError(error_message)
Expand Down Expand Up @@ -53,14 +51,14 @@ async def retry_request(url: str, params: dict) -> list[dict]:
_secure_params = params.copy()
_secure_params["token"] = "***"
logger.error({"url": url, "params": _secure_params, "error": str(e)})
await asyncio.sleep(15)
retry += 1
await asyncio.sleep(15)
retry += 1


# Define an asynchronous function to get labels from an API
async def get_labels():
# Construct the URL and parameters for the request
url = f"{config.detector_api}/label"
url = f"{config.detector_api}/v1/label"
params = {
"token": config.token,
}
Expand Down Expand Up @@ -101,7 +99,7 @@ async def get_player_data(label_id: int, limit: int = 5000):


async def get_hiscore_data(label_id: int, limit: int = 5000):
url = f"{config.private_api}/v3/highscore/latest"
url = f"{config.private_api}/v2/highscore/latest"
params = {"player_id": 1, "label_id": label_id, "many": 1, "limit": limit}

# Initialize a list to store hiscore data
Expand All @@ -125,7 +123,7 @@ async def get_hiscore_data(label_id: int, limit: int = 5000):


async def get_prediction_data(player_id: int = 0, limit: int = 0):
url = f"{config.private_api}/v3/highscore/latest"
url = f"{config.private_api}/v2/highscore/latest"
params = {"player_id": player_id, "many": 1, "limit": limit}

data = await retry_request(url=url, params=params)
Expand Down
43 changes: 29 additions & 14 deletions api/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
import os
import sys
Expand All @@ -9,33 +10,47 @@
load_dotenv(find_dotenv(), verbose=True)

# get env variables
# TODO: convert to pydantic_settings
token = os.environ.get("token")
detector_api = os.environ.get("detector_api")
secret_token = os.environ.get("secret_token")
private_api = os.environ.get("private_api")

assert token is not None
assert detector_api is not None
assert secret_token is not None
assert private_api is not None

# TODO: move to app.py // rename that to server.py
app = FastAPI()

# TODO: move to logging_config.py
# setup logging
logger = logging.getLogger()
file_handler = logging.FileHandler(filename="error.log", mode="a")
stream_handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter(
json.dumps(
{
"ts": "%(asctime)s",
"name": "%(name)s",
"function": "%(funcName)s",
"level": "%(levelname)s",
"msg": json.dumps("%(message)s"),
}
)
)

logging.basicConfig(filename="error.log", level=logging.DEBUG)
stream_handler = logging.StreamHandler(sys.stdout)

# log formatting
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
file_handler.setFormatter(formatter)
stream_handler.setFormatter(formatter)

# add handler
logger.addHandler(file_handler)
logger.addHandler(stream_handler)
handlers = [stream_handler]

logging.basicConfig(level=logging.DEBUG, handlers=handlers)


logging.getLogger("requests").setLevel(logging.DEBUG)
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("uvicorn").setLevel(logging.DEBUG)
logging.getLogger("uvicorn.error").propagate = False
# logging.getLogger("requests").setLevel(logging.DEBUG)
# logging.getLogger("urllib3").setLevel(logging.WARNING)
# logging.getLogger("uvicorn").setLevel(logging.DEBUG)
# logging.getLogger("uvicorn.error").propagate = False

BATCH_AMOUNT = 5_000

Expand Down
17 changes: 9 additions & 8 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
version: '3.8'
services:
mysql:
container_name: database
Expand Down Expand Up @@ -47,7 +46,8 @@ services:
networks:
- botdetector-network
depends_on:
- mysql
mysql_setup:
condition: service_completed_successfully

private_api:
image: quay.io/bot_detector/private-api:4d70b82
Expand All @@ -67,7 +67,8 @@ services:
- POOL_RECYCLE=60
- POOL_TIMEOUT=30
depends_on:
- mysql
mysql_setup:
condition: service_completed_successfully

machine_learning:
container_name: bd-ml
Expand All @@ -80,14 +81,14 @@ services:
api_port: 8000
command: uvicorn api.app:app --host 0.0.0.0 --reload --reload-include api/*
environment:
- token = verify_ban
- secret_token = super_secret_token
- private_api = private_api:5000
- detector_api = core_api:5000
- token=verify_ban
- secret_token=super_secret_token
- private_api=http://private_api:5000
- detector_api=http://core_api:5000
volumes:
- ./api:/project/api
ports:
- 8000:8000
- 5003:8000
networks:
- botdetector-network
depends_on:
Expand Down
35 changes: 31 additions & 4 deletions mysql/docker-entrypoint-initdb.d/01_tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -450,10 +450,37 @@ CREATE TABLE Labels (
UNIQUE INDEX Unique_label USING BTREE (label) VISIBLE
);

DELIMITER //

-- Keep scraper_data_latest as a one-row-per-player mirror of the most recent
-- scraper_data insert for that player (tracking scraper_id and created_at).
CREATE TRIGGER `sd_latest` AFTER INSERT ON `scraper_data` FOR EACH ROW
BEGIN
-- Most recent created_at already recorded for this player; NULL when the
-- player has no row in scraper_data_latest yet.
DECLARE latest_created_at DATETIME;

-- Get the latest created_at from scraper_data_latest for the current player_id
SELECT created_at INTO latest_created_at
FROM scraper_data_latest
WHERE player_id = NEW.player_id;

-- No row yet for this player: create one.
-- NOTE(review): the ON DUPLICATE KEY UPDATE clause presumably guards against a
-- concurrent insert racing in between the SELECT above and this INSERT; it only
-- takes effect if scraper_data_latest has a unique/primary key on player_id —
-- confirm against the table definition.
IF latest_created_at IS NULL THEN
INSERT INTO scraper_data_latest (scraper_id, created_at, player_id)
VALUES (NEW.scraper_id, NEW.created_at, NEW.player_id)
ON DUPLICATE KEY UPDATE
scraper_id = NEW.scraper_id,
created_at = NEW.created_at;
-- Existing row is strictly older: advance it to the new scrape. Equal or newer
-- timestamps are deliberately left untouched (strict >), so re-inserting the
-- same created_at does not churn the row.
ELSEIF NEW.created_at > latest_created_at THEN
UPDATE scraper_data_latest
SET
scraper_id = NEW.scraper_id,
created_at = NEW.created_at
WHERE player_id = NEW.player_id;
END IF;
END //

DELIMITER ;
-- -----------------------------------------------------
-- Table `playerdata`.`apiPermissions`
-- -----------------------------------------------------
CREATE TABLE IF NOT EXISTS `playerdata`.`apiPermissions` (
CREATE TABLE `playerdata`.`apiPermissions` (
`id` INT NOT NULL AUTO_INCREMENT,
`permission` TEXT NOT NULL,
PRIMARY KEY (`id`)
Expand All @@ -464,7 +491,7 @@ CREATE TABLE IF NOT EXISTS `playerdata`.`apiPermissions` (
-- -----------------------------------------------------
-- Table `playerdata`.`apiUser`
-- -----------------------------------------------------
CREATE TABLE IF NOT EXISTS `playerdata`.`apiUser` (
CREATE TABLE `playerdata`.`apiUser` (
`id` INT NOT NULL AUTO_INCREMENT,
`username` TINYTEXT NOT NULL,
`token` TINYTEXT NOT NULL,
Expand All @@ -481,7 +508,7 @@ CREATE TABLE IF NOT EXISTS `playerdata`.`apiUser` (
-- -----------------------------------------------------
-- Table `playerdata`.`apiUsage`
-- -----------------------------------------------------
CREATE TABLE IF NOT EXISTS `playerdata`.`apiUsage` (
CREATE TABLE `playerdata`.`apiUsage` (
`id` BIGINT NOT NULL AUTO_INCREMENT,
`user_id` INT NOT NULL,
`timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
Expand All @@ -499,7 +526,7 @@ CREATE TABLE IF NOT EXISTS `playerdata`.`apiUsage` (
-- -----------------------------------------------------
-- Table `playerdata`.`apiUserPerms`
-- -----------------------------------------------------
CREATE TABLE IF NOT EXISTS `playerdata`.`apiUserPerms` (
CREATE TABLE `playerdata`.`apiUserPerms` (
`id` INT NOT NULL AUTO_INCREMENT,
`user_id` INT NOT NULL,
`permission_id` INT NOT NULL,
Expand Down
6 changes: 3 additions & 3 deletions mysql_setup/setup_mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def random_date():


class Labels(Base):
__tablename__ = "labels"
__tablename__ = "Labels"

id = Column(Integer, primary_key=True)
label = Column(String)
Expand All @@ -142,8 +142,8 @@ class Labels(Base):
label_ids = [id[0] for id in label_ids] # Convert list of tuples to list of ids

# Insert data into Players table
len_players = 250
for i in range(250):
len_players = 500
for i in range(len_players):
print(f"Player_{i}")
# Check if the player already exists
existing_player = session.query(Players).filter_by(name=f"Player_{i}").first()
Expand Down

0 comments on commit 6ff660a

Please sign in to comment.