From 9207ab6aa0fd2cdc12155d6394e5f9b9e7520dd3 Mon Sep 17 00:00:00 2001 From: extreme4all <40169115+extreme4all@users.noreply.github.com> Date: Sun, 1 Sep 2024 20:05:45 +0200 Subject: [PATCH] Bf/v3 data model (#21) * fixed tables * fixed setup_mysql * save * working --- .../docker-entrypoint-initdb.d/01_tables.sql | 343 +++--------------- mysql/docker-entrypoint-initdb.d/02_data.sql | 132 ++----- mysql_setup/__init__.py | 0 mysql_setup/models.py | 179 +++++++++ mysql_setup/setup_mysql.py | 230 +----------- src/api/v3/highscore.py | 74 ++-- src/app/repositories/scraper_data.py | 166 +++++---- src/core/database/models/scraper_data_v3.py | 107 ++++++ 8 files changed, 532 insertions(+), 699 deletions(-) create mode 100644 mysql_setup/__init__.py create mode 100644 mysql_setup/models.py create mode 100644 src/core/database/models/scraper_data_v3.py diff --git a/mysql/docker-entrypoint-initdb.d/01_tables.sql b/mysql/docker-entrypoint-initdb.d/01_tables.sql index 3981893..1ae2a3f 100644 --- a/mysql/docker-entrypoint-initdb.d/01_tables.sql +++ b/mysql/docker-entrypoint-initdb.d/01_tables.sql @@ -1,5 +1,13 @@ USE playerdata; +CREATE TABLE Labels ( + id int NOT NULL AUTO_INCREMENT, + label varchar(50) NOT NULL, + PRIMARY KEY (id), + UNIQUE KEY Unique_label (label) USING BTREE +) +; + CREATE TABLE Players ( id INT PRIMARY KEY AUTO_INCREMENT, name TEXT, @@ -184,300 +192,63 @@ CREATE TABLE playerHiscoreData ( CONSTRAINT FK_Players_id FOREIGN KEY (Player_id) REFERENCES Players (id) ON DELETE RESTRICT ON UPDATE RESTRICT ); -CREATE TABLE playerHiscoreDataLatest ( - id bigint NOT NULL AUTO_INCREMENT, - timestamp datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, - ts_date date DEFAULT NULL, - Player_id int NOT NULL, - total bigint DEFAULT NULL, - attack int DEFAULT NULL, - defence int DEFAULT NULL, - strength int DEFAULT NULL, - hitpoints int DEFAULT NULL, - ranged int DEFAULT NULL, - prayer int DEFAULT NULL, - magic int DEFAULT NULL, - cooking int DEFAULT NULL, - woodcutting int DEFAULT NULL, - fletching int DEFAULT NULL, - fishing int DEFAULT NULL, - firemaking int DEFAULT NULL, - crafting int DEFAULT NULL, - smithing int DEFAULT NULL, - mining int DEFAULT NULL, - herblore int DEFAULT NULL, - agility int DEFAULT NULL, - thieving int DEFAULT NULL, - slayer int DEFAULT NULL, - farming int DEFAULT NULL, - runecraft int DEFAULT NULL, - hunter int DEFAULT NULL, - construction int DEFAULT NULL, - league int DEFAULT NULL, - bounty_hunter_hunter int DEFAULT NULL, - bounty_hunter_rogue int DEFAULT NULL, - cs_all int DEFAULT NULL, - cs_beginner int DEFAULT NULL, - cs_easy int DEFAULT NULL, - cs_medium int DEFAULT NULL, - cs_hard int DEFAULT NULL, - cs_elite int DEFAULT NULL, - cs_master int DEFAULT NULL, - lms_rank int DEFAULT NULL, - soul_wars_zeal int DEFAULT NULL, - abyssal_sire int DEFAULT NULL, - alchemical_hydra int DEFAULT NULL, - barrows_chests int DEFAULT NULL, - bryophyta int DEFAULT NULL, - callisto int DEFAULT NULL, - cerberus int DEFAULT NULL, - chambers_of_xeric int DEFAULT NULL, - chambers_of_xeric_challenge_mode int DEFAULT NULL, - chaos_elemental int DEFAULT NULL, - chaos_fanatic int DEFAULT NULL, - commander_zilyana int DEFAULT NULL, - corporeal_beast int DEFAULT NULL, - crazy_archaeologist int DEFAULT NULL, - dagannoth_prime int DEFAULT NULL, - dagannoth_rex int DEFAULT NULL, - dagannoth_supreme int DEFAULT NULL, - deranged_archaeologist int DEFAULT NULL, - general_graardor int DEFAULT NULL, - giant_mole int DEFAULT NULL, - grotesque_guardians int DEFAULT NULL, - hespori int DEFAULT NULL, - kalphite_queen int DEFAULT NULL, - king_black_dragon int DEFAULT NULL, - kraken int DEFAULT NULL, - kreearra int DEFAULT NULL, - kril_tsutsaroth int DEFAULT NULL, - mimic int DEFAULT NULL, - nex int DEFAULT NULL, - nightmare int DEFAULT NULL, - phosanis_nightmare int DEFAULT NULL, - obor int DEFAULT NULL, - phantom_muspah int DEFAULT NULL, - sarachnis int DEFAULT NULL, - scorpia int DEFAULT NULL, - skotizo int DEFAULT NULL, - Tempoross int DEFAULT NULL, - the_gauntlet int DEFAULT NULL, - the_corrupted_gauntlet int DEFAULT NULL, - theatre_of_blood int DEFAULT NULL, - theatre_of_blood_hard int DEFAULT NULL, - thermonuclear_smoke_devil int DEFAULT NULL, - tombs_of_amascut int DEFAULT NULL, - tombs_of_amascut_expert int DEFAULT NULL, - tzkal_zuk int DEFAULT NULL, - tztok_jad int DEFAULT NULL, - venenatis int DEFAULT NULL, - vetion int DEFAULT NULL, - vorkath int DEFAULT NULL, - wintertodt int DEFAULT NULL, - zalcano int DEFAULT NULL, - zulrah int DEFAULT NULL, - rifts_closed int DEFAULT '0', - artio int DEFAULT '0', - calvarion int DEFAULT '0', - duke_sucellus int DEFAULT '0', - spindel int DEFAULT '0', - the_leviathan int DEFAULT '0', - the_whisperer int DEFAULT '0', - vardorvis int DEFAULT '0', - PRIMARY KEY (id), - UNIQUE KEY Unique_player (Player_id) USING BTREE, - UNIQUE KEY idx_playerHiscoreDataLatest_Player_id_timestamp (Player_id,timestamp), - UNIQUE KEY idx_playerHiscoreDataLatest_Player_id_ts_date (Player_id,ts_date), - CONSTRAINT FK_latest_player FOREIGN KEY (Player_id) REFERENCES Players (id) ON DELETE RESTRICT ON UPDATE RESTRICT -); -CREATE TABLE playerHiscoreDataXPChange ( - id bigint NOT NULL AUTO_INCREMENT, - timestamp datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, - ts_date date DEFAULT NULL, - Player_id int NOT NULL, - total bigint DEFAULT NULL, - attack int DEFAULT NULL, - defence int DEFAULT NULL, - strength int DEFAULT NULL, - hitpoints int DEFAULT NULL, - ranged int DEFAULT NULL, - prayer int DEFAULT NULL, - magic int DEFAULT NULL, - cooking int DEFAULT NULL, - woodcutting int DEFAULT NULL, - fletching int DEFAULT NULL, - fishing int DEFAULT NULL, - firemaking int DEFAULT NULL, - crafting int DEFAULT NULL, - smithing int DEFAULT NULL, - mining int DEFAULT NULL, - herblore int DEFAULT NULL, - agility int DEFAULT NULL, - thieving int DEFAULT NULL, - slayer int DEFAULT NULL, - farming int DEFAULT NULL, - runecraft int DEFAULT NULL, - hunter int DEFAULT NULL, - construction int DEFAULT NULL, - league int DEFAULT NULL, - bounty_hunter_hunter int DEFAULT NULL, - bounty_hunter_rogue int DEFAULT NULL, - cs_all int DEFAULT NULL, - cs_beginner int DEFAULT NULL, - cs_easy int DEFAULT NULL, - cs_medium int DEFAULT NULL, - cs_hard int DEFAULT NULL, - cs_elite int DEFAULT NULL, - cs_master int DEFAULT NULL, - lms_rank int DEFAULT NULL, - soul_wars_zeal int DEFAULT NULL, - abyssal_sire int DEFAULT NULL, - alchemical_hydra int DEFAULT NULL, - barrows_chests int DEFAULT NULL, - bryophyta int DEFAULT NULL, - callisto int DEFAULT NULL, - cerberus int DEFAULT NULL, - chambers_of_xeric int DEFAULT NULL, - chambers_of_xeric_challenge_mode int DEFAULT NULL, - chaos_elemental int DEFAULT NULL, - chaos_fanatic int DEFAULT NULL, - commander_zilyana int DEFAULT NULL, - corporeal_beast int DEFAULT NULL, - crazy_archaeologist int DEFAULT NULL, - dagannoth_prime int DEFAULT NULL, - dagannoth_rex int DEFAULT NULL, - dagannoth_supreme int DEFAULT NULL, - deranged_archaeologist int DEFAULT NULL, - general_graardor int DEFAULT NULL, - giant_mole int DEFAULT NULL, - grotesque_guardians int DEFAULT NULL, - hespori int DEFAULT NULL, - kalphite_queen int DEFAULT NULL, - king_black_dragon int DEFAULT NULL, - kraken int DEFAULT NULL, - kreearra int DEFAULT NULL, - kril_tsutsaroth int DEFAULT NULL, - mimic int DEFAULT NULL, - nex int DEFAULT NULL, - nightmare int DEFAULT NULL, - obor int DEFAULT NULL, - phantom_muspah int DEFAULT NULL, - phosanis_nightmare int DEFAULT NULL, - sarachnis int DEFAULT NULL, - scorpia int DEFAULT NULL, - skotizo int DEFAULT NULL, - Tempoross int DEFAULT NULL, - the_gauntlet int DEFAULT NULL, - the_corrupted_gauntlet int DEFAULT NULL, - theatre_of_blood int DEFAULT NULL, - theatre_of_blood_hard int DEFAULT NULL, - thermonuclear_smoke_devil int DEFAULT NULL, - tzkal_zuk int DEFAULT NULL, - tztok_jad int DEFAULT NULL, - venenatis int DEFAULT NULL, - vetion int DEFAULT NULL, - vorkath int DEFAULT NULL, - wintertodt int DEFAULT NULL, - zalcano int DEFAULT NULL, - zulrah int DEFAULT NULL, - rifts_closed int DEFAULT '0', - artio int DEFAULT '0', - calvarion int DEFAULT '0', - duke_sucellus int DEFAULT '0', - spindel int DEFAULT '0', - the_leviathan int DEFAULT '0', - the_whisperer int DEFAULT '0', - vardorvis int DEFAULT '0', - PRIMARY KEY (id), - KEY IDX_xpChange_Player_id_timestamp (Player_id,timestamp) USING BTREE, - KEY IDX_xpChange_Player_id_ts_date (Player_id,ts_date) USING BTREE, - CONSTRAINT fk_phd_xp_pl FOREIGN KEY (Player_id) REFERENCES Players (id) ON DELETE RESTRICT ON UPDATE RESTRICT +/* +-- V3 +*/ +CREATE TABLE skill ( + skill_id tinyint unsigned NOT NULL AUTO_INCREMENT, + skill_name varchar(50) NOT NULL, + PRIMARY KEY (skill_id), + UNIQUE KEY unique_skill_name (skill_name) ); -CREATE TABLE `scraper_data` ( - `scraper_id` bigint unsigned NOT NULL AUTO_INCREMENT, - `created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, - `player_id` int unsigned NOT NULL, - `record_date` date GENERATED ALWAYS AS (cast(`created_at` as date)) STORED, - PRIMARY KEY (`scraper_id`), - UNIQUE KEY `unique_player_per_day` (`player_id`,`record_date`) +CREATE TABLE activity ( + activity_id tinyint unsigned NOT NULL AUTO_INCREMENT, + activity_name varchar(50) NOT NULL, + PRIMARY KEY (activity_id), + UNIQUE KEY unique_activity_name (activity_name) ); -CREATE TABLE `scraper_data_latest` ( - `scraper_id` bigint unsigned NOT NULL AUTO_INCREMENT, - `created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, - `record_date` date GENERATED ALWAYS AS (cast(`created_at` as date)) STORED, - `player_id` int unsigned NOT NULL, - PRIMARY KEY (`player_id`), - KEY `idx_scraper_id` (`scraper_id`), - KEY `idx_record_date` (`record_date`) +CREATE TABLE player_skill ( + player_skill_id BIGINT unsigned NOT NULL AUTO_INCREMENT, + skill_id tinyint unsigned NOT NULL, + skill_value int unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (player_skill_id), + UNIQUE KEY unique_skill_value (skill_id, skill_value) ); - - -CREATE TABLE skills ( - skill_id TINYINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, # < 255 - skill_name VARCHAR(50) NOT NULL, - UNIQUE KEY unique_skill_name (skill_name) -); -CREATE TABLE activities ( - activity_id TINYINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, # < 255 - activity_name VARCHAR(50) NOT NULL, - UNIQUE KEY unique_activity_name (activity_name) +CREATE TABLE player_activity ( + player_activity_id bigint unsigned NOT NULL AUTO_INCREMENT, + activity_id tinyint unsigned NOT NULL, + activity_value int unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (player_activity_id), + UNIQUE KEY unique_activity_value (activity_id, activity_value) ); - -CREATE TABLE player_skills ( - scraper_id BIGINT UNSIGNED NOT NULL, - skill_id TINYINT UNSIGNED NOT NULL, - skill_value INT UNSIGNED NOT NULL DEFAULT 0, # < 200 000 000 - FOREIGN KEY (scraper_id) REFERENCES scraper_data(scraper_id) ON DELETE CASCADE, - FOREIGN KEY (skill_id) REFERENCES skills(skill_id) ON DELETE CASCADE, - PRIMARY KEY (scraper_id, skill_id) -); - -CREATE TABLE player_activities ( - scraper_id BIGINT UNSIGNED NOT NULL, - activity_id TINYINT UNSIGNED NOT NULL, - activity_value INT UNSIGNED NOT NULL DEFAULT 0, # some guy could get over 65k kc - FOREIGN KEY (scraper_id) REFERENCES scraper_data(scraper_id) ON DELETE CASCADE, - FOREIGN KEY (activity_id) REFERENCES activities(activity_id) ON DELETE CASCADE, - PRIMARY KEY (scraper_id, activity_id) +CREATE TABLE scraper_data_v3 ( + scrape_id bigint unsigned NOT NULL AUTO_INCREMENT, + scrape_ts DATETIME NOT NULL, + scrape_date DATE NOT NULL, + player_id INT NOT NULL, + PRIMARY KEY (scrape_id), + UNIQUE KEY unique_player_scrape (player_id, scrape_date), + INDEX idx_scrape_ts (scrape_ts) ); - -DELIMITER // - -CREATE TRIGGER `sd_latest` AFTER INSERT ON `scraper_data` FOR EACH ROW -BEGIN - DECLARE latest_created_at DATETIME; - - -- Get the latest created_at from scraper_data_latest for the current player_id - SELECT created_at INTO latest_created_at - FROM scraper_data_latest - WHERE player_id = NEW.player_id; - - IF latest_created_at IS NULL THEN - INSERT INTO scraper_data_latest (scraper_id, created_at, player_id) - VALUES (NEW.scraper_id, NEW.created_at, NEW.player_id) - ON DUPLICATE KEY UPDATE - scraper_id = NEW.scraper_id, - created_at = NEW.created_at; - ELSEIF NEW.created_at > latest_created_at THEN - UPDATE scraper_data_latest - SET - scraper_id = NEW.scraper_id, - created_at = NEW.created_at - WHERE player_id = NEW.player_id; - END IF; -END // - -DELIMITER ; - -CREATE TABLE `Labels` ( - `id` int NOT NULL AUTO_INCREMENT, - `label` varchar(50) NOT NULL, - PRIMARY KEY (`id`), - UNIQUE KEY `Unique_label` (`label`) USING BTREE +CREATE TABLE scraper_player_skill ( + scrape_id BIGINT unsigned NOT NULL, + player_skill_id BIGINT unsigned NOT NULL, + PRIMARY KEY (scrape_id, player_skill_id), + KEY idx_scrape_id (scrape_id), + KEY idx_player_skill_id (player_skill_id) +) +PARTITION BY HASH (scrape_id) PARTITIONS 10; + +CREATE TABLE scraper_player_activity ( + scrape_id BIGINT unsigned NOT NULL, + player_activity_id BIGINT unsigned NOT NULL, + PRIMARY KEY (scrape_id, player_activity_id), + KEY idx_scrape_id (scrape_id), + KEY idx_player_activity_id (player_activity_id) ) -; +PARTITION BY HASH (scrape_id) PARTITIONS 10; diff --git a/mysql/docker-entrypoint-initdb.d/02_data.sql b/mysql/docker-entrypoint-initdb.d/02_data.sql index e8f5f4c..6f088ea 100644 --- a/mysql/docker-entrypoint-initdb.d/02_data.sql +++ b/mysql/docker-entrypoint-initdb.d/02_data.sql @@ -1,109 +1,5 @@ USE playerdata; -INSERT INTO skills (skill_id, skill_name) -VALUES - (2, 'attack'), - (3, 'defence'), - (4, 'strength'), - (5, 'hitpoints'), - (6, 'ranged'), - (7, 'prayer'), - (8, 'magic'), - (9, 'cooking'), - (10, 'woodcutting'), - (11, 'fletching'), - (12, 'fishing'), - (13, 'firemaking'), - (14, 'crafting'), - (15, 'smithing'), - (16, 'mining'), - (17, 'herblore'), - (18, 'agility'), - (19, 'thieving'), - (20, 'slayer'), - (21, 'farming'), - (22, 'runecraft'), - (23, 'hunter'), - (24, 'construction'); - -INSERT INTO activities (activity_id, activity_name) -VALUES - (1, 'abyssal_sire'), - (2, 'alchemical_hydra'), - (3, 'artio'), - (4, 'barrows_chests'), - (5, 'bounty_hunter_hunter'), - (6, 'bounty_hunter_rogue'), - (7, 'bryophyta'), - (8, 'callisto'), - (9, 'calvarion'), - (10, 'cerberus'), - (11, 'chambers_of_xeric'), - ( - 12, 'chambers_of_xeric_challenge_mode' - ), - (13, 'chaos_elemental'), - (14, 'chaos_fanatic'), - (15, 'commander_zilyana'), - (16, 'corporeal_beast'), - (17, 'crazy_archaeologist'), - (18, 'cs_all'), - (19, 'cs_beginner'), - (20, 'cs_easy'), - (21, 'cs_elite'), - (22, 'cs_hard'), - (23, 'cs_master'), - (24, 'cs_medium'), - (25, 'dagannoth_prime'), - (26, 'dagannoth_rex'), - (27, 'dagannoth_supreme'), - (28, 'deranged_archaeologist'), - (29, 'duke_sucellus'), - (30, 'general_graardor'), - (31, 'giant_mole'), - (32, 'grotesque_guardians'), - (33, 'hespori'), - (34, 'kalphite_queen'), - (35, 'king_black_dragon'), - (36, 'kraken'), - (37, 'kreearra'), - (38, 'kril_tsutsaroth'), - (39, 'league'), - (40, 'lms_rank'), - (41, 'mimic'), - (42, 'nex'), - (43, 'nightmare'), - (44, 'obor'), - (45, 'phantom_muspah'), - (46, 'phosanis_nightmare'), - (47, 'rifts_closed'), - (48, 'sarachnis'), - (49, 'scorpia'), - (50, 'skotizo'), - (51, 'soul_wars_zeal'), - (52, 'spindel'), - (53, 'tempoross'), - (54, 'the_corrupted_gauntlet'), - (55, 'the_gauntlet'), - (56, 'the_leviathan'), - (57, 'the_whisperer'), - (58, 'theatre_of_blood'), - (59, 'theatre_of_blood_hard'), - ( - 60, 'thermonuclear_smoke_devil' - ), - (61, 'tombs_of_amascut'), - (62, 'tombs_of_amascut_expert'), - (63, 'tzkal_zuk'), - (64, 'tztok_jad'), - (65, 'vardorvis'), - (66, 'venenatis'), - (67, 'vetion'), - (68, 'vorkath'), - (69, 'wintertodt'), - (70, 'zalcano'), - (71, 'zulrah'); - INSERT INTO Labels (id, label) VALUES (1, 'Real_Player'), @@ -148,4 +44,30 @@ VALUES (110, 'Gauntlet_bot'); INSERT INTO Labels (label) VALUES ("Unkown"); -UPDATE Labels set id=0 where label="Unkown"; +UPDATE Labels set id=0 where label="Unkown"; + + +INSERT INTO skill (skill_name) VALUES +('attack'), ('defence'), ('strength'), ('hitpoints'), ('ranged'), ('prayer'), +('magic'), ('cooking'), ('woodcutting'), ('fletching'), ('fishing'), ('firemaking'), +('crafting'), ('smithing'), ('mining'), ('herblore'), ('agility'), ('thieving'), +('slayer'), ('farming'), ('runecraft'), ('hunter'), ('construction') +; + +INSERT INTO activity (activity_name) VALUES +('league'), ('bounty_hunter_hunter'), ('bounty_hunter_rogue'), ('cs_all'), ('cs_beginner'), +('cs_easy'), ('cs_medium'), ('cs_hard'), ('cs_elite'), ('cs_master'), ('lms_rank'), +('soul_wars_zeal'), ('abyssal_sire'), ('alchemical_hydra'), ('barrows_chests'), ('bryophyta'), +('callisto'), ('cerberus'), ('chambers_of_xeric'), ('chambers_of_xeric_challenge_mode'), +('chaos_elemental'), ('chaos_fanatic'), ('commander_zilyana'), ('corporeal_beast'), +('crazy_archaeologist'), ('dagannoth_prime'), ('dagannoth_rex'), ('dagannoth_supreme'), +('deranged_archaeologist'), ('general_graardor'), ('giant_mole'), ('grotesque_guardians'), +('hespori'), ('kalphite_queen'), ('king_black_dragon'), ('kraken'), ('kreearra'), +('kril_tsutsaroth'), ('mimic'), ('nightmare'), ('nex'), ('phosanis_nightmare'), ('obor'), +('phantom_muspah'), ('sarachnis'), ('scorpia'), ('skotizo'), ('tempoross'), ('the_gauntlet'), +('the_corrupted_gauntlet'), ('theatre_of_blood'), ('theatre_of_blood_hard'), +('thermonuclear_smoke_devil'), ('tombs_of_amascut'), ('tombs_of_amascut_expert'), ('tzkal_zuk'), +('tztok_jad'), ('venenatis'), ('vetion'), ('vorkath'), ('wintertodt'), ('zalcano'), ('zulrah'), +('rifts_closed'), ('artio'), ('calvarion'), ('duke_sucellus'), ('spindel'), ('the_leviathan'), +('the_whisperer'), ('vardorvis') +; diff --git a/mysql_setup/__init__.py b/mysql_setup/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mysql_setup/models.py b/mysql_setup/models.py new file mode 100644 index 0000000..a755e20 --- /dev/null +++ b/mysql_setup/models.py @@ -0,0 +1,179 @@ +# script to insert all the data we need +import random +from datetime import datetime + +from sqlalchemy import ( + TIMESTAMP, + BigInteger, + Boolean, + Column, + Date, + DateTime, + ForeignKey, + Index, + Integer, + SmallInteger, + String, + UniqueConstraint, + create_engine, +) +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker + +Base = declarative_base() +random.seed(42) + +# Define other SQLAlchemy models for remaining tables in a similar manner + +# Create an engine and bind the base +engine = create_engine("mysql+pymysql://root:root_bot_buster@mysql:3306/playerdata") +Base.metadata.create_all(engine) + +# Create a session +Session = sessionmaker(bind=engine) +session = Session() + + +class Labels(Base): + __tablename__ = "Labels" + + id = Column(Integer, primary_key=True) + label = Column(String) + + +class Players(Base): + __tablename__ = "Players" + + id = Column(Integer, primary_key=True) + name = Column(String) + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, default=datetime.utcnow) + possible_ban = Column(Boolean, default=True) + confirmed_ban = Column(Boolean, default=False) + confirmed_player = Column(Boolean, default=False) + label_id = Column(Integer) + label_jagex = Column(Integer) + # ironman = Column(Boolean) + # hardcore_ironman = Column(Boolean) + # ultimate_ironman = Column(Boolean) + normalized_name = Column(String) + + +class Report(Base): + __tablename__ = "Reports" + + ID = Column(BigInteger, primary_key=True, autoincrement=True) + created_at = Column(TIMESTAMP) + reportedID = Column(Integer) + reportingID = Column(Integer) + region_id = Column(Integer) + x_coord = Column(Integer) + y_coord = Column(Integer) + z_coord = Column(Integer) + timestamp = Column(TIMESTAMP) + manual_detect = Column(SmallInteger) + on_members_world = Column(Integer) + on_pvp_world = Column(SmallInteger) + world_number = Column(Integer) + equip_head_id = Column(Integer) + equip_amulet_id = Column(Integer) + equip_torso_id = Column(Integer) + equip_legs_id = Column(Integer) + equip_boots_id = Column(Integer) + equip_cape_id = Column(Integer) + equip_hands_id = Column(Integer) + equip_weapon_id = Column(Integer) + equip_shield_id = Column(Integer) + equip_ge_value = Column(BigInteger) + + +class Skill(Base): + __tablename__ = "skill" + + skill_id = Column(SmallInteger(), primary_key=True, autoincrement=True) + skill_name = Column(String(50), nullable=False, unique=True) + + +class Activity(Base): + __tablename__ = "activity" + + activity_id = Column(SmallInteger(), primary_key=True, autoincrement=True) + activity_name = Column(String(50), nullable=False, unique=True) + + +class PlayerSkill(Base): + __tablename__ = "player_skill" + + player_skill_id = Column(BigInteger(), primary_key=True, autoincrement=True) + skill_id = Column(SmallInteger(), nullable=False) + skill_value = Column(Integer(), nullable=False, default=0) + + __table_args__ = ( + UniqueConstraint("skill_id", "skill_value", name="unique_skill_value"), + ) + + +class PlayerActivity(Base): + __tablename__ = "player_activity" + + player_activity_id = Column(BigInteger(), primary_key=True, autoincrement=True) + activity_id = Column(SmallInteger(), nullable=False) + activity_value = Column(Integer(), nullable=False, default=0) + + __table_args__ = ( + UniqueConstraint("activity_id", "activity_value", name="unique_activity_value"), + ) + + +class ScraperDataV3(Base): + __tablename__ = "scraper_data_v3" + + scrape_id = Column(BigInteger(), primary_key=True, autoincrement=True) + scrape_ts = Column(DateTime, nullable=False) + scrape_date = Column(Date, nullable=False) + player_id = Column(Integer, nullable=False) + + __table_args__ = ( + UniqueConstraint("player_id", "scrape_date", name="unique_player_scrape"), + Index("idx_scrape_ts", "scrape_ts"), + ) + + +class ScraperPlayerSkill(Base): + __tablename__ = "scraper_player_skill" + + scrape_id = Column( + BigInteger(), + ForeignKey("scraper_data_v3.scrape_id"), + primary_key=True, + ) + player_skill_id = Column( + BigInteger(), + ForeignKey("player_skill.player_skill_id"), + primary_key=True, + ) + + __table_args__ = ( + Index("idx_scrape_id", "scrape_id"), + Index("idx_player_skill_id", "player_skill_id"), + ) + + +class ScraperPlayerActivity(Base): + __tablename__ = "scraper_player_activity" + + scrape_id = Column( + BigInteger(), + ForeignKey("scraper_data_v3.scrape_id"), + primary_key=True, + ) + player_activity_id = Column( + BigInteger(), + ForeignKey("player_activity.player_activity_id"), + primary_key=True, + ) + + __table_args__ = ( + Index("idx_scrape_id", "scrape_id"), + Index("idx_player_activity_id", "player_activity_id"), + ) diff --git a/mysql_setup/setup_mysql.py b/mysql_setup/setup_mysql.py index 1d8a06a..30e5931 100644 --- a/mysql_setup/setup_mysql.py +++ b/mysql_setup/setup_mysql.py @@ -1,163 +1,24 @@ -# script to insert all the data we need import random from datetime import datetime, timedelta -from sqlalchemy import ( - TIMESTAMP, - BigInteger, - Boolean, - Column, - Date, - DateTime, - ForeignKey, - Integer, - SmallInteger, - String, - create_engine, - func, +from models import ( + Labels, + PlayerActivity, + Players, + PlayerSkill, + Report, + ScraperDataV3, + ScraperPlayerActivity, + ScraperPlayerSkill, + session, ) -from sqlalchemy.dialects.mysql import BIGINT, SMALLINT, TINYINT from sqlalchemy.exc import IntegrityError -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import sessionmaker -Base = declarative_base() -random.seed(42) - -class Players(Base): - __tablename__ = "Players" - - id = Column(Integer, primary_key=True) - name = Column(String) - created_at = Column(DateTime, default=datetime.utcnow) - updated_at = Column(DateTime, default=datetime.utcnow) - possible_ban = Column(Boolean, default=True) - confirmed_ban = Column(Boolean, default=False) - confirmed_player = Column(Boolean, default=False) - label_id = Column(Integer) - label_jagex = Column(Integer) - # ironman = Column(Boolean) - # hardcore_ironman = Column(Boolean) - # ultimate_ironman = Column(Boolean) - normalized_name = Column(String) - - -class ScraperData(Base): - __tablename__ = "scraper_data" - - scraper_id = Column(BIGINT, primary_key=True, autoincrement=True) - created_at = Column(DateTime, nullable=False, server_default=func.now()) - player_id = Column(SMALLINT, nullable=False) - record_date = Column(Date, nullable=True, server_default=func.current_date()) - - -class ScraperDataLatest(Base): - __tablename__ = "scraper_data_latest" - - scraper_id = Column(BIGINT) - created_at = Column(DateTime, nullable=False, server_default=func.now()) - player_id = Column(BIGINT, primary_key=True) - record_date = Column(Date, nullable=True, server_default=func.current_date()) - - -class Skills(Base): - __tablename__ = "skills" - - skill_id = Column(TINYINT, primary_key=True, autoincrement=True) - skill_name = Column(String(50), nullable=False) - - -class PlayerSkills(Base): - __tablename__ = "player_skills" - - scraper_id = Column( - BIGINT, - ForeignKey("scraper_data.scraper_id", ondelete="CASCADE"), - primary_key=True, - ) - skill_id = Column( - TINYINT, - ForeignKey("skills.skill_id", ondelete="CASCADE"), - primary_key=True, - ) - skill_value = Column(Integer, nullable=False, default=0) - - -class Activities(Base): - __tablename__ = "activities" - - activity_id = Column(TINYINT, primary_key=True, autoincrement=True) - activity_name = Column(String(50), nullable=False) - - -class PlayerActivities(Base): - __tablename__ = "player_activities" - - scraper_id = Column( - BIGINT, - ForeignKey("scraper_data.scraper_id", ondelete="CASCADE"), - primary_key=True, - ) - activity_id = Column( - TINYINT, - ForeignKey("activities.activity_id", ondelete="CASCADE"), - primary_key=True, - ) - activity_value = Column(Integer, nullable=False, default=0) - - -class Report(Base): - __tablename__ = "Reports" - - ID = Column(BigInteger, primary_key=True, autoincrement=True) - created_at = Column(TIMESTAMP) - reportedID = Column(Integer) - reportingID = Column(Integer) - region_id = Column(Integer) - x_coord = Column(Integer) - y_coord = Column(Integer) - z_coord = Column(Integer) - timestamp = Column(TIMESTAMP) - manual_detect = Column(SmallInteger) - on_members_world = Column(Integer) - on_pvp_world = Column(SmallInteger) - world_number = Column(Integer) - equip_head_id = Column(Integer) - equip_amulet_id = Column(Integer) - equip_torso_id = Column(Integer) - equip_legs_id = Column(Integer) - equip_boots_id = Column(Integer) - equip_cape_id = Column(Integer) - equip_hands_id = Column(Integer) - equip_weapon_id = Column(Integer) - equip_shield_id = Column(Integer) - equip_ge_value = Column(BigInteger) - - -# Define other SQLAlchemy models for remaining tables in a similar manner - -# Create an engine and bind the base -engine = create_engine("mysql+pymysql://root:root_bot_buster@mysql:3306/playerdata") -Base.metadata.create_all(engine) - -# Create a session -Session = sessionmaker(bind=engine) -session = Session() - - -# Define function to generate random date within a year def random_date(): return datetime.utcnow() - timedelta(days=random.randint(0, 365)) -class Labels(Base): - __tablename__ = "Labels" - - id = Column(Integer, primary_key=True) - label = Column(String) - - def get_labels(): # Query the labels table to get all id values label_ids = session.query(Labels.id).all() @@ -188,66 +49,6 @@ def insert_players(len_players, label_ids: list): return -def get_skills(): - # Query the skills table to get all id values - skill_ids = session.query(Skills.skill_id).all() - skill_ids = [id[0] for id in skill_ids] # Convert list of tuples to list of ids - return skill_ids - - -def get_activities(): - # Query the activity table to get all id values - activity_ids = session.query(Activities.activity_id).all() - activity_ids = [ - id[0] for id in activity_ids - ] # Convert list of tuples to list of ids - return activity_ids - - -def insert_scraper_data(len_scraper_data, len_players, skill_ids, activity_ids): - for i in range(1, len_scraper_data + 1): - print(f"scraper_data_{i}") - # pick random player - player_id = random.randint(1, len_players) - - # pick random amount of skills - amount_skills = random.randint(0, len(skill_ids)) - random.shuffle(skill_ids) - skills = skill_ids[:amount_skills] - - # pick random amount of activities - amount_activities = random.randint(0, len(activity_ids)) - random.shuffle(activity_ids) - activities = activity_ids[:amount_activities] - - # scraper data - try: - session.add( - ScraperData(scraper_id=i, player_id=player_id, created_at=random_date()) - ) - session.commit() - for skill in skills: - session.add( - PlayerSkills( - scraper_id=i, - skill_id=skill, - skill_value=random.randint(1, 200_000_000), - ) - ) - for activity in activities: - session.add( - PlayerActivities( - scraper_id=i, - activity_id=activity, - activity_value=random.randint(1, 65_000), - ) - ) - except IntegrityError: - session.rollback() # Rollback the transaction if a duplicate entry is encountered - finally: - session.commit() - - def insert_reports(len_reports, len_players): for i in range(1, len_reports + 1): print(f"Report_{i}") @@ -291,15 +92,16 @@ def insert_reports(len_reports, len_players): session.commit() +def generate_random_scraper_data(len_scrapers, len_players, skill_ids, activity_ids): + # TODO: + ... + + def main(): len_players = 250 label_ids = get_labels() insert_players(len_players, label_ids) - skill_ids = get_skills() - activity_ids = get_activities() - len_scraper_data = len_players * 3 - insert_scraper_data(len_scraper_data, len_players, skill_ids, activity_ids) - insert_reports(len_reports=10000, len_players=len_players) + insert_reports(len_reports=10_000, len_players=len_players) if __name__ == "__main__": diff --git a/src/api/v3/highscore.py b/src/api/v3/highscore.py index ffbf022..c26fba8 100644 --- a/src/api/v3/highscore.py +++ b/src/api/v3/highscore.py @@ -1,8 +1,9 @@ import logging +from collections import defaultdict from fastapi import APIRouter, Depends, Query -from src.app.repositories import PlayerActivityRepo, PlayerSkillsRepo, ScraperDataRepo +from src.app.repositories import ScraperDataRepo from src.app.views.response import ActivityView, ScraperDataView, SkillView from src.core.fastapi.dependencies.session import get_session @@ -11,44 +12,59 @@ router = APIRouter() +def convert_to_scraper_data_view(result_list: list[dict]) -> list[ScraperDataView]: + # Dictionary to hold grouped data by scraper_id + scraper_data_map = defaultdict(lambda: {"skills": [], "activities": []}) + + for row in result_list: + scraper_id = row["scrape_id"] + scraper_data = scraper_data_map[scraper_id] + + # Set shared attributes only once per scraper_id + if "created_at" not in scraper_data: + scraper_data["created_at"] = row["scrape_ts"] + scraper_data["record_date"] = row["scrape_date"] + scraper_data["scraper_id"] = scraper_id + scraper_data["player_id"] = row["player_id"] + + # Append to skills or activities based on hs_type + if row["hs_type"] == "skill": + scraper_data["skills"].append( + SkillView(skill_name=row["hs_name"], skill_value=row["hs_value"]) + ) + elif row["hs_type"] == "activity": + scraper_data["activities"].append( + ActivityView( + activity_name=row["hs_name"], activity_value=row["hs_value"] + ) + ) + + # Convert the grouped data into ScraperDataView instances + return [ + ScraperDataView( + created_at=data["created_at"], + record_date=data["record_date"], + scraper_id=data["scraper_id"], + player_id=data["player_id"], + skills=data["skills"], + activities=data["activities"], + ) + for data in scraper_data_map.values() + ] + + @router.get("/highscore/latest", response_model=list[ScraperDataView]) async def get_highscore_latest( player_id: int, - player_name: str = None, label_id: int = None, many: bool = False, limit: int = Query(default=10, ge=0, le=10_000), session=Depends(get_session), ): repo = ScraperDataRepo(session=session) - repo_skills = PlayerSkillsRepo(session=session) - repo_activities = PlayerActivityRepo(session=session) - - data = await repo.select( - player_name=player_name, + data = await repo.select_latest_scraper_data_v3( player_id=player_id, label_id=label_id, many=many, - limit=limit, - history=False, ) - - results = [] - for d in data: - skills = await repo_skills.select(scraper_id=d.get("scraper_id")) - skills = [SkillView(**s) for s in skills] - - activities = await repo_activities.select(scraper_id=d.get("scraper_id")) - activities = [ActivityView(**a) for a in activities] - - results.append( - ScraperDataView( - created_at=d.get("created_at"), - record_date=d.get("record_date"), - scraper_id=d.get("scraper_id"), - player_id=d.get("player_id"), - skills=skills, - activities=activities, - ) - ) - return results + return convert_to_scraper_data_view(result_list=data) diff --git a/src/app/repositories/scraper_data.py b/src/app/repositories/scraper_data.py index c16413d..92ec763 100644 --- a/src/app/repositories/scraper_data.py +++ b/src/app/repositories/scraper_data.py @@ -1,82 +1,118 @@ -from fastapi.encoders import jsonable_encoder -from sqlalchemy.ext.asyncio import AsyncResult, AsyncSession +from sqlalchemy import func, literal, select, union_all +from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import aliased -from sqlalchemy.sql.expression import Select -from src.app.repositories.abstract_repo import AbstractAPI -from src.core.database.models import Player, ScraperData, ScraperDataLatest +from src.core.database.models.player import Player +from src.core.database.models.scraper_data_v3 import ( + Activity, + PlayerActivity, + PlayerSkill, + ScraperDataV3, + ScraperPlayerActivity, + ScraperPlayerSkill, + Skill, +) -class ScraperDataRepo(AbstractAPI): +class ScraperDataRepo: def __init__(self, session: AsyncSession) -> None: - super().__init__() self.session = session - async def insert(self, id): - raise NotImplementedError - - async def select( + async def select_latest_scraper_data_v3( self, - player_name: str, - player_id: int, - label_id: int, - many: bool, - limit: int, - history: bool = False, - ) -> list[dict]: - table = ( - aliased(ScraperData, name="sd") - if history - else aliased(ScraperDataLatest, name="sdl") + player_id: int = None, + label_id: int = None, + many: bool = True, + ): + # Aliases for tables + SDV = aliased(ScraperDataV3) + P = aliased(Player) + + # skill specific + SPS = aliased(ScraperPlayerSkill) + PS = aliased(PlayerSkill) + S = aliased(Skill) + + # activity specific + SPA = aliased(ScraperPlayerActivity) + PA = aliased(PlayerActivity) + A = aliased(Activity) + + # Subquery to get the latest scrape date for each player + subquery = ( + select(func.max(SDV.scrape_date)) + .where(SDV.player_id == P.id) + .correlate(P) + .scalar_subquery() ) - player = aliased(Player, name="pl") - - sql = Select(player.name, table) - sql = sql.join(player, table.player_id == player.id) - - if player_id: - if many: - sql = sql.where(table.player_id >= player_id) - else: - sql = sql.where(table.player_id == player_id) - if player_name: - sql = sql.where(player.name == player_name) - - if label_id: - sql = sql.where(player.label_id == label_id) - - sql = sql.order_by(table.player_id.asc()) - # sql = sql.order_by(player.id.asc()) # not performant - sql = sql.limit(limit) - - async with self.session: - result: AsyncResult = await self.session.execute(sql) - result = result.fetchall() - data = [{"name": name, **jsonable_encoder(r)} for name, r in result] - return data + # Skill query + skill_query = ( + select( + SDV.scrape_id, + SDV.scrape_ts, + SDV.scrape_date, + SDV.player_id, + P.name, + S.skill_id.label("hs_id"), + S.skill_name.label("hs_name"), + PS.skill_value.label("hs_value"), + literal("skill").label("hs_type"), + ) + .join(P, SDV.player_id == P.id) + .join(SPS, SDV.scrape_id == SPS.scrape_id) + .join(PS, SPS.player_skill_id == PS.player_skill_id) + .join(S, PS.skill_id == S.skill_id) + .where(SDV.scrape_date == subquery) + ) - async def select_history(self, player_name: str, player_id: int, many: bool): - table = ScraperData - sql = Select(table) + # Activity query + activity_query = ( + select( + SDV.scrape_id, + SDV.scrape_ts, + SDV.scrape_date, + SDV.player_id, + P.name, + A.activity_id.label("hs_id"), + A.activity_name.label("hs_name"), + PA.activity_value.label("hs_value"), + literal("activity").label("hs_type"), + ) + .join(P, SDV.player_id == P.id) + .join(SPA, SDV.scrape_id == SPA.scrape_id) + .join(PA, SPA.player_activity_id == PA.player_activity_id) + .join(A, PA.activity_id == A.activity_id) + .where(SDV.scrape_date == subquery) + ) + # Combine skill and activity queries using union_all + combined_query = union_all(skill_query, activity_query) + + # Wrap the combined_query in a new select statement to apply additional filters + final_query = select( + combined_query.c.scrape_id, + combined_query.c.scrape_ts, + combined_query.c.scrape_date, + combined_query.c.player_id, + combined_query.c.name, + combined_query.c.hs_id, + combined_query.c.hs_name, + combined_query.c.hs_value, + combined_query.c.hs_type, + ).select_from(combined_query) + + # Apply filters if provided if player_id: if many: - sql = sql.where(table.player_id >= player_id) + final_query = final_query.where(P.id > player_id) else: - sql = sql.where(table.player_id == player_id) + final_query = final_query.where(P.id == player_id) - if player_name: - sql = sql.join(Player, table.player_id == Player.id) - sql = sql.where(Player.name == player_name) - - async with self.session: - result: AsyncResult = await self.session.execute(sql) - result = result.scalars().all() - return jsonable_encoder(result) - - async def update(self): - raise NotImplementedError + if label_id: + final_query = final_query.where(P.label_id > label_id) - async def delete(self): - raise NotImplementedError + # Execute the final query + result = await self.session.execute(final_query) + result_list = result.mappings().all() + return result_list diff --git a/src/core/database/models/scraper_data_v3.py b/src/core/database/models/scraper_data_v3.py new file mode 100644 index 0000000..4c51043 --- /dev/null +++ b/src/core/database/models/scraper_data_v3.py @@ -0,0 +1,107 @@ +from sqlalchemy import ( + BigInteger, + Column, + Date, + DateTime, + ForeignKey, + Index, + Integer, + SmallInteger, + String, + UniqueConstraint, +) +from sqlalchemy.ext.declarative import declarative_base + +Base = declarative_base() + + +class Skill(Base): + __tablename__ = "skill" + + skill_id = Column(SmallInteger(), primary_key=True, autoincrement=True) + skill_name = Column(String(50), nullable=False, unique=True) + + +class Activity(Base): + __tablename__ = "activity" + + activity_id = Column(SmallInteger(), primary_key=True, autoincrement=True) + activity_name = Column(String(50), nullable=False, unique=True) + + +class PlayerSkill(Base): + __tablename__ = "player_skill" + + player_skill_id = Column(BigInteger(), primary_key=True, autoincrement=True) + skill_id = Column(SmallInteger(), nullable=False) + skill_value = Column(Integer(), nullable=False, default=0) + + __table_args__ = ( + UniqueConstraint("skill_id", "skill_value", name="unique_skill_value"), + ) + + +class PlayerActivity(Base): + __tablename__ = "player_activity" + + player_activity_id = Column(BigInteger(), primary_key=True, autoincrement=True) + activity_id = Column(SmallInteger(), nullable=False) + activity_value = Column(Integer(), nullable=False, default=0) + + __table_args__ = ( + UniqueConstraint("activity_id", "activity_value", name="unique_activity_value"), + ) + + +class ScraperDataV3(Base): + __tablename__ = "scraper_data_v3" + + scrape_id = Column(BigInteger(), primary_key=True, autoincrement=True) + scrape_ts = Column(DateTime, nullable=False) + scrape_date = Column(Date, nullable=False) + player_id = Column(Integer, nullable=False) + + __table_args__ = ( + UniqueConstraint("player_id", "scrape_date", name="unique_player_scrape"), + Index("idx_scrape_ts", "scrape_ts"), + ) + + +class ScraperPlayerSkill(Base): + __tablename__ = "scraper_player_skill" + + scrape_id = Column( + BigInteger(), + ForeignKey("scraper_data_v3.scrape_id"), + primary_key=True, + ) + player_skill_id = Column( + BigInteger(), + ForeignKey("player_skill.player_skill_id"), + primary_key=True, + ) + + __table_args__ = ( + Index("idx_scrape_id", "scrape_id"), + Index("idx_player_skill_id", "player_skill_id"), + ) + + +class ScraperPlayerActivity(Base): + __tablename__ = "scraper_player_activity" + + scrape_id = Column( + BigInteger(), + ForeignKey("scraper_data_v3.scrape_id"), + primary_key=True, + ) + player_activity_id = Column( + BigInteger(), + ForeignKey("player_activity.player_activity_id"), + primary_key=True, + ) + + __table_args__ = ( + Index("idx_scrape_id", "scrape_id"), + Index("idx_player_activity_id", "player_activity_id"), + )