diff --git a/data/article_data.csv b/data/article_data.csv index 54bff6a..787a45b 100644 --- a/data/article_data.csv +++ b/data/article_data.csv @@ -1,102 +1,102 @@ -article_id,Economy and Business,Politics and Society,Technology and Culture,Sports and Leisure,Opinion and Analysis -1,0.0,1.0,0.0,0.0,0.0 -2,0.0,1.0,0.0,0.0,0.0 -3,0.0,1.0,0.0,0.0,0.0 -4,0.0,0.0,1.0,0.0,0.0 -5,0.0,0.0,1.0,0.0,0.0 -6,0.0,1.0,0.0,0.0,0.0 -7,0.0,0.0,0.0,1.0,0.0 -8,1.0,0.0,0.0,0.0,0.0 -9,1.0,0.0,0.0,0.0,0.0 -10,0.0,0.0,0.0,1.0,0.0 -11,0.0,1.0,0.0,0.0,0.0 -12,0.0,0.0,1.0,0.0,0.0 -13,1.0,0.0,0.0,0.0,0.0 -14,0.0,0.0,0.0,0.0,1.0 -15,0.0,0.0,0.0,0.0,1.0 -16,0.0,0.0,0.0,1.0,0.0 -17,0.0,1.0,0.0,0.0,0.0 -18,1.0,0.0,0.0,0.0,0.0 -19,0.0,1.0,0.0,0.0,0.0 -20,1.0,0.0,0.0,0.0,0.0 -21,0.0,0.0,0.0,1.0,0.0 -22,0.0,0.0,0.0,1.0,0.0 -23,0.0,0.0,0.0,0.0,1.0 -24,0.0,1.0,0.0,0.0,0.0 -25,0.0,0.0,0.0,0.0,1.0 -26,0.0,1.0,0.0,0.0,0.0 -27,0.0,0.0,0.0,0.0,1.0 -28,0.0,0.0,1.0,0.0,0.0 -29,0.0,0.0,1.0,0.0,0.0 -30,0.0,0.0,1.0,0.0,0.0 -31,0.0,0.0,1.0,0.0,0.0 -32,1.0,0.0,0.0,0.0,0.0 -33,0.0,0.0,1.0,0.0,0.0 -34,0.0,0.0,0.0,1.0,0.0 -35,0.0,0.0,0.0,1.0,0.0 -36,1.0,0.0,0.0,0.0,0.0 -37,0.0,0.0,1.0,0.0,0.0 -38,0.0,0.0,1.0,0.0,0.0 -39,0.0,0.0,1.0,0.0,0.0 -40,0.0,0.0,0.0,0.0,1.0 -41,0.0,1.0,0.0,0.0,0.0 -42,0.0,0.0,0.0,0.0,1.0 -43,0.0,1.0,0.0,0.0,0.0 -44,0.0,0.0,1.0,0.0,0.0 -45,0.0,0.0,1.0,0.0,0.0 -46,0.0,0.0,0.0,0.0,1.0 -47,0.0,0.0,0.0,0.0,1.0 -48,0.0,1.0,0.0,0.0,0.0 -49,0.0,0.0,0.0,1.0,0.0 -50,0.0,1.0,0.0,0.0,0.0 -51,0.0,0.0,0.0,0.0,1.0 -52,0.0,0.0,0.0,0.0,1.0 -53,1.0,0.0,0.0,0.0,0.0 -54,0.0,0.0,0.0,0.0,1.0 -55,1.0,0.0,0.0,0.0,0.0 -56,0.0,0.0,0.0,1.0,0.0 -57,0.0,1.0,0.0,0.0,0.0 -58,0.0,1.0,0.0,0.0,0.0 -59,1.0,0.0,0.0,0.0,0.0 -60,0.0,1.0,0.0,0.0,0.0 -61,0.0,0.0,0.0,0.0,1.0 -62,0.0,0.0,1.0,0.0,0.0 -63,1.0,0.0,0.0,0.0,0.0 -64,0.0,1.0,0.0,0.0,0.0 -65,1.0,0.0,0.0,0.0,0.0 -66,1.0,0.0,0.0,0.0,0.0 -67,0.0,0.0,1.0,0.0,0.0 -68,0.0,0.0,0.0,0.0,1.0 -69,1.0,0.0,0.0,0.0,0.0 -70,0.0,1.0,0.0,0.0,0.0 -71,0.0,0.0,0.0,1.0,0.0 
-72,1.0,0.0,0.0,0.0,0.0 -73,1.0,0.0,0.0,0.0,0.0 -74,0.0,0.0,1.0,0.0,0.0 -75,0.0,0.0,0.0,0.0,1.0 -76,0.0,0.0,0.0,1.0,0.0 -77,0.0,1.0,0.0,0.0,0.0 -78,0.0,0.0,0.0,1.0,0.0 -79,0.0,1.0,0.0,0.0,0.0 -80,0.0,0.0,0.0,0.0,1.0 -81,0.0,1.0,0.0,0.0,0.0 -82,0.0,0.0,1.0,0.0,0.0 -83,0.0,0.0,1.0,0.0,0.0 -84,0.0,0.0,1.0,0.0,0.0 -85,0.0,0.0,1.0,0.0,0.0 -86,0.0,0.0,0.0,1.0,0.0 -87,0.0,0.0,0.0,0.0,1.0 -88,0.0,1.0,0.0,0.0,0.0 -89,0.0,1.0,0.0,0.0,0.0 -90,0.0,0.0,1.0,0.0,0.0 -91,0.0,0.0,1.0,0.0,0.0 -92,1.0,0.0,0.0,0.0,0.0 -93,0.0,0.0,0.0,0.0,1.0 -94,0.0,0.0,0.0,1.0,0.0 -95,0.0,1.0,0.0,0.0,0.0 -96,1.0,0.0,0.0,0.0,0.0 -97,1.0,0.0,0.0,0.0,0.0 -98,0.0,1.0,0.0,0.0,0.0 -99,0.0,0.0,0.0,1.0,0.0 -100,1.0,0.0,0.0,0.0,0.0 -101,0.0,1.0,0.0,0.0,0.0 +article_id,Economy and Business,Politics and Society,Technology and Culture,Sports and Leisure,Opinion and Analysis,created at +1,0.0,1.0,0.0,0.0,0.0,2023-01-01 +2,0.0,1.0,0.0,0.0,0.0,2023-01-02 +3,0.0,1.0,0.0,0.0,0.0,2023-01-03 +4,0.0,0.0,1.0,0.0,0.0,2023-01-04 +5,0.0,0.0,1.0,0.0,0.0,2023-01-05 +6,0.0,1.0,0.0,0.0,0.0,2023-01-06 +7,0.0,0.0,0.0,1.0,0.0,2023-01-07 +8,1.0,0.0,0.0,0.0,0.0,2023-01-08 +9,1.0,0.0,0.0,0.0,0.0,2023-01-09 +10,0.0,0.0,0.0,1.0,0.0,2023-01-10 +11,0.0,1.0,0.0,0.0,0.0,2023-01-11 +12,0.0,0.0,1.0,0.0,0.0,2023-01-12 +13,1.0,0.0,0.0,0.0,0.0,2023-01-13 +14,0.0,0.0,0.0,0.0,1.0,2023-01-14 +15,0.0,0.0,0.0,0.0,1.0,2023-01-15 +16,0.0,0.0,0.0,1.0,0.0,2023-01-16 +17,0.0,1.0,0.0,0.0,0.0,2023-01-17 +18,1.0,0.0,0.0,0.0,0.0,2023-01-18 +19,0.0,1.0,0.0,0.0,0.0,2023-01-19 +20,1.0,0.0,0.0,0.0,0.0,2023-01-20 +21,0.0,0.0,0.0,1.0,0.0,2023-01-21 +22,0.0,0.0,0.0,1.0,0.0,2023-01-22 +23,0.0,0.0,0.0,0.0,1.0,2023-01-23 +24,0.0,1.0,0.0,0.0,0.0,2023-01-24 +25,0.0,0.0,0.0,0.0,1.0,2023-01-25 +26,0.0,1.0,0.0,0.0,0.0,2023-01-26 +27,0.0,0.0,0.0,0.0,1.0,2023-01-27 +28,0.0,0.0,1.0,0.0,0.0,2023-01-28 +29,0.0,0.0,1.0,0.0,0.0,2023-01-29 +30,0.0,0.0,1.0,0.0,0.0,2023-01-30 +31,0.0,0.0,1.0,0.0,0.0,2023-01-31 +32,1.0,0.0,0.0,0.0,0.0,2023-02-01 +33,0.0,0.0,1.0,0.0,0.0,2023-02-02 
+34,0.0,0.0,0.0,1.0,0.0,2023-02-03 +35,0.0,0.0,0.0,1.0,0.0,2023-02-04 +36,1.0,0.0,0.0,0.0,0.0,2023-02-05 +37,0.0,0.0,1.0,0.0,0.0,2023-02-06 +38,0.0,0.0,1.0,0.0,0.0,2023-02-07 +39,0.0,0.0,1.0,0.0,0.0,2023-02-08 +40,0.0,0.0,0.0,0.0,1.0,2023-02-09 +41,0.0,1.0,0.0,0.0,0.0,2023-02-10 +42,0.0,0.0,0.0,0.0,1.0,2023-02-11 +43,0.0,1.0,0.0,0.0,0.0,2023-02-12 +44,0.0,0.0,1.0,0.0,0.0,2023-02-13 +45,0.0,0.0,1.0,0.0,0.0,2023-02-14 +46,0.0,0.0,0.0,0.0,1.0,2023-02-15 +47,0.0,0.0,0.0,0.0,1.0,2023-02-16 +48,0.0,1.0,0.0,0.0,0.0,2023-02-17 +49,0.0,0.0,0.0,1.0,0.0,2023-02-18 +50,0.0,1.0,0.0,0.0,0.0,2023-02-19 +51,0.0,0.0,0.0,0.0,1.0,2023-02-20 +52,0.0,0.0,0.0,0.0,1.0,2023-02-21 +53,1.0,0.0,0.0,0.0,0.0,2023-02-22 +54,0.0,0.0,0.0,0.0,1.0,2023-02-23 +55,1.0,0.0,0.0,0.0,0.0,2023-02-24 +56,0.0,0.0,0.0,1.0,0.0,2023-02-25 +57,0.0,1.0,0.0,0.0,0.0,2023-02-26 +58,0.0,1.0,0.0,0.0,0.0,2023-02-27 +59,1.0,0.0,0.0,0.0,0.0,2023-02-28 +60,0.0,1.0,0.0,0.0,0.0,2023-03-01 +61,0.0,0.0,0.0,0.0,1.0,2023-03-02 +62,0.0,0.0,1.0,0.0,0.0,2023-03-03 +63,1.0,0.0,0.0,0.0,0.0,2023-03-04 +64,0.0,1.0,0.0,0.0,0.0,2023-03-05 +65,1.0,0.0,0.0,0.0,0.0,2023-03-06 +66,1.0,0.0,0.0,0.0,0.0,2023-03-07 +67,0.0,0.0,1.0,0.0,0.0,2023-03-08 +68,0.0,0.0,0.0,0.0,1.0,2023-03-09 +69,1.0,0.0,0.0,0.0,0.0,2023-03-10 +70,0.0,1.0,0.0,0.0,0.0,2023-03-11 +71,0.0,0.0,0.0,1.0,0.0,2023-03-12 +72,1.0,0.0,0.0,0.0,0.0,2023-03-13 +73,1.0,0.0,0.0,0.0,0.0,2023-03-14 +74,0.0,0.0,1.0,0.0,0.0,2023-03-15 +75,0.0,0.0,0.0,0.0,1.0,2023-03-16 +76,0.0,0.0,0.0,1.0,0.0,2023-03-17 +77,0.0,1.0,0.0,0.0,0.0,2023-03-18 +78,0.0,0.0,0.0,1.0,0.0,2023-03-19 +79,0.0,1.0,0.0,0.0,0.0,2023-03-20 +80,0.0,0.0,0.0,0.0,1.0,2023-03-21 +81,0.0,1.0,0.0,0.0,0.0,2023-03-22 +82,0.0,0.0,1.0,0.0,0.0,2023-03-23 +83,0.0,0.0,1.0,0.0,0.0,2023-03-24 +84,0.0,0.0,1.0,0.0,0.0,2023-03-25 +85,0.0,0.0,1.0,0.0,0.0,2023-03-26 +86,0.0,0.0,0.0,1.0,0.0,2023-03-27 +87,0.0,0.0,0.0,0.0,1.0,2023-03-28 +88,0.0,1.0,0.0,0.0,0.0,2023-03-29 +89,0.0,1.0,0.0,0.0,0.0,2023-03-30 +90,0.0,0.0,1.0,0.0,0.0,2023-03-31 
+91,0.0,0.0,1.0,0.0,0.0,2023-04-01 +92,1.0,0.0,0.0,0.0,0.0,2023-04-02 +93,0.0,0.0,0.0,0.0,1.0,2023-04-03 +94,0.0,0.0,0.0,1.0,0.0,2023-04-04 +95,0.0,1.0,0.0,0.0,0.0,2023-04-05 +96,1.0,0.0,0.0,0.0,0.0,2023-04-06 +97,1.0,0.0,0.0,0.0,0.0,2023-04-07 +98,0.0,1.0,0.0,0.0,0.0,2023-04-08 +99,0.0,0.0,0.0,1.0,0.0,2023-04-09 +100,1.0,0.0,0.0,0.0,0.0,2023-04-10 +101,0.0,1.0,0.0,0.0,0.0,2023-04-11 diff --git a/data/interaction_data.csv b/data/interaction_data.csv index f10db4f..87d6b40 100644 --- a/data/interaction_data.csv +++ b/data/interaction_data.csv @@ -1,66 +1,65 @@ -classification_id,article_id,duration_time,user_id -13.0,41,98, -20.0,74,65, -3.0,1,1, -18.0,50,63, -1.0,42,64, -15.0,61,33, -19.0,47,83, -4.0,27,17, -12.0,58,59, -6.0,38,92, -16.0,60,75, -14.0,72,77, -17.0,75,50, -11.0,62,20, -7.0,36,79, -5.0,20,27, -9.0,19,45, -8.0,19,30, -10.0,44,23, -2.0,8,14, -2.0,58,53, -17.0,87,54, -14.0,17,6, -1.0,12,12, -12.0,15,92, -11.0,19,30, -12.0,62,6, -20.0,100,38, -10.0,32,37, -19.0,57,3, -5.0,76,94, -18.0,53,39, -4.0,91,18, -14.0,16,5, -12.0,45,74, -13.0,77,21, -2.0,56,69, -9.0,36,66, -7.0,28,38, -15.0,24,57, -15.0,39,98, -10.0,54,33, -8.0,85,95, -20.0,36,10, -7.0,8,13, -14.0,67,26, -16.0,16,34, -10.0,70,10, -6.0,80,85, -5.0,14,34, -6.0,50,63, -1.0,96,54, -8.0,38,92, -3.0,83,29, -11.0,91,72, -7.0,92,64, -2.0,36,73, -17.0,63,92, -9.0,75,73, -16.0,39,85, -1.0,61,74, -20.0,76,90, -6.0,90,85, -12.0,19,30, -,101,5,101.0 +classification_id,article_id,duration_time +13.0,41,98 +20.0,74,65 +3.0,1,1 +18.0,50,63 +1.0,42,64 +15.0,61,33 +19.0,47,83 +4.0,27,17 +12.0,58,59 +6.0,38,92 +16.0,60,75 +14.0,72,77 +17.0,75,50 +11.0,62,20 +7.0,36,79 +5.0,20,27 +9.0,19,45 +8.0,19,30 +10.0,44,23 +2.0,8,14 +2.0,58,53 +17.0,87,54 +14.0,17,6 +1.0,12,12 +12.0,15,92 +11.0,19,30 +12.0,62,6 +20.0,100,38 +10.0,32,37 +19.0,57,3 +5.0,76,94 +18.0,53,39 +4.0,91,18 +14.0,16,5 +12.0,45,74 +13.0,77,21 +2.0,56,69 +9.0,36,66 +7.0,28,38 +15.0,24,57 +15.0,39,98 +10.0,54,33 +8.0,85,95 +20.0,36,10 +7.0,8,13 +14.0,67,26 
+16.0,16,34 +10.0,70,10 +6.0,80,85 +5.0,14,34 +6.0,50,63 +1.0,96,54 +8.0,38,92 +3.0,83,29 +11.0,91,72 +7.0,92,64 +2.0,36,73 +17.0,63,92 +9.0,75,73 +16.0,39,85 +1.0,61,74 +20.0,76,90 +6.0,90,85 +12.0,19,30 \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..9499575 --- /dev/null +++ b/main.py @@ -0,0 +1,31 @@ +from user_classification import user_data_to_classification_id +from recommend_service import RecommendService +import pandas as pd +import asyncio + +async def main(): + user_data_path = 'data/user_data_classified.csv' + user_datas = pd.read_csv(user_data_path) + recommendService = RecommendService() + await recommendService.fit_model() + user_id = 1 + print(user_data_to_classification_id( + user_datas.iloc[user_id]['sex'], + user_datas.iloc[user_id]['issue finder'], + user_datas.iloc[user_id]['lifestyle consumer'], + user_datas.iloc[user_id]['entertainer'], + user_datas.iloc[user_id]['tech specialist'], + user_datas.iloc[user_id]['professionals'] + )) + print(recommendService.get_top_n_articles( + user_data_to_classification_id( + user_datas.iloc[user_id]['sex'], + user_datas.iloc[user_id]['issue finder'], + user_datas.iloc[user_id]['lifestyle consumer'], + user_datas.iloc[user_id]['entertainer'], + user_datas.iloc[user_id]['tech specialist'], + user_datas.iloc[user_id]['professionals'] + ), 5)) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/recommend_service.py b/recommend_service.py index e1036f8..0013139 100644 --- a/recommend_service.py +++ b/recommend_service.py @@ -1,118 +1,148 @@ +import asyncio import warnings -warnings.filterwarnings('ignore') +import pandas as pd +from datetime import datetime +import aiofiles +from concurrent.futures import ThreadPoolExecutor from lightfm import LightFM from lightfm.data import Dataset -from lightfm.evaluation import precision_at_k, recall_at_k, auc_score import numpy as np -from lightfm.cross_validation import random_train_test_split 
import io

warnings.filterwarnings('ignore')


class ArticleDataInfo:
    """One-row DataFrame describing a new article, shaped like a row of article_data.csv.

    Args:
        article_id: Identifier stored in the ``article_id`` column.
        category: Name of the category column to flag with 1; every other
            category column is 0.
        created_at: Value stored in the ``created at`` column.
            ``RecommendService.get_time_weight`` parses this column with the
            ``%Y-%m-%d`` format.

    Raises:
        ValueError: If ``category`` is not one of ``CATEGORY_COLUMNS``.
    """

    CATEGORY_COLUMNS = (
        'Economy and Business',
        'Politics and Society',
        'Technology and Culture',
        'Sports and Leisure',
        'Opinion and Analysis',
    )

    def __init__(self, article_id, category, created_at):
        if category not in self.CATEGORY_COLUMNS:
            raise ValueError(f"unknown article category: {category!r}")
        # Build the flags up front instead of patching the frame afterwards:
        # chained indexing (``frame.iloc[0][category] = 1``) writes to a
        # temporary copy of the row and leaves the frame unchanged.
        row = {'article_id': [article_id]}
        for name in self.CATEGORY_COLUMNS:
            row[name] = [1 if name == category else 0]
        row['created at'] = [created_at]
        self.article_data = pd.DataFrame(row)


class InteractionDataInfo:
    """One-row DataFrame describing a single user/article interaction.

    Args:
        user_id: Value stored in the ``user_id`` column.
        article_id: Value stored in the ``article_id`` column.
        duration_time: Value stored in the ``duration_time`` column.
    """

    def __init__(self, user_id, article_id, duration_time):
        # NOTE(review): RecommendService.make_dataset reads a
        # 'classification_id' column from the interaction data, while this row
        # carries 'user_id'. Confirm which column appended rows should fill.
        self.interaction_data = pd.DataFrame({
            'user_id': [user_id],
            'article_id': [article_id],
            'duration_time': [duration_time]
        })


class RecommendService:
    """LightFM-based article recommender backed by three CSV files.

    The constructor loads the user, article and interaction CSVs. Call
    ``fit_model`` (or ``sync_fit_model``) before asking for recommendations.
    """

    def __init__(self):
        self._run_blocking(self.init_data())

    @staticmethod
    def _run_blocking(coro):
        """Run ``coro`` to completion and return its result.

        ``asyncio.run`` raises ``RuntimeError`` when the calling thread already
        has a running event loop, so in that case the coroutine is run on a
        fresh loop in a worker thread. The calling thread blocks until it
        finishes.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread: the plain path is safe.
            return asyncio.run(coro)
        with ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coro).result()

    async def init_data(self):
        """Load the three CSV files from their default paths."""
        await self.set_user_datas('data/user_classification.csv')
        await self.set_article_datas('data/article_data.csv')
        await self.set_interaction_datas('data/interaction_data.csv')

    async def set_user_datas(self, user_data_path):
        """Remember ``user_data_path`` and load it into ``self.user_datas``."""
        self.user_data_path = user_data_path
        self.user_datas = await self.read_csv(user_data_path)

    async def set_article_datas(self, article_data_path):
        """Remember ``article_data_path`` and load it into ``self.article_datas``."""
        self.article_data_path = article_data_path
        self.article_datas = await self.read_csv(article_data_path)

    async def set_interaction_datas(self, interaction_data_path):
        """Remember ``interaction_data_path`` and load it into ``self.interaction_datas``."""
        self.interaction_data_path = interaction_data_path
        self.interaction_datas = await self.read_csv(interaction_data_path)

    async def read_csv(self, path):
        """Read the file at ``path`` with aiofiles and parse it as CSV.

        Returns:
            The parsed ``pandas.DataFrame``.
        """
        async with aiofiles.open(path, mode='r') as file:
            data = await file.read()
        return pd.read_csv(io.StringIO(data))

    def make_dataset(self):
        """Build the LightFM dataset, feature matrices and weighted interactions.

        Populates ``self.dataset``, ``self.user_features``,
        ``self.item_features``, ``self.interactions`` and ``self.weights``.
        Each interaction's weight is its ``duration_time`` multiplied by the
        time weight of the article (see ``get_time_weight``).
        """
        self.user_datas = pd.get_dummies(self.user_datas)
        user_feature_frame = self.user_datas.drop(columns=['classification_id'])
        self.user_features_col = user_feature_frame.columns.values
        self.user_feat = user_feature_frame.to_dict(orient='records')

        # 'created at' is metadata used only for time weighting, not a feature.
        article_frame = self.article_datas
        item_feature_frame = article_frame.drop(columns=['article_id', 'created at'])
        self.item_features_col = item_feature_frame.columns.values
        self.item_feat = item_feature_frame.to_dict(orient='records')

        self.dataset = Dataset()
        self.dataset.fit(users=list(self.user_datas['classification_id']),
                         items=list(article_frame['article_id']),
                         item_features=self.item_features_col,
                         user_features=self.user_features_col)

        self.item_features = self.dataset.build_item_features(
            zip(article_frame['article_id'], self.item_feat))
        self.user_features = self.dataset.build_user_features(
            zip(self.user_datas['classification_id'], self.user_feat))

        # Compute each article's time weight once rather than once per
        # interaction row; every lookup filters the article frame.
        time_weights = {
            article_id: self.get_time_weight(article_id)
            for article_id in self.interaction_datas['article_id'].unique()
        }
        (self.interactions, self.weights) = self.dataset.build_interactions(
            (user, article, duration * time_weights[article])
            for user, article, duration in zip(
                self.interaction_datas['classification_id'],
                self.interaction_datas['article_id'],
                self.interaction_datas['duration_time']))

        num_users, num_items = self.dataset.interactions_shape()
        print('Num users: {}, num_items {}.'.format(num_users, num_items))

    def make_model(self, n_components: int = 30, loss: str = 'warp', epoch: int = 30, num_thread: int = 4):
        """Create an untrained LightFM model and store the training settings.

        Args:
            n_components: Passed to LightFM as ``no_components``.
            loss: Passed to LightFM as ``loss``.
            epoch: Number of epochs used later by ``sync_fit_model``.
            num_thread: Number of threads used later by ``sync_fit_model``.
        """
        self.n_components = n_components
        self.loss = loss
        self.epoch = epoch
        self.num_thread = num_thread
        self.model = LightFM(no_components=self.n_components, loss=self.loss, random_state=1616)

    async def fit_model(self):
        """Train the model in a worker thread so the event loop stays free."""
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, self.sync_fit_model)

    def sync_fit_model(self):
        """Build the dataset and a default model, then fit the model (blocking)."""
        self.make_dataset()
        self.make_model()
        self.model.fit(self.interactions,
                       user_features=self.user_features,
                       item_features=self.item_features,
                       epochs=self.epoch,
                       num_threads=self.num_thread,
                       sample_weight=self.weights)

    def get_top_n_articles(self, user_id: int, article_num: int):
        """Return the ``article_num`` highest-scoring articles for a user.

        Args:
            user_id: A ``classification_id`` known to the fitted dataset. A
                value that is not a known id is passed to LightFM unchanged,
                i.e. treated as an internal row index as before.
            article_num: Number of rows to return.

        Returns:
            Rows of ``self.article_datas``, best score first.
        """
        # LightFM.predict expects internal indices, not the external ids.
        user_id_map = self.dataset.mapping()[0]
        user_index = user_id_map.get(user_id, user_id)
        item_ids = np.arange(self.interactions.shape[1])  # array of item ids to score
        # The model was fitted with feature matrices, so prediction needs them too.
        predictions = self.model.predict(user_index, item_ids,
                                         user_features=self.user_features,
                                         item_features=self.item_features)
        # Row positions in article_datas match the item order given to Dataset.fit.
        top_items = self.article_datas.iloc[np.argsort(-predictions)[:article_num]]
        return top_items

    def similar_items(self, item_id, N=10):
        """Return up to ``N`` articles whose embeddings score highest against ``item_id``.

        Args:
            item_id: Row index of the reference item in the item representations.
            N: Maximum number of rows to return.

        Returns:
            Rows of ``self.article_datas`` ordered by descending dot-product
            score. The reference item itself can be among them.
        """
        _, item_representations = self.model.get_item_representations(features=self.item_features)
        scores = item_representations.dot(item_representations[item_id, :])
        top_n = min(N, len(scores))
        if top_n <= 0:
            return self.article_datas.iloc[[]]
        best = np.argpartition(scores, -top_n)[-top_n:]
        # argpartition only selects the top entries; order them explicitly.
        best = best[np.argsort(-scores[best])]
        return self.article_datas.iloc[best]

    def get_time_weight(self, article_id, today=None):
        """Return a recency weight in [0, 1] for an article.

        The weight drops by 0.2 for every full 30 days since the article's
        ``created at`` date and reaches 0 once the article is 150 days old.
        Articles dated in the future are capped at 1.

        Args:
            article_id: Value to look up in the ``article_id`` column.
            today: Reference date; defaults to the current local date.

        Raises:
            IndexError: If no article row has this ``article_id``.
        """
        if today is None:
            today = datetime.now().date()
        matches = self.article_datas['article_id'] == article_id
        created_text = self.article_datas.loc[matches, 'created at'].iloc[0]
        created_on = datetime.strptime(created_text, "%Y-%m-%d").date()
        months_old = (today - created_on).days // 30
        return min(max(1 - (months_old / 5), 0), 1)

    async def add_interaction_data(self, interaction_data: InteractionDataInfo):
        """Append one interaction row to the interaction CSV on disk.

        Only the file is rewritten; ``self.interaction_datas`` is not changed.
        """
        df = await self.read_csv(self.interaction_data_path)
        df = pd.concat([df, interaction_data.interaction_data])
        await self.write_csv(df, self.interaction_data_path)
        print("interaction is added")

    async def add_article_data(self, article_data: ArticleDataInfo):
        """Append one article row to the article CSV on disk.

        Only the file is rewritten; ``self.article_datas`` is not changed.
        """
        df = await self.read_csv(self.article_data_path)
        df = pd.concat([df, article_data.article_data])
        await self.write_csv(df, self.article_data_path)
        print("article is added")

    async def write_csv(self, df, path):
        """Write ``df`` to ``path`` as CSV without the index column."""
        async with aiofiles.open(path, mode='w') as file:
            await file.write(df.to_csv(index=False))

# Example usage:
# recommend_service = RecommendService()
# asyncio.run(recommend_service.fit_model())
# print(recommend_service.get_top_n_articles(1, 5))
# print(recommend_service.similar_items(1))
# asyncio.run(recommend_service.add_article_data(ArticleDataInfo(101, 'Politics and Society', '2024-07-01')))
# asyncio.run(recommend_service.add_interaction_data(InteractionDataInfo(101, 101, 5)))