From ee09c472a0d2921e754bcd322bb69921e6e8308b Mon Sep 17 00:00:00 2001
From: mandu <jys0972@gmail.com>
Date: Wed, 10 Jul 2024 19:39:23 +0900
Subject: [PATCH 1/7] =?UTF-8?q?modify=20article=5Fdata.csv=20:=20=EB=82=A0?=
 =?UTF-8?q?=EC=A7=9C=20=EB=8D=B0=EC=9D=B4=ED=84=B0=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/article_data.csv | 204 +++++++++++++++++++++---------------------
 1 file changed, 102 insertions(+), 102 deletions(-)

diff --git a/data/article_data.csv b/data/article_data.csv
index 54bff6a..787a45b 100644
--- a/data/article_data.csv
+++ b/data/article_data.csv
@@ -1,102 +1,102 @@
-article_id,Economy and Business,Politics and Society,Technology and Culture,Sports and Leisure,Opinion and Analysis
-1,0.0,1.0,0.0,0.0,0.0
-2,0.0,1.0,0.0,0.0,0.0
-3,0.0,1.0,0.0,0.0,0.0
-4,0.0,0.0,1.0,0.0,0.0
-5,0.0,0.0,1.0,0.0,0.0
-6,0.0,1.0,0.0,0.0,0.0
-7,0.0,0.0,0.0,1.0,0.0
-8,1.0,0.0,0.0,0.0,0.0
-9,1.0,0.0,0.0,0.0,0.0
-10,0.0,0.0,0.0,1.0,0.0
-11,0.0,1.0,0.0,0.0,0.0
-12,0.0,0.0,1.0,0.0,0.0
-13,1.0,0.0,0.0,0.0,0.0
-14,0.0,0.0,0.0,0.0,1.0
-15,0.0,0.0,0.0,0.0,1.0
-16,0.0,0.0,0.0,1.0,0.0
-17,0.0,1.0,0.0,0.0,0.0
-18,1.0,0.0,0.0,0.0,0.0
-19,0.0,1.0,0.0,0.0,0.0
-20,1.0,0.0,0.0,0.0,0.0
-21,0.0,0.0,0.0,1.0,0.0
-22,0.0,0.0,0.0,1.0,0.0
-23,0.0,0.0,0.0,0.0,1.0
-24,0.0,1.0,0.0,0.0,0.0
-25,0.0,0.0,0.0,0.0,1.0
-26,0.0,1.0,0.0,0.0,0.0
-27,0.0,0.0,0.0,0.0,1.0
-28,0.0,0.0,1.0,0.0,0.0
-29,0.0,0.0,1.0,0.0,0.0
-30,0.0,0.0,1.0,0.0,0.0
-31,0.0,0.0,1.0,0.0,0.0
-32,1.0,0.0,0.0,0.0,0.0
-33,0.0,0.0,1.0,0.0,0.0
-34,0.0,0.0,0.0,1.0,0.0
-35,0.0,0.0,0.0,1.0,0.0
-36,1.0,0.0,0.0,0.0,0.0
-37,0.0,0.0,1.0,0.0,0.0
-38,0.0,0.0,1.0,0.0,0.0
-39,0.0,0.0,1.0,0.0,0.0
-40,0.0,0.0,0.0,0.0,1.0
-41,0.0,1.0,0.0,0.0,0.0
-42,0.0,0.0,0.0,0.0,1.0
-43,0.0,1.0,0.0,0.0,0.0
-44,0.0,0.0,1.0,0.0,0.0
-45,0.0,0.0,1.0,0.0,0.0
-46,0.0,0.0,0.0,0.0,1.0
-47,0.0,0.0,0.0,0.0,1.0
-48,0.0,1.0,0.0,0.0,0.0
-49,0.0,0.0,0.0,1.0,0.0
-50,0.0,1.0,0.0,0.0,0.0
-51,0.0,0.0,0.0,0.0,1.0
-52,0.0,0.0,0.0,0.0,1.0
-53,1.0,0.0,0.0,0.0,0.0
-54,0.0,0.0,0.0,0.0,1.0
-55,1.0,0.0,0.0,0.0,0.0
-56,0.0,0.0,0.0,1.0,0.0
-57,0.0,1.0,0.0,0.0,0.0
-58,0.0,1.0,0.0,0.0,0.0
-59,1.0,0.0,0.0,0.0,0.0
-60,0.0,1.0,0.0,0.0,0.0
-61,0.0,0.0,0.0,0.0,1.0
-62,0.0,0.0,1.0,0.0,0.0
-63,1.0,0.0,0.0,0.0,0.0
-64,0.0,1.0,0.0,0.0,0.0
-65,1.0,0.0,0.0,0.0,0.0
-66,1.0,0.0,0.0,0.0,0.0
-67,0.0,0.0,1.0,0.0,0.0
-68,0.0,0.0,0.0,0.0,1.0
-69,1.0,0.0,0.0,0.0,0.0
-70,0.0,1.0,0.0,0.0,0.0
-71,0.0,0.0,0.0,1.0,0.0
-72,1.0,0.0,0.0,0.0,0.0
-73,1.0,0.0,0.0,0.0,0.0
-74,0.0,0.0,1.0,0.0,0.0
-75,0.0,0.0,0.0,0.0,1.0
-76,0.0,0.0,0.0,1.0,0.0
-77,0.0,1.0,0.0,0.0,0.0
-78,0.0,0.0,0.0,1.0,0.0
-79,0.0,1.0,0.0,0.0,0.0
-80,0.0,0.0,0.0,0.0,1.0
-81,0.0,1.0,0.0,0.0,0.0
-82,0.0,0.0,1.0,0.0,0.0
-83,0.0,0.0,1.0,0.0,0.0
-84,0.0,0.0,1.0,0.0,0.0
-85,0.0,0.0,1.0,0.0,0.0
-86,0.0,0.0,0.0,1.0,0.0
-87,0.0,0.0,0.0,0.0,1.0
-88,0.0,1.0,0.0,0.0,0.0
-89,0.0,1.0,0.0,0.0,0.0
-90,0.0,0.0,1.0,0.0,0.0
-91,0.0,0.0,1.0,0.0,0.0
-92,1.0,0.0,0.0,0.0,0.0
-93,0.0,0.0,0.0,0.0,1.0
-94,0.0,0.0,0.0,1.0,0.0
-95,0.0,1.0,0.0,0.0,0.0
-96,1.0,0.0,0.0,0.0,0.0
-97,1.0,0.0,0.0,0.0,0.0
-98,0.0,1.0,0.0,0.0,0.0
-99,0.0,0.0,0.0,1.0,0.0
-100,1.0,0.0,0.0,0.0,0.0
-101,0.0,1.0,0.0,0.0,0.0
+article_id,Economy and Business,Politics and Society,Technology and Culture,Sports and Leisure,Opinion and Analysis,created at
+1,0.0,1.0,0.0,0.0,0.0,2023-01-01
+2,0.0,1.0,0.0,0.0,0.0,2023-01-02
+3,0.0,1.0,0.0,0.0,0.0,2023-01-03
+4,0.0,0.0,1.0,0.0,0.0,2023-01-04
+5,0.0,0.0,1.0,0.0,0.0,2023-01-05
+6,0.0,1.0,0.0,0.0,0.0,2023-01-06
+7,0.0,0.0,0.0,1.0,0.0,2023-01-07
+8,1.0,0.0,0.0,0.0,0.0,2023-01-08
+9,1.0,0.0,0.0,0.0,0.0,2023-01-09
+10,0.0,0.0,0.0,1.0,0.0,2023-01-10
+11,0.0,1.0,0.0,0.0,0.0,2023-01-11
+12,0.0,0.0,1.0,0.0,0.0,2023-01-12
+13,1.0,0.0,0.0,0.0,0.0,2023-01-13
+14,0.0,0.0,0.0,0.0,1.0,2023-01-14
+15,0.0,0.0,0.0,0.0,1.0,2023-01-15
+16,0.0,0.0,0.0,1.0,0.0,2023-01-16
+17,0.0,1.0,0.0,0.0,0.0,2023-01-17
+18,1.0,0.0,0.0,0.0,0.0,2023-01-18
+19,0.0,1.0,0.0,0.0,0.0,2023-01-19
+20,1.0,0.0,0.0,0.0,0.0,2023-01-20
+21,0.0,0.0,0.0,1.0,0.0,2023-01-21
+22,0.0,0.0,0.0,1.0,0.0,2023-01-22
+23,0.0,0.0,0.0,0.0,1.0,2023-01-23
+24,0.0,1.0,0.0,0.0,0.0,2023-01-24
+25,0.0,0.0,0.0,0.0,1.0,2023-01-25
+26,0.0,1.0,0.0,0.0,0.0,2023-01-26
+27,0.0,0.0,0.0,0.0,1.0,2023-01-27
+28,0.0,0.0,1.0,0.0,0.0,2023-01-28
+29,0.0,0.0,1.0,0.0,0.0,2023-01-29
+30,0.0,0.0,1.0,0.0,0.0,2023-01-30
+31,0.0,0.0,1.0,0.0,0.0,2023-01-31
+32,1.0,0.0,0.0,0.0,0.0,2023-02-01
+33,0.0,0.0,1.0,0.0,0.0,2023-02-02
+34,0.0,0.0,0.0,1.0,0.0,2023-02-03
+35,0.0,0.0,0.0,1.0,0.0,2023-02-04
+36,1.0,0.0,0.0,0.0,0.0,2023-02-05
+37,0.0,0.0,1.0,0.0,0.0,2023-02-06
+38,0.0,0.0,1.0,0.0,0.0,2023-02-07
+39,0.0,0.0,1.0,0.0,0.0,2023-02-08
+40,0.0,0.0,0.0,0.0,1.0,2023-02-09
+41,0.0,1.0,0.0,0.0,0.0,2023-02-10
+42,0.0,0.0,0.0,0.0,1.0,2023-02-11
+43,0.0,1.0,0.0,0.0,0.0,2023-02-12
+44,0.0,0.0,1.0,0.0,0.0,2023-02-13
+45,0.0,0.0,1.0,0.0,0.0,2023-02-14
+46,0.0,0.0,0.0,0.0,1.0,2023-02-15
+47,0.0,0.0,0.0,0.0,1.0,2023-02-16
+48,0.0,1.0,0.0,0.0,0.0,2023-02-17
+49,0.0,0.0,0.0,1.0,0.0,2023-02-18
+50,0.0,1.0,0.0,0.0,0.0,2023-02-19
+51,0.0,0.0,0.0,0.0,1.0,2023-02-20
+52,0.0,0.0,0.0,0.0,1.0,2023-02-21
+53,1.0,0.0,0.0,0.0,0.0,2023-02-22
+54,0.0,0.0,0.0,0.0,1.0,2023-02-23
+55,1.0,0.0,0.0,0.0,0.0,2023-02-24
+56,0.0,0.0,0.0,1.0,0.0,2023-02-25
+57,0.0,1.0,0.0,0.0,0.0,2023-02-26
+58,0.0,1.0,0.0,0.0,0.0,2023-02-27
+59,1.0,0.0,0.0,0.0,0.0,2023-02-28
+60,0.0,1.0,0.0,0.0,0.0,2023-03-01
+61,0.0,0.0,0.0,0.0,1.0,2023-03-02
+62,0.0,0.0,1.0,0.0,0.0,2023-03-03
+63,1.0,0.0,0.0,0.0,0.0,2023-03-04
+64,0.0,1.0,0.0,0.0,0.0,2023-03-05
+65,1.0,0.0,0.0,0.0,0.0,2023-03-06
+66,1.0,0.0,0.0,0.0,0.0,2023-03-07
+67,0.0,0.0,1.0,0.0,0.0,2023-03-08
+68,0.0,0.0,0.0,0.0,1.0,2023-03-09
+69,1.0,0.0,0.0,0.0,0.0,2023-03-10
+70,0.0,1.0,0.0,0.0,0.0,2023-03-11
+71,0.0,0.0,0.0,1.0,0.0,2023-03-12
+72,1.0,0.0,0.0,0.0,0.0,2023-03-13
+73,1.0,0.0,0.0,0.0,0.0,2023-03-14
+74,0.0,0.0,1.0,0.0,0.0,2023-03-15
+75,0.0,0.0,0.0,0.0,1.0,2023-03-16
+76,0.0,0.0,0.0,1.0,0.0,2023-03-17
+77,0.0,1.0,0.0,0.0,0.0,2023-03-18
+78,0.0,0.0,0.0,1.0,0.0,2023-03-19
+79,0.0,1.0,0.0,0.0,0.0,2023-03-20
+80,0.0,0.0,0.0,0.0,1.0,2023-03-21
+81,0.0,1.0,0.0,0.0,0.0,2023-03-22
+82,0.0,0.0,1.0,0.0,0.0,2023-03-23
+83,0.0,0.0,1.0,0.0,0.0,2023-03-24
+84,0.0,0.0,1.0,0.0,0.0,2023-03-25
+85,0.0,0.0,1.0,0.0,0.0,2023-03-26
+86,0.0,0.0,0.0,1.0,0.0,2023-03-27
+87,0.0,0.0,0.0,0.0,1.0,2023-03-28
+88,0.0,1.0,0.0,0.0,0.0,2023-03-29
+89,0.0,1.0,0.0,0.0,0.0,2023-03-30
+90,0.0,0.0,1.0,0.0,0.0,2023-03-31
+91,0.0,0.0,1.0,0.0,0.0,2023-04-01
+92,1.0,0.0,0.0,0.0,0.0,2023-04-02
+93,0.0,0.0,0.0,0.0,1.0,2023-04-03
+94,0.0,0.0,0.0,1.0,0.0,2023-04-04
+95,0.0,1.0,0.0,0.0,0.0,2023-04-05
+96,1.0,0.0,0.0,0.0,0.0,2023-04-06
+97,1.0,0.0,0.0,0.0,0.0,2023-04-07
+98,0.0,1.0,0.0,0.0,0.0,2023-04-08
+99,0.0,0.0,0.0,1.0,0.0,2023-04-09
+100,1.0,0.0,0.0,0.0,0.0,2023-04-10
+101,0.0,1.0,0.0,0.0,0.0,2023-04-11

From d46403f8425614652a9b25d034ad4b4aa87f65e5 Mon Sep 17 00:00:00 2001
From: mandu <jys0972@gmail.com>
Date: Wed, 10 Jul 2024 19:39:50 +0900
Subject: [PATCH 2/7] =?UTF-8?q?modify=20interaction=5Fdata.csv=20:=20?=
 =?UTF-8?q?=ED=95=84=EC=9A=94=EC=97=86=EB=8A=94=20=EC=97=B4=20=EC=A0=9C?=
 =?UTF-8?q?=EA=B1=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/interaction_data.csv | 131 +++++++++++++++++++-------------------
 1 file changed, 65 insertions(+), 66 deletions(-)

diff --git a/data/interaction_data.csv b/data/interaction_data.csv
index f10db4f..87d6b40 100644
--- a/data/interaction_data.csv
+++ b/data/interaction_data.csv
@@ -1,66 +1,65 @@
-classification_id,article_id,duration_time,user_id
-13.0,41,98,
-20.0,74,65,
-3.0,1,1,
-18.0,50,63,
-1.0,42,64,
-15.0,61,33,
-19.0,47,83,
-4.0,27,17,
-12.0,58,59,
-6.0,38,92,
-16.0,60,75,
-14.0,72,77,
-17.0,75,50,
-11.0,62,20,
-7.0,36,79,
-5.0,20,27,
-9.0,19,45,
-8.0,19,30,
-10.0,44,23,
-2.0,8,14,
-2.0,58,53,
-17.0,87,54,
-14.0,17,6,
-1.0,12,12,
-12.0,15,92,
-11.0,19,30,
-12.0,62,6,
-20.0,100,38,
-10.0,32,37,
-19.0,57,3,
-5.0,76,94,
-18.0,53,39,
-4.0,91,18,
-14.0,16,5,
-12.0,45,74,
-13.0,77,21,
-2.0,56,69,
-9.0,36,66,
-7.0,28,38,
-15.0,24,57,
-15.0,39,98,
-10.0,54,33,
-8.0,85,95,
-20.0,36,10,
-7.0,8,13,
-14.0,67,26,
-16.0,16,34,
-10.0,70,10,
-6.0,80,85,
-5.0,14,34,
-6.0,50,63,
-1.0,96,54,
-8.0,38,92,
-3.0,83,29,
-11.0,91,72,
-7.0,92,64,
-2.0,36,73,
-17.0,63,92,
-9.0,75,73,
-16.0,39,85,
-1.0,61,74,
-20.0,76,90,
-6.0,90,85,
-12.0,19,30,
-,101,5,101.0
+classification_id,article_id,duration_time
+13.0,41,98
+20.0,74,65
+3.0,1,1
+18.0,50,63
+1.0,42,64
+15.0,61,33
+19.0,47,83
+4.0,27,17
+12.0,58,59
+6.0,38,92
+16.0,60,75
+14.0,72,77
+17.0,75,50
+11.0,62,20
+7.0,36,79
+5.0,20,27
+9.0,19,45
+8.0,19,30
+10.0,44,23
+2.0,8,14
+2.0,58,53
+17.0,87,54
+14.0,17,6
+1.0,12,12
+12.0,15,92
+11.0,19,30
+12.0,62,6
+20.0,100,38
+10.0,32,37
+19.0,57,3
+5.0,76,94
+18.0,53,39
+4.0,91,18
+14.0,16,5
+12.0,45,74
+13.0,77,21
+2.0,56,69
+9.0,36,66
+7.0,28,38
+15.0,24,57
+15.0,39,98
+10.0,54,33
+8.0,85,95
+20.0,36,10
+7.0,8,13
+14.0,67,26
+16.0,16,34
+10.0,70,10
+6.0,80,85
+5.0,14,34
+6.0,50,63
+1.0,96,54
+8.0,38,92
+3.0,83,29
+11.0,91,72
+7.0,92,64
+2.0,36,73
+17.0,63,92
+9.0,75,73
+16.0,39,85
+1.0,61,74
+20.0,76,90
+6.0,90,85
+12.0,19,30
\ No newline at end of file

From 306ca692fc5122255bc641ac8a4171c8c3210d80 Mon Sep 17 00:00:00 2001
From: mandu <jys0972@gmail.com>
Date: Wed, 10 Jul 2024 19:40:46 +0900
Subject: [PATCH 3/7] =?UTF-8?q?feat=20recommend=5Fservice.py=20:=20?=
 =?UTF-8?q?=EC=B5=9C=EC=8B=A0=20=EA=B8=B0=EC=82=AC=EC=97=90=20=EA=B0=80?=
 =?UTF-8?q?=EC=A4=91=EC=B9=98=EA=B0=80=20=EC=B6=94=EA=B0=80=EB=90=98?=
 =?UTF-8?q?=EB=8F=84=EB=A1=9D=20=EC=BD=94=EB=93=9C=20=EB=B3=80=EA=B2=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 recommend_service.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/recommend_service.py b/recommend_service.py
index e1036f8..5cb2f5c 100644
--- a/recommend_service.py
+++ b/recommend_service.py
@@ -6,18 +6,21 @@
 import numpy as np
 from lightfm.cross_validation import random_train_test_split
 import os
-from scipy.sparse import csr_matrix
+from scipy.sparse import csr_matrix, coo_matrix
 import pandas as pd
 
+from datetime import datetime
+
 class ArticleDataInfo:
-    def __init__(self, article_id, category):
+    def __init__(self, article_id, category, created_at):
         self.article_data = pd.DataFrame({
             'article_id' : article_id,
             'Economy and Business' : [0], 
             'Politics and Society' : [0], 
             'Technology and Culture' : [0], 
             'Sports and Leisure' : [0], 
-            'Opinion and Analysis' : [0]
+            'Opinion and Analysis' : [0],
+            'created at' : [created_at]
             })
         self.article_data.iloc[0][category] = 1
 
@@ -53,8 +56,8 @@ def make_dataset(self):
         self.user_feat = self.user_datas.drop(columns =['classification_id']).to_dict(orient='records')
         
         self.item_features = self.article_datas
-        self.item_features_col = self.item_features.drop(columns=['article_id']).columns.values
-        self.item_feat = self.item_features.drop(columns =['article_id']).to_dict(orient='records')
+        self.item_features_col = self.item_features.drop(columns=['article_id', 'created at']).columns.values
+        self.item_feat = self.item_features.drop(columns =['article_id', 'created at']).to_dict(orient='records')
         
         self.dataset = Dataset()
         self.dataset.fit(users=[x for x in self.user_datas['classification_id']], items=[x for x in self.article_datas['article_id']], item_features=self.item_features_col, user_features=self.user_features_col)
@@ -62,8 +65,11 @@ def make_dataset(self):
         self.item_features = self.dataset.build_item_features((x,y) for x,y in zip(self.item_features['article_id'], self.item_feat))
         self.user_features = self.dataset.build_user_features((x,y) for x,y in zip(self.user_datas['classification_id'], self.user_feat))
         
-        (self.interactions, self.weights) = self.dataset.build_interactions((x, y)
-                                                    for x,y in zip(self.interaction_datas['classification_id'], self.interaction_datas['article_id']))
+        (self.interactions, self.weights) = self.dataset.build_interactions((x, y, z * self.get_time_weight(y))
+                                                    for x,y, z in zip(
+                                                        self.interaction_datas['classification_id'], 
+                                                        self.interaction_datas['article_id'], 
+                                                        self.interaction_datas['duration_time']))
     
         num_users, num_items = self.dataset.interactions_shape()
         print('Num users: {}, num_items {}.'.format(num_users, num_items))
@@ -78,9 +84,7 @@ def make_model(self, n_components:int = 30, loss:str = 'warp', epoch:int = 30, n
     def fit_model(self): 
         self.make_dataset()
         self.make_model()
-        # self.train, self.test = random_train_test_split(self.interactions,test_percentage=0.2, random_state=779)
-        # self.train_w, self.test_w = random_train_test_split(self.weights, test_percentage=0.2, random_state=779)
-        self.model.fit(self.interactions,  user_features= self.user_features, item_features= self.item_features, epochs=self.epoch,num_threads = self.num_thread, sample_weight = self.weights)
+        self.model.fit(self.interactions, user_features= self.user_features, item_features= self.item_features, epochs=self.epoch,num_threads = self.num_thread, sample_weight = self.weights)
         
     def get_top_n_articles(self, user_id:int, article_num:int):
         item_ids = np.arange(self.interactions.shape[1])  # 예측할 아이템 ID 배열
@@ -104,6 +108,12 @@ def similar_items(self, item_id, N=10):
     #     scores_new_user = self.model.predict(user_ids = 0,item_ids = np.arange(self.interactions.shape[1]), user_features=new_user)
     #     top_items_new_user = self.article_datas.iloc[np.argsort(-scores_new_user)]
     #     return top_items_new_user[:N]
+    def get_time_weight(self, article_id):
+        today = datetime.now().date()
+        date_obj = datetime.strptime(self.article_datas[self.article_datas['article_id'] == article_id]['created at'].iloc[0], "%Y-%m-%d").date()
+        difference = today - date_obj
+        return max(1 - ((difference.days//30)/5), 0)
+            
         
     def add_interaction_data(self, interaction_data:InteractionDataInfo):
         df = pd.read_csv(self.interaction_data_path)

From df02ba28053d3333df54f35e93edf3a052f8390b Mon Sep 17 00:00:00 2001
From: mandu <jys0972@gmail.com>
Date: Wed, 10 Jul 2024 19:41:31 +0900
Subject: [PATCH 4/7] =?UTF-8?q?feat=20recommend=5Fservice.py=20:=20article?=
 =?UTF-8?q?=EC=9D=B4=20=EC=B6=94=EA=B0=80=EB=90=A0=20=EB=95=8C=20=EB=B6=80?=
 =?UTF-8?q?=EB=B6=84=EC=A0=81=EC=9D=B8=20model=20fit=20=ED=95=A8=EC=88=98?=
 =?UTF-8?q?=20=EA=B5=AC=ED=98=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 recommend_service.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/recommend_service.py b/recommend_service.py
index 5cb2f5c..47725b6 100644
--- a/recommend_service.py
+++ b/recommend_service.py
@@ -114,6 +114,9 @@ def get_time_weight(self, article_id):
         difference = today - date_obj
         return max(1 - ((difference.days//30)/5), 0)
             
+    def fit_model_partialy(self):
+        self.make_dataset
+        self.model.fit_partial(self.interactions, item_features=self.item_features)
         
     def add_interaction_data(self, interaction_data:InteractionDataInfo):
         df = pd.read_csv(self.interaction_data_path)

From dea2cbdc804511433855d7ac8a319213bf393ca7 Mon Sep 17 00:00:00 2001
From: mandu <jys0972@gmail.com>
Date: Wed, 10 Jul 2024 19:41:54 +0900
Subject: [PATCH 5/7] =?UTF-8?q?refact=20recommend=5Fservice.py=20:=20?=
 =?UTF-8?q?=ED=95=84=EC=9A=94=EC=97=86=EB=8A=94=20=EC=BD=94=EB=93=9C=20?=
 =?UTF-8?q?=EC=A0=9C=EA=B1=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 recommend_service.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/recommend_service.py b/recommend_service.py
index 47725b6..0a432f3 100644
--- a/recommend_service.py
+++ b/recommend_service.py
@@ -101,13 +101,6 @@ def similar_items(self, item_id, N=10):
         
         return self.article_datas.iloc[best]
     
-    # def items_for_new_user(self, new_user_data:UserDataInfo, N:int):
-    #     new_user = new_user_data.get_user_data()
-    #     print(new_user)
-    #     new_user = csr_matrix(new_user)
-    #     scores_new_user = self.model.predict(user_ids = 0,item_ids = np.arange(self.interactions.shape[1]), user_features=new_user)
-    #     top_items_new_user = self.article_datas.iloc[np.argsort(-scores_new_user)]
-    #     return top_items_new_user[:N]
     def get_time_weight(self, article_id):
         today = datetime.now().date()
         date_obj = datetime.strptime(self.article_datas[self.article_datas['article_id'] == article_id]['created at'].iloc[0], "%Y-%m-%d").date()

From 78090d56b02e032b09501c402810e1e95b0b76f8 Mon Sep 17 00:00:00 2001
From: mandu <jys0972@gmail.com>
Date: Mon, 5 Aug 2024 22:56:12 +0900
Subject: [PATCH 6/7] =?UTF-8?q?feat(recommend=5Fservice.py)=20:=20?=
 =?UTF-8?q?=EC=88=98=EC=A0=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 recommend_service.py | 168 ++++++++++++++++++++++++-------------------
 1 file changed, 96 insertions(+), 72 deletions(-)

diff --git a/recommend_service.py b/recommend_service.py
index 0a432f3..0013139 100644
--- a/recommend_service.py
+++ b/recommend_service.py
@@ -1,124 +1,148 @@
+import asyncio
 import warnings
-warnings.filterwarnings('ignore')
+import pandas as pd
+from datetime import datetime
+import aiofiles
+from concurrent.futures import ThreadPoolExecutor
 from lightfm import LightFM
 from lightfm.data import Dataset
-from lightfm.evaluation import precision_at_k, recall_at_k, auc_score
 import numpy as np
-from lightfm.cross_validation import random_train_test_split
-import os
-from scipy.sparse import csr_matrix, coo_matrix
-import pandas as pd
+import io
 
-from datetime import datetime
+warnings.filterwarnings('ignore')
 
 class ArticleDataInfo:
     def __init__(self, article_id, category, created_at):
         self.article_data = pd.DataFrame({
-            'article_id' : article_id,
-            'Economy and Business' : [0], 
-            'Politics and Society' : [0], 
-            'Technology and Culture' : [0], 
-            'Sports and Leisure' : [0], 
-            'Opinion and Analysis' : [0],
-            'created at' : [created_at]
-            })
+            'article_id': [article_id],
+            'Economy and Business': [0],
+            'Politics and Society': [0],
+            'Technology and Culture': [0],
+            'Sports and Leisure': [0],
+            'Opinion and Analysis': [0],
+            'created at': [created_at]
+        })
-        self.article_data.iloc[0][category] = 1
+        self.article_data.loc[0, category] = 1  # single .loc write; iloc[0][category] only set a temporary row copy
 
 class InteractionDataInfo:
     def __init__(self, user_id, article_id, duration_time):
         self.interaction_data = pd.DataFrame({
-            'user_id' : [user_id],
-            'article_id' : [article_id],
-            'duration_time' : [duration_time]
+            'user_id': [user_id],
+            'article_id': [article_id],
+            'duration_time': [duration_time]
         })
 
 class RecommendService:
     def __init__(self):
-        self.set_user_datas('data/user_classification.csv')
-        self.set_article_datas('data/article_data.csv')
-        self.set_interaction_datas('data/interaction_data.csv')
-        
-    def set_user_datas(self, user_data_path):
+        asyncio.run(self.init_data())
+
+    async def init_data(self):
+        await self.set_user_datas('data/user_classification.csv')
+        await self.set_article_datas('data/article_data.csv')
+        await self.set_interaction_datas('data/interaction_data.csv')
+
+    async def set_user_datas(self, user_data_path):
         self.user_data_path = user_data_path
-        self.user_datas = pd.read_csv(user_data_path)
-    
-    def set_article_datas(self, article_data_path):
+        self.user_datas = await self.read_csv(user_data_path)
+
+    async def set_article_datas(self, article_data_path):
         self.article_data_path = article_data_path
-        self.article_datas = pd.read_csv(article_data_path)
-        
-    def set_interaction_datas(self, interaction_data_path):
+        self.article_datas = await self.read_csv(article_data_path)
+
+    async def set_interaction_datas(self, interaction_data_path):
         self.interaction_data_path = interaction_data_path
-        self.interaction_datas = pd.read_csv(interaction_data_path)
-        
+        self.interaction_datas = await self.read_csv(interaction_data_path)
+
+    async def read_csv(self, path):
+        async with aiofiles.open(path, mode='r') as file:
+            data = await file.read()
+        return pd.read_csv(io.StringIO(data))
+
     def make_dataset(self):
         self.user_datas = pd.get_dummies(self.user_datas)
-        self.user_features_col = self.user_datas.drop(columns =['classification_id']).columns.values
-        self.user_feat = self.user_datas.drop(columns =['classification_id']).to_dict(orient='records')
-        
+        self.user_features_col = self.user_datas.drop(columns=['classification_id']).columns.values
+        self.user_feat = self.user_datas.drop(columns=['classification_id']).to_dict(orient='records')
+
         self.item_features = self.article_datas
         self.item_features_col = self.item_features.drop(columns=['article_id', 'created at']).columns.values
-        self.item_feat = self.item_features.drop(columns =['article_id', 'created at']).to_dict(orient='records')
-        
+        self.item_feat = self.item_features.drop(columns=['article_id', 'created at']).to_dict(orient='records')
+
         self.dataset = Dataset()
-        self.dataset.fit(users=[x for x in self.user_datas['classification_id']], items=[x for x in self.article_datas['article_id']], item_features=self.item_features_col, user_features=self.user_features_col)
-        
-        self.item_features = self.dataset.build_item_features((x,y) for x,y in zip(self.item_features['article_id'], self.item_feat))
-        self.user_features = self.dataset.build_user_features((x,y) for x,y in zip(self.user_datas['classification_id'], self.user_feat))
-        
+        self.dataset.fit(users=[x for x in self.user_datas['classification_id']],
+                         items=[x for x in self.article_datas['article_id']],
+                         item_features=self.item_features_col,
+                         user_features=self.user_features_col)
+
+        self.item_features = self.dataset.build_item_features((x, y) for x, y in zip(self.item_features['article_id'], self.item_feat))
+        self.user_features = self.dataset.build_user_features((x, y) for x, y in zip(self.user_datas['classification_id'], self.user_feat))
+
         (self.interactions, self.weights) = self.dataset.build_interactions((x, y, z * self.get_time_weight(y))
-                                                    for x,y, z in zip(
-                                                        self.interaction_datas['classification_id'], 
-                                                        self.interaction_datas['article_id'], 
-                                                        self.interaction_datas['duration_time']))
-    
+                                                                            for x, y, z in zip(
+                self.interaction_datas['classification_id'],
+                self.interaction_datas['article_id'],
+                self.interaction_datas['duration_time']))
+
         num_users, num_items = self.dataset.interactions_shape()
         print('Num users: {}, num_items {}.'.format(num_users, num_items))
-        
-    def make_model(self, n_components:int = 30, loss:str = 'warp', epoch:int = 30, num_thread:int = 4):
+
+    def make_model(self, n_components: int = 30, loss: str = 'warp', epoch: int = 30, num_thread: int = 4):
         self.n_components = n_components
         self.loss = loss
         self.epoch = epoch
         self.num_thread = num_thread
-        self.model = LightFM(no_components= self.n_components, loss=self.loss, random_state = 1616)
-        
-    def fit_model(self): 
+        self.model = LightFM(no_components=self.n_components, loss=self.loss, random_state=1616)
+
+    async def fit_model(self):
+        loop = asyncio.get_event_loop()
+        with ThreadPoolExecutor() as pool:
+            await loop.run_in_executor(pool, self.sync_fit_model)
+
+    def sync_fit_model(self):
         self.make_dataset()
         self.make_model()
-        self.model.fit(self.interactions, user_features= self.user_features, item_features= self.item_features, epochs=self.epoch,num_threads = self.num_thread, sample_weight = self.weights)
-        
-    def get_top_n_articles(self, user_id:int, article_num:int):
+        self.model.fit(self.interactions, user_features=self.user_features, item_features=self.item_features, epochs=self.epoch, num_threads=self.num_thread, sample_weight=self.weights)
+
+    def get_top_n_articles(self, user_id: int, article_num: int):
         item_ids = np.arange(self.interactions.shape[1])  # 예측할 아이템 ID 배열
 
         predictions = self.model.predict(user_id, item_ids)
         top_items = self.article_datas.iloc[np.argsort(-predictions)[:article_num]]
         return top_items
-    
+
     def similar_items(self, item_id, N=10):
-        item_bias ,item_representations = self.model.get_item_representations(features=self.item_features)
+        item_bias, item_representations = self.model.get_item_representations(features=self.item_features)
 
         scores = item_representations.dot(item_representations[item_id, :])
         best = np.argpartition(scores, -N)[-N:]
-        
+
         return self.article_datas.iloc[best]
-    
+
     def get_time_weight(self, article_id):
         today = datetime.now().date()
         date_obj = datetime.strptime(self.article_datas[self.article_datas['article_id'] == article_id]['created at'].iloc[0], "%Y-%m-%d").date()
         difference = today - date_obj
-        return max(1 - ((difference.days//30)/5), 0)
-            
-    def fit_model_partialy(self):
-        self.make_dataset
-        self.model.fit_partial(self.interactions, item_features=self.item_features)
-        
-    def add_interaction_data(self, interaction_data:InteractionDataInfo):
-        df = pd.read_csv(self.interaction_data_path)
+        return max(1 - ((difference.days // 30) / 5), 0)
+
+    async def add_interaction_data(self, interaction_data: InteractionDataInfo):
+        df = await self.read_csv(self.interaction_data_path)
         df = pd.concat([df, interaction_data.interaction_data])
-        df.to_csv(self.interaction_data_path, index=False)
-        print("interactin is added")
-        
-    def add_article_data(self, article_data:ArticleDataInfo):
-        df = pd.read_csv(self.article_data_path)
+        await self.write_csv(df, self.interaction_data_path)
+        print("interaction is added")
+
+    async def add_article_data(self, article_data: ArticleDataInfo):
+        df = await self.read_csv(self.article_data_path)
         df = pd.concat([df, article_data.article_data])
-        df.to_csv(self.article_data_path, index=False)
+        await self.write_csv(df, self.article_data_path)
         print("article is added")
+
+    async def write_csv(self, df, path):
+        async with aiofiles.open(path, mode='w') as file:
+            await file.write(df.to_csv(index=False))
+
+# Example usage:
+# recommend_service = RecommendService()
+# asyncio.run(recommend_service.fit_model())
+# print(recommend_service.get_top_n_articles(1, 5))
+# print(recommend_service.similar_items(1))
+# asyncio.run(recommend_service.add_article_data(ArticleDataInfo(101, 'Politics and Society', '2024-07-01')))
+# asyncio.run(recommend_service.add_interaction_data(InteractionDataInfo(101, 101, 5)))

From 2dbeef4a83d6ad9d3bb702690c7679d2c47ad7f8 Mon Sep 17 00:00:00 2001
From: mandu <jys0972@gmail.com>
Date: Mon, 5 Aug 2024 22:56:32 +0900
Subject: [PATCH 7/7] =?UTF-8?q?feat(main.py)=20:=20=ED=85=8C=EC=8A=A4?=
 =?UTF-8?q?=ED=8A=B8=EC=9A=A9=20=EB=A1=9C=EC=A7=81=20=EA=B5=AC=ED=98=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 main.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 main.py

diff --git a/main.py b/main.py
new file mode 100644
index 0000000..9499575
--- /dev/null
+++ b/main.py
@@ -0,0 +1,31 @@
+from user_classification import user_data_to_classification_id
+from recommend_service import RecommendService
+import pandas as pd
+import asyncio
+
+async def main():
+    """Fit the recommender, then print one user's class id and top-5 articles."""
+    user_data_path = 'data/user_data_classified.csv'
+    user_datas = pd.read_csv(user_data_path)
+    # RecommendService.__init__ calls asyncio.run(), which raises RuntimeError
+    # inside a running event loop, so build the service on a worker thread.
+    loop = asyncio.get_running_loop()
+    recommendService = await loop.run_in_executor(None, RecommendService)
+    await recommendService.fit_model()
+    user_id = 1  # positional row index into user_datas (used with .iloc)
+    user_row = user_datas.iloc[user_id]
+    # Map the user's profile to a classification id once and reuse it below.
+    classification_id = user_data_to_classification_id(
+        user_row['sex'],
+        user_row['issue finder'],
+        user_row['lifestyle consumer'],
+        user_row['entertainer'],
+        user_row['tech specialist'],
+        user_row['professionals']
+    )
+    print(classification_id)
+    print(recommendService.get_top_n_articles(classification_id, 5))
+
+if __name__ == '__main__':
+    # main() only creates a coroutine object; asyncio.run() actually executes it.
+    asyncio.run(main())
\ No newline at end of file