-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMLmodel.py
75 lines (61 loc) · 2.64 KB
/
MLmodel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import pandas as pd
import sys
import json
from scipy.sparse import csr_matrix
import pymysql
from CollaborativeFiltering import CollaborativeFiltering
# Step 1: Load Interaction Data from MySQL
def load_interaction_data(host, user, password, database):
    """
    Load user-article interactions from the database.

    :param host: Database host
    :param user: Database user
    :param password: Database password
    :param database: Database name
    :return: DataFrame with columns [user_id, article_id, interaction];
             rows whose interaction value is not numeric are dropped
    """
    conn = pymysql.connect(host=host, user=user, password=password, database=database)
    try:
        query = "SELECT user_id, article_id, interaction FROM user_article_interactions"
        df = pd.read_sql_query(query, conn)
    finally:
        # Always release the connection, even if the query fails
        # (the original leaked the connection on error).
        conn.close()
    # Coerce interaction to numeric; unparseable values become NaN and are dropped.
    df['interaction'] = pd.to_numeric(df['interaction'], errors='coerce')
    df.dropna(subset=['interaction'], inplace=True)
    return df
# Step 2: Create User-Item Matrix
def create_user_item_matrix(df):
    """
    Build a sparse user-item interaction matrix from long-format data.

    :param df: DataFrame with columns [user_id, article_id, interaction]
    :return: (user_item_matrix, user_map, article_map) where user_map and
             article_map send original ids to the matrix row/column indices
    """
    # Assign each distinct id a dense index, in first-appearance order.
    user_map = {user_id: idx for idx, user_id in enumerate(df['user_id'].unique())}
    article_map = {article_id: idx for idx, article_id in enumerate(df['article_id'].unique())}
    # Map ids to indices via local Series instead of writing user_idx/article_idx
    # columns into the caller's DataFrame (the original mutated its input).
    rows = df['user_id'].map(user_map)
    cols = df['article_id'].map(article_map)
    # csr_matrix sums duplicate (row, col) pairs, so repeated interactions accumulate.
    user_item_matrix = csr_matrix((df['interaction'], (rows, cols)))
    return user_item_matrix, user_map, article_map
# Step 3: Entry point — load data, run collaborative filtering, print JSON.
if __name__ == "__main__":
    # user_id comes from the first CLI argument when provided; default to 1
    # preserves the previous hard-coded behavior when run without arguments.
    try:
        user_id = int(sys.argv[1]) if len(sys.argv) > 1 else 1
    except ValueError:
        print("Please provide user_id as an integer argument.")
        sys.exit(1)
    # Database credentials.
    # NOTE(review): credentials are hard-coded; consider environment variables.
    host = "localhost"
    user = "root"
    password = "amna"
    database = "news_scraper"
    # Load interaction data from MySQL.
    interaction_data = load_interaction_data(host, user, password, database)
    # Build the sparse user-item matrix plus id -> index maps.
    user_item_matrix, user_map, article_map = create_user_item_matrix(interaction_data)
    # Collaborative filtering: compute similarity, then top-N recommendations.
    cf = CollaborativeFiltering(user_item_matrix, user_map, article_map)
    cf.compute_similarity()
    recommendations = cf.recommend(user_id, top_n=5)
    # Translate internal article indices back into database article IDs.
    reverse_article_map = {idx: article_id for article_id, idx in article_map.items()}
    recommendations = [reverse_article_map[article_idx] for article_idx in recommendations]
    # Emit JSON on stdout so a calling process can parse the result.
    print(json.dumps(recommendations))