-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain_model.py
43 lines (32 loc) · 1.5 KB
/
train_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import json
import pickle
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from text_processing import preprocess_dataframe
def train_save_model(data, model_store_file):
    """Train a bag-of-words sentiment classifier and pickle it to disk.

    The records are loaded into a DataFrame, passed through
    ``preprocess_dataframe``, vectorized with ``CountVectorizer`` and used to
    fit a ``LogisticRegression`` model. Accuracy on a held-out 20% split is
    printed and returned, and the fitted model is pickled to
    ``model_store_file``.

    Args:
        data: Anything ``pd.DataFrame`` accepts. After preprocessing, the
            frame must expose a ``'Text'`` column (features) and a
            ``'Sentiment'`` column (labels). If the records arrive as a JSON
            string, decode them with ``json.loads`` before calling.
        model_store_file: Destination passed to ``open(..., 'wb')`` for the
            pickled model.

    Returns:
        The accuracy reported by ``model.score`` on the held-out test split.
    """
    # Convert the data to a Pandas DataFrame for easier manipulation
    df = pd.DataFrame(data)

    # Preprocess DataFrame using the function from text_processing module
    df = preprocess_dataframe(df)

    # Vectorize text data using Bag-of-Words (BoW)
    # NOTE(review): the vocabulary is learned from every row, including the
    # rows held out for evaluation below. Kept as-is so the feature space of
    # the saved model does not change; consider fitting on the training split
    # only if callers can be updated accordingly.
    vectorizer = CountVectorizer()
    X = vectorizer.fit_transform(df['Text'])

    # Split data into Features (X) and Labels (y)
    y = df['Sentiment']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Build and Train the Sentiment Analysis Model (Example: Logistic Regression)
    model = LogisticRegression()
    model.fit(X_train, y_train)

    # Evaluate Model
    accuracy = model.score(X_test, y_test)
    print("Model Accuracy:", accuracy)

    # Save the trained model to a file. The handle gets its own name so the
    # path argument is not rebound to a (soon closed) file object.
    # NOTE(review): only the classifier is pickled; the fitted vectorizer is
    # not saved here, so whoever loads the model must reproduce the same
    # vocabulary -- confirm against the loading code.
    with open(model_store_file, 'wb') as model_file:
        pickle.dump(model, model_file)

    return accuracy
# Guard against direct execution: this module only provides
# train_save_model() and needs its arguments supplied by an importing script.
if __name__ == "__main__":
    import sys

    # sys.exit() with a string writes it to stderr and exits with status 1,
    # so shells and callers can detect the misuse.
    sys.exit("Error. This file needs to be run from another file, with necessary arguments.")