website: Use separate model and dataset files

* This should allow the live website to load the model and dataset files, which now live under docs/
tedgravlin · Dec 2, 2023 · 91f30d1 · 91f30d1
1 parent 9b02edf
commit 91f30d1
Show file tree

Hide file tree

Showing 6 changed files with 28 additions and 9 deletions.
diff --git a/docs/dataset/testdataset.csv b/docs/dataset/testdataset.csv
@@ -0,0 +1,18 @@
+Label,Num Of Sections,TLD,TLD Length,Domain,Domain Length,URL
+good,2,com,3,google,6,google.com
+good,2,nl,2,google,6,google.nl
+good,2,edu,3,emich,5,emich.edu
+good,3,edu,3,emich,5,canvas.emich.edu
+good,3,com,3,theverge,8,www.theverge.com
+good,2,com,3,theverge,8,theverge.com
+bad,2,com,3,824555,6,824555.com
+bad,2,com,3,retajconsultancy,16,retajconsultancy.com
+bad,2,info,4,9779,4,9779.info
+bad,2,com,3,chinacxyy,9,chinacxyy.com
+bad,3,vn,2,com,3,grasslandhotel.com.vn
+bad,3,com,3,readmyweather,13,microencapsulation.readmyweather.com
+bad,2,com,3,0068555,7,0068555.com
+bad,2,com,3,haishundl,9,haishundl.com
+bad,2,nl,2,zoetekroon,10,zoetekroon.nl
+bad,2,com,3,socialsocietyedu,16,socialsocietyedu.com
+bad,2,ru,2,3cf,3,3cf.ru
diff --git a/docs/main.py b/docs/main.py
@@ -4,8 +4,8 @@
 from pyscript import display
 from sklearn.preprocessing import StandardScaler
 from scipy.sparse import hstack
-import os
 from pyweb import pydom
+from os.path import exists
 
 # Get the input container element
 input_container = pydom['#input-container']
@@ -21,13 +21,9 @@ def load_files():
     tfidf = joblib.load('tfidf.pkl')
     test_dataset = pd.read_csv("testdataset.csv")
 
-    progress_text.innerText = "Model load complete."
-
     return model, tfidf, test_dataset
 
 def test_model(model, tfidf, test_dataset):
-    progress_text.innerText = "Testing URL against model..."
-
     # Turn the test dataset into a pandas data frame
     dataframe = pd.DataFrame(test_dataset)
     x = dataframe[['Num Of Sections', 'TLD', 'TLD Length', 'Domain', 'Domain Length', 'URL']]

diff --git a/docs/models/model.pkl b/docs/models/model.pkl
diff --git a/docs/models/tfidf.pkl b/docs/models/tfidf.pkl
diff --git a/docs/pyscript.json b/docs/pyscript.json
@@ -5,8 +5,8 @@
     "scikit-learn"
   ],
   "files": {
-    "../models/model.pkl": "model.pkl",
-    "../models/tfidf.pkl": "tfidf.pkl",
-    "../dataset/testdataset.csv": "testdataset.csv"
+    "./models/model.pkl": "model.pkl",
+    "./models/tfidf.pkl": "tfidf.pkl",
+    "./dataset/testdataset.csv": "testdataset.csv"
   }
 }
diff --git a/models/readme.md b/models/readme.md