diff --git a/DNoiSe.py b/DNoiSe.py
index b7923ef..1aad50b 100644
--- a/DNoiSe.py
+++ b/DNoiSe.py
@@ -9,7 +9,6 @@
 import pandas
 import urllib
 import random
-import zipfile
 import sqlite3
 import datetime
 import requests
@@ -56,18 +55,6 @@ def download_domains():
 		print >> log_file, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.mktime(datetime.datetime.now().timetuple())))+" Can't download the domain list. Quitting."
 		exit()
 
-	# Unzip the list
-	try:
-		print >> log_file, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.mktime(datetime.datetime.now().timetuple())))+" Unzipping…"
-		zip_ref = zipfile.ZipFile(working_directory+"domains.zip", "r")
-		zip_ref.extractall(working_directory)
-		zip_ref.close()
-
-		os.remove(working_directory+"domains.zip")
-	except:
-		print >> log_file, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.mktime(datetime.datetime.now().timetuple())))+" Extraction failed. Quitting."
-		exit()
-
 	# Create a SQLite database
 	try:
 		db = sqlite3.connect(working_directory+"domains.sqlite")
@@ -75,7 +62,7 @@ def download_domains():
 
 		# Load the CSV into our database
 		print >> log_file, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.mktime(datetime.datetime.now().timetuple())))+" Importing to sqlite…"
-		df = pandas.read_csv(working_directory+"top-1m.csv", names = ["ID", "Domain"])
+		df = pandas.read_csv(working_directory + "domains.zip", compression = 'zip', names = ["ID", "Domain"])
 		df.to_sql("Domains", db, if_exists = "append", index = False)
 		db.close()