-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
executable file
·123 lines (100 loc) · 3.73 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import imaplib
import email
from email.header import decode_header
import mysql.connector
import re
import os
# Every credential comes from the environment; a missing variable raises
# KeyError before any connection is attempted.
_env = os.environ

# Keyword arguments for the MySQL connection.
_db_settings = {
    "host": _env["DB_HOST"],
    "user": _env["DB_USER"],
    "password": _env["DB_PASSWORD"],
    "database": _env["DB_DATABASE"],
}
mydb = mysql.connector.connect(**_db_settings)

# IMAP host and the mailbox login used by the download step below.
imap_server = "imap.gmail.com"
myEmail = _env["myEmail"]
myPass = _env["myPass"]
# Matches an http(s) URL up to the next whitespace character.
URL_PATTERN = re.compile(r"https?://\S+")

# Messages whose subject contains one of these markers are skipped and left
# in the mailbox.
SKIPPED_SUBJECT_MARKERS = ("Download Now:", "Weekly Digest:")

INSERT_LINK_SQL = (
    "INSERT INTO main (sender, subject, body, date) VALUES (%s, %s, %s, %s)"
)


def _decode_header_value(raw_value):
    """Return a (possibly encoded) header as text, or "" if it is missing.

    All fragments produced by ``decode_header`` are decoded and joined.
    Byte fragments use their declared charset; UTF-8 is used when no
    charset is declared or the declared one is unknown to Python.
    Undecodable bytes are dropped rather than raising.
    """
    if raw_value is None:
        return ""
    pieces = []
    for fragment, charset in decode_header(raw_value):
        if isinstance(fragment, bytes):
            try:
                fragment = fragment.decode(charset or "utf-8", errors="ignore")
            except LookupError:
                # Declared charset is not a codec Python knows about.
                fragment = fragment.decode("utf-8", errors="ignore")
        pieces.append(fragment)
    return "".join(pieces)


# Download every message in the "Music" mailbox, store the first URL of each
# text/plain part in the `main` table, and remove the stored messages.
try:
    imap = imaplib.IMAP4_SSL(imap_server)
    imap.login(myEmail, myPass)
    print(f"Logged into {myEmail}")
    imap.select("Music")
    _, data = imap.search(None, "ALL")
    email_ids = data[0].split()
    total_messages = len(email_ids)

    # URLs already stored. A set gives constant-time duplicate checks and is
    # extended as rows are inserted, so repeats within one run are caught too.
    cursor = mydb.cursor()
    try:
        cursor.execute("SELECT body FROM main")
        existing_urls = {row[0] for row in cursor.fetchall()}
    finally:
        cursor.close()

    for message_number, message_id in enumerate(email_ids, start=1):
        print(f"Processing email {message_number}/{total_messages}")
        _, message_data = imap.fetch(message_id, "(BODY[])")
        email_message = email.message_from_bytes(message_data[0][1])

        sender = _decode_header_value(email_message["From"])
        subject = _decode_header_value(email_message["Subject"])
        if any(marker in subject for marker in SKIPPED_SUBJECT_MARKERS):
            continue

        try:
            parsed_date = email.utils.parsedate_to_datetime(email_message["Date"])
        except (TypeError, ValueError):
            # A missing or malformed Date header must not abort the whole run.
            print(f"Skipping email with missing or invalid date: {subject}")
            continue
        formatted_date = parsed_date.strftime("%Y-%m-%d")

        link_saved_or_known = False
        for part in email_message.walk():
            if part.get_content_type() != "text/plain":
                continue
            payload = part.get_payload(decode=True)
            if payload is None:
                continue
            body_text = payload.decode(errors="ignore")

            url_match = URL_PATTERN.search(body_text)
            if url_match is None:
                # Nothing to store from this part; keep looking at the others.
                print(f"No URL found in email: {subject}")
                continue
            body_url = url_match.group(0)

            if body_url in existing_urls:
                print(f"Skipping duplicate email with URL: {body_url}")
                link_saved_or_known = True
                break

            cursor = mydb.cursor()
            try:
                cursor.execute(
                    INSERT_LINK_SQL,
                    (sender, subject, body_url, formatted_date),
                )
                mydb.commit()
            finally:
                cursor.close()
            existing_urls.add(body_url)
            link_saved_or_known = True

        # Flag the message for deletion only once its link is stored or was
        # already known, so mail that could not be processed stays in place.
        if link_saved_or_known:
            imap.store(message_id, "+FLAGS", "\\Deleted")

    # Permanently remove the flagged messages, then end the session.
    imap.expunge()
    imap.close()
    imap.logout()

    if total_messages != 0:
        print("Emails downloaded successfully.")
    else:
        print("No new emails")
except imaplib.IMAP4.error:
    print("Failed to login or retrieve emails.")