-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlessons_get.py
443 lines (400 loc) · 26.3 KB
/
lessons_get.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
# -*- coding: utf-8 -*-
# @Project -> File: Fafu_lessons -> mian
# @Time : 2022/11/8 18:20
# @Author : 田某人
# @Software: PyCharm
import re
from datetime import time, datetime
from time import sleep
from bs4 import BeautifulSoup
import requests
import smtplib
from email.mime.text import MIMEText
from email.header import Header
from lxml import etree
from pymysql import Connection
# Recipient list used when notifying a single user.
qq_mail = [
    "[email protected]",
]
# Recipient list used when notifying newly added users.
qq_mail_new = [
    "[email protected]",
]
# VIP slot for group notifications.
mail_list_vip = [
    "[email protected]",
    "[email protected]",
]
# Lists for mailing many users at once, currently disabled
# (Kangzheng, senior sister, senior Zhifang, Caiyu, Zhang Wei, Mr. Chang,
# eldest senior, study representative, senior Zhiwei).
# mail_list=["[email protected]","[email protected]","[email protected]","[email protected]","[email protected]","[email protected]","[email protected]","[email protected]","[email protected]"]
# mail_all_list=["[email protected]","[email protected]","[email protected]","[email protected]","[email protected]","[email protected]","[email protected]","[email protected]","[email protected]","[email protected]","[email protected]"]
# Mailbox the notifications are sent from.
smtp_mail = "[email protected]"
# Authorization code of the sending mailbox (used as the SMTP password).
smtp_self = ""
# tbxUserID_self = ""
# InputPwd_self = ""
# Values posted as the "tbxUserID" / "InputPwd" fields of the login form.
tbxUserID_self = ""
InputPwd_self = ""
# 邮件发送函数
def send_qqEmail(to, code):
    """Send a plain-text notification e-mail through the QQ SMTP server.

    Args:
        to: A recipient address, or an iterable of recipient addresses.
            Empty / ``None`` entries are ignored.
        code: Text appended after the fixed "当前检测到: " prefix of the body.

    Returns:
        bool: ``True`` when the message was accepted by the server,
        ``False`` when there is no usable recipient or when connecting,
        logging in or sending fails (the error is printed).
    """
    # A bare string would otherwise be joined character by character into
    # the "To" header, so normalise to a list first.
    if isinstance(to, str):
        recipients = [to]
    else:
        recipients = list(to or ())
    recipients = [address for address in recipients if address]
    if not recipients:
        print("send_qqEmail: no recipients, nothing sent")
        return False

    mail_address = "smtp.qq.com"
    mail_port = 25
    mail_user = smtp_mail
    mail_pass = smtp_self

    msg = MIMEText('当前检测到: ' + code, 'plain', 'utf-8')
    msg['From'] = "学术会议小助手"
    # The header takes one comma-separated string, sendmail() takes the list.
    msg['To'] = ",".join(recipients)
    msg['Subject'] = Header("学术会议信息提示", 'utf-8')

    try:
        # The context manager sends QUIT and closes the socket even when
        # login() or sendmail() raises, so a failed send no longer leaks it.
        with smtplib.SMTP(mail_address, mail_port) as smtp:
            smtp.login(mail_user, mail_pass)
            # sendmail() needs the message serialised to a string.
            smtp.sendmail(mail_user, recipients, str(msg))
    except (smtplib.SMTPException, OSError) as e:
        # OSError covers DNS / connection failures, which previously escaped
        # instead of producing the documented False result.
        print(e)
        return False
    return True
# Database selected (via conn.select_db) before every query in this file.
mysql_db_name = "consumer_test"
# Table read and updated by mysql_monitor().
mysql_db_chart_name = "test_org"
# Backup table that mysql_monitor() updates alongside the main one.
mysql_db_chart_name_backups = "test_backups"
# Test table names; not referenced by the code visible in this file.
mysql_db_chart_name_test = "test"
mysql_db_chart_name_test_backups = "copy_test"
# Table holding the meetings that have already been recorded.
mysql_db_chart_name2 = "test_lessons"
# Module-level MySQL connection shared by the mysql_* helper functions.
conn = Connection(
    host="localhost",  # host name (IP)
    port=3306,  # port
    user="root",  # account name
    password="root",  # password
    autocommit=True,  # commit each statement automatically
)
# 该函数负责向数据库添加课程名称和加入的时间
# def mysql_insert(insert_name,time):
# # 检查连接是否断开,如果断开就进行重连
# conn.ping(reconnect=True)
# cursor = conn.cursor()
# conn.select_db(mysql_db_name)
# # print(f"{time_naw.hour},{time_naw.minute},{time_naw.second}")
# # print(time_naw)
# print(f"名字为:{insert_name},时间为:{time}")
# cursor.execute(
# # 改写法可适用于正常插入时,若数据库中已有存此项数据信息,则对需要修改的项进行更新
# # f"insert into {mysql_db_chart_name}(consumer,begin_time,end_time,mail) values('{insert_name}','{begin_time}','{end_time}','{mail}') on duplicate key update end_time='{end_time}',mail='{mail}'")
# f"insert into {mysql_db_chart_name2}(name,time) values('{insert_name}','{time}')"
# )
# # print("1")
# print(f"已添加可报名会议进入数据库,名字为:{insert_name},时间为:{time}")
# 该函数负责向数据库添加课程名称和加入的时间,地点
def mysql_insert_update(insert_name, time, place):
    """Insert a newly detected meeting into the lessons table.

    Args:
        insert_name: Meeting name, stored in the ``name`` column.
        time: Meeting time text, stored in the ``time`` column. The name
            shadows the imported ``time`` class inside this function only;
            it is kept so existing callers are unaffected.
        place: Meeting location, stored in the ``place`` column.
    """
    # Reconnect first if the shared connection was dropped or closed.
    conn.ping(reconnect=True)
    conn.select_db(mysql_db_name)
    # The values come from the scraped web page, so they are passed as query
    # parameters and quoted by the driver; a quote inside a meeting name can
    # no longer break (or inject into) the statement. Only the table name,
    # a module constant, is interpolated.
    sql = f"insert into {mysql_db_chart_name2}(name,time,place) values(%s,%s,%s)"
    with conn.cursor() as cursor:
        cursor.execute(sql, (insert_name, time, place))
    print(f"已添加可报名会议进入数据库,名字为:{insert_name},时间为:{time},会议地点为:{place}")
# 该函数最终以列表形式返回数据库中存储的邮件
def mysql_monitor():
    """Return the stored e-mail addresses and bump each row's ``get_num``.

    Reads every row of the subscriber table, writes ``get_num + 1`` (row
    column 5) back to both the main and the backup table for that row's
    ``consumer`` (row column 0), and collects row column 3 as the address.
    The shared connection is closed before returning, also on failure.

    Returns:
        list: One entry (row column 3) per row of the subscriber table.
    """
    # Reconnect first if the shared connection was dropped or closed.
    conn.ping(reconnect=True)
    conn.select_db(mysql_db_name)
    # Values are passed as parameters so the driver does the quoting; only
    # the table names (module constants) are interpolated.
    update_main = f"update {mysql_db_chart_name} set get_num=%s where consumer=%s"
    update_backup = f"update {mysql_db_chart_name_backups} set get_num=%s where consumer=%s"
    try:
        with conn.cursor() as cursor:
            cursor.execute(f"select * from {mysql_db_chart_name}")
            results = cursor.fetchall()
            for r in results:
                params = (r[5] + 1, r[0])
                cursor.execute(update_main, params)
                cursor.execute(update_backup, params)
        email_list = [r[3] for r in results]
        print(f"当前存储邮箱数目为:{len(email_list)},已作为群发email地址")
    finally:
        # Close on every path; the guard avoids closing a connection that
        # is already gone.
        if conn.open:
            conn.close()
    return email_list
# 该函数负责返回1或0,1为有重复
def mysql_monitor_lessons(name, time):
    """Tell whether a meeting is already stored in the lessons table.

    Args:
        name: Meeting name, compared as text with column 0 of each row.
        time: Meeting time, compared as text with column 1 of each row.

    Returns:
        int: 1 when a row with the same name and time exists, otherwise 0.
    """
    conn.ping(reconnect=True)
    cursor = conn.cursor()
    conn.select_db(mysql_db_name)
    cursor.execute(f"select * from {mysql_db_chart_name2}")
    stored_rows = cursor.fetchall()

    wanted_name = str(name)
    wanted_time = str(time)
    already_stored = any(
        wanted_name == str(row[0]) and wanted_time == str(row[1])
        for row in stored_rows
    )
    if already_stored:
        print("此课程已添加过,无需群发邮件通知")
        return 1
    print("此课程未添加过,需要进行群发邮件通知")
    return 0
# headers={
# "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
# "cookie":"ASP.NET_SessionId=23amui45chyrpybnmzjwa145; sdmenu_my_menu=00000000000001000",
# "Referer":"http://yjsjyglxt.fafu.edu.cn/tbbmgl/bmx_xsbm.aspx?lasturl=/tbbmgl/xs_bmxx_xs.aspx",
# "user_agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36",
# "connection":"keep-alive",
# }
# url ="http://yjsjyglxt.fafu.edu.cn/tbbmgl/bmx_xsbm.aspx?page=1&pageSize=10"
# rep = requests.get(url, headers=headers).content.decode('utf-8')
# print(rep)
# Start of the regular working window.
DAY_START = time(hour=7, minute=0)
# End of the regular working window.
DAY_END = time(hour=23, minute=30)
# Start of the overtime working window.
DAY_START1 = time(hour=0, minute=0)
# End of the overtime working window.
DAY_END1 = time(hour=1, minute=0)
# Pause between restart attempts, in seconds.
sleep_time = 1800
# Upper bound for the restart counter `j` of the main loop.
safe_line = 4
# Upper bound for the session-setup counter `k` of the main loop.
safe_session_line = 2
# Flag deciding whether the "off duty" notification branch is entered.
b = False
# Crawl counter; one unit stands for a 10-minute interval.
num = 1
# 调试的时候一定要把下面这句话注释掉!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# send_qqEmail(qq_mail_new,"\n感谢您的使用,若检测到有野生会议将立刻通知您")
# send_qqEmail(mail_all_list,"\n本学期已结束,感谢您对本脚本的支持。\n11月中旬上线至今,教务系统先后公布了15次学术会议,由于个别时间会议公布过于密集或脚本维护等因素影响,脚本实际已为您捕获并通知了10次学术会议。\n待新学期伊始,学术会议小助手将继续为您服务,期待再次相见!")
# Main polling loop of the script.
# While inside the configured working windows it logs into the management
# system, fetches the meeting sign-up page, and for every table row whose
# link text contains "报名" looks the meeting up in the database; meetings
# not yet recorded are stored and announced by e-mail.
# `j` counts failed outer attempts (bounded by safe_line) and `k` counts
# session set-ups (bounded by safe_session_line).
# NOTE(review): every line of this block starts at column 0 in the text as
# shown, so the nesting of the try/while/if statements cannot be read from
# it; restore the original indentation before running.
j = 0
try:
while j < safe_line:
try:
k = 0
while k < safe_session_line:
# Current time reduced to hour and minute (a datetime.time value)
current_time = datetime.now().time()
rel_time = time(current_time.hour, current_time.minute)
if DAY_START <= rel_time <= DAY_END or DAY_START1 <= rel_time <= DAY_END1:
print(rel_time)
print("上班啦")
# Create the session object
session = requests.Session()
# Login page URL of the management system
url1 = 'http://yjsjyglxt.fafu.edu.cn/login.aspx'
# First read of the login page, as an anonymous visitor
html_data = session.post(url1)
soup = BeautifulSoup(html_data.content, 'html.parser')
# Read the dynamic form values that go into the request_data dict
view_state = soup.find(id="__VIEWSTATE")["value"]
view_state_encrypted = soup.find(id="__VIEWSTATEENCRYPTED")["value"]
# An empty dynamic value means this session could not be set up; break out
# NOTE(review): if soup.find() returns None the subscripts above presumably
# raise before this check is reached - confirm.
if not soup.find(id="__VIEWSTATE")["value"]:
# Current time with hour, minute and second (a datetime.time value)
current_time = datetime.now().time()
rel_time = time(current_time.hour, current_time.minute, current_time.second)
print(
"Session建立失败,未获取到对应的属性值,程序被迫中断,后续进入安全启动步骤,当前已进行%d次搜索,程序运行时间:%0.2f小时,当前时间为:%s" % (
num, num * 10 / 60, str(rel_time)))
# send_qqEmail(qq_mail,
# "Session建立失败,程序被迫中断,当前已进行%d次搜索,程序运行时间:%0.2f小时,当前时间为:%s" % (
# num, num * 10 / 60,str(rel_time)))
break
# Form data used by the login POST
request_data = {
"__VIEWSTATE": view_state,
"__VIEWSTATEENCRYPTED": view_state_encrypted,
"tbxUserID": tbxUserID_self,
# "btnLogin.x": "47",#也是动态的,可写死
# "btnLogin.y": "28",
"btnLogin.x": "0", # also dynamic, but can be hard-coded
"btnLogin.y": "0",
"InputPwd": InputPwd_self,
}
# Submit the login form
response_data = session.post(url1, data=request_data)
# URL of the meeting sign-up page
url3 = "http://yjsjyglxt.fafu.edu.cn/tbbmgl/bmx_xsbm.aspx?lasturl=/tbbmgl/xs_bmxx_xs.aspx"
# Fetch the sign-up page and keep its source text
rep = session.get(url3).content.decode('utf-8')
print(rep)
k += 1
# Keep processing while the page contains the sign-up notice text and the
# time is inside a working window
while re.search("若报名不参加,请于报名截止日期前一天退出报名,否则将倒扣学术活动分",
rep) is not None and (
DAY_START <= rel_time <= DAY_END or DAY_START1 <= rel_time <= DAY_END1):
# A successfully fetched page lowers the restart counter by one
if j >= 1:
j -= 1
# A successfully fetched page lowers the session set-up counter by one
if k >= 1:
k -= 1
if b == False:
b = True
soup = etree.HTML(rep)
# print(soup)
# Text of every link found in the highlighted rows of the meeting table
page = soup.xpath(
"/html/body/form/div[@id='mainframeDiv']/div[@id='divContent']/table[@border='1px']/tr[@onmouseover]/td/a/text()")
# context_name = soup.xpath(
# "/html/body/form/div[@id='mainframeDiv']/div[@id='divContent']/table[@border='1px']/tr[@onmouseover][2]/td[1]/text()")
# context_classtime = soup.xpath(
# "/html/body/form/div[@id='mainframeDiv']/div[@id='divContent']/table[@border='1px']/tr[@onmouseover][2]/td[2]/text()")
# print(context_name)
# send_qqEmail("[email protected]",
# f"野生会议已出现!!!\n名字为:{context_name[0]} \n会议开始时间为:{context_classtime[0]} \n请点击链接登入教育管理系统捕获该会议:http://yjsjyglxt.fafu.edu.cn/login.aspx")
# break
print(page)
# Row number inside the sign-up table; the first meeting is row 1
n = 1
for i in page:
if n<4 and i!='报名':
context_name = soup.xpath(
f"/html/body/form/div[@id='mainframeDiv']/div[@id='divContent']/table[@border='1px']/tr[@onmouseover][{n}]/td[1]/text()")
print(f"n为:{n},i为:{i},会议名为:{context_name[0]}")
else:
print(f"n为:{n},i为:{i}")
print(f"当前时间为:{rel_time}")
# For testing, "报名" can be replaced by "详细信息"; remember to comment out
# the n-=1 if-statement below in that case
if re.search("报名", i) is not None:
# Normally i is "详细信息" and maps one-to-one to n, but for a meeting open
# for sign-up the "报名" link takes a slot as well, hence the decrement
# (can be commented out when testing)
if(n>1):
n-=1
# print("1111111111111")
# Name of the open meeting (xpath result list, first element is used)
context_name = soup.xpath(
f"/html/body/form/div[@id='mainframeDiv']/div[@id='divContent']/table[@border='1px']/tr[@onmouseover][{n}]/td[1]/text()")
# print(f"{str(context_name[0])}")
# Time of the open meeting (xpath result list, first element is used)
context_classtime = soup.xpath(
f"/html/body/form/div[@id='mainframeDiv']/div[@id='divContent']/table[@border='1px']/tr[@onmouseover][{n}]/td[2]/text()")
# print(context_classtime)
# Location of the open meeting (xpath result list, first element is used)
context_location = soup.xpath(
f"/html/body/form/div[@id='mainframeDiv']/div[@id='divContent']/table[@border='1px']/tr[@onmouseover][{n}]/td[3]/text()")
# print(context_location)
# Credit of the open meeting (xpath result list, first element is used)
context_grade = soup.xpath(
f"/html/body/form/div[@id='mainframeDiv']/div[@id='divContent']/table[@border='1px']/tr[@onmouseover][{n}]/td[7]/text()")
# print(context_grade)
# Remaining places: column 8 minus column 9 of the row
context_people_total = soup.xpath(
f"/html/body/form/div[@id='mainframeDiv']/div[@id='divContent']/table[@border='1px']/tr[@onmouseover][{n}]/td[8]/text()")
# print(context_people_total[0])
context_people_now = soup.xpath(
f"/html/body/form/div[@id='mainframeDiv']/div[@id='divContent']/table[@border='1px']/tr[@onmouseover][{n}]/td[9]/text()")
# print(context_people_now[0])
context_people=int(context_people_total[0])-int(context_people_now[0])
# print(context_people)
# print(f"{context_people[0]}")
# Other remarks of the open meeting (e.g. priority for certain colleges)
context_ortherins = soup.xpath(
f"/html/body/form/div[@id='mainframeDiv']/div[@id='divContent']/table[@border='1px']/tr[@onmouseover][{n}]/td[10]/text()")
# print(f"{str(context_ortherins[0])}")
# print(f"野生会议已出现!!!\n名字为:{str(context_name[0])}")
# 1 means the meeting is already stored, 0 means it still has to be announced
is_notice=mysql_monitor_lessons(str(context_name[0]),str(context_classtime[0]))
if is_notice==0:
# mysql_insert(str(context_name[0]),str(context_classtime[0]))
mysql_insert_update(str(context_name[0]),str(context_classtime[0]),str(context_location[0]))
print(n)
try:
try:
# Full notification including the "other remarks" text
send_qqEmail(mail_list_vip,
f"野生会议已出现!!!\n名字为:{context_name[0]}\n会议时间为:{context_classtime[0]}\n会议地点为:{context_location[0]}\n会议学分为:{context_grade[0]}\n剩余可报人数为:{context_people}\n其他说明为:{context_ortherins[0]}\n请点击链接登入教育管理系统捕获该会议:http://yjsjyglxt.fafu.edu.cn/login.aspx")
send_qqEmail(mysql_monitor(),
f"野生会议已出现!!!\n名字为:{context_name[0]}\n会议时间为:{context_classtime[0]}\n会议地点为:{context_location[0]}\n会议学分为:{context_grade[0]}\n剩余可报人数为:{context_people}\n其他说明为:{context_ortherins[0]}\n请点击链接登入教育管理系统捕获该会议:http://yjsjyglxt.fafu.edu.cn/login.aspx")
except Exception:
# Fallback: the same notification without the "other remarks" text
send_qqEmail(mail_list_vip,
f"野生会议已出现!!!\n名字为:{context_name[0]}\n会议时间为:{context_classtime[0]}\n会议地点为:{context_location[0]}\n会议学分为:{context_grade[0]}\n剩余可报人数为:{context_people}\n请点击链接登入教育管理系统捕获该会议:http://yjsjyglxt.fafu.edu.cn/login.aspx")
send_qqEmail(mysql_monitor(),
f"野生会议已出现!!!\n名字为:{context_name[0]}\n会议时间为:{context_classtime[0]}\n会议地点为:{context_location[0]}\n会议学分为:{context_grade[0]}\n剩余可报人数为:{context_people}\n请点击链接登入教育管理系统捕获该会议:http://yjsjyglxt.fafu.edu.cn/login.aspx")
# print(n)
except Exception:
send_qqEmail(qq_mail, f"野生会议已出现,快枪!!! ")
# NOTE(review): context_grade holds xpath text() results, which are
# presumably strings; comparing one with the float 0.1 would raise
# TypeError in Python 3 - confirm and convert if so.
if context_grade[0] > 0.1:
send_qqEmail(mysql_monitor(),
f"野生会议已出现但其学分属性较高!!!\n名字为:{context_name[0]} ")
# print(n)
else:
send_qqEmail(mysql_monitor(), f"野生会议已出现!!!\n名字为:{context_name[0]} ")
# print(n)
finally:
print("已检测到会议")
# Test code: comment out all of the e-mail sending above and uncomment the
# lines below
# try:
# print(
# f"野生会议已出现!!!\n名字为:{context_name[0]}\n会议时间为:{context_classtime[0]}\n会议地点为:{context_location[0]}\n会议学分为:{context_grade[0]}\n剩余报名人数为:{context_people}\n其他说明为:{context_ortherins[0]}\n请点击链接登入教育管理系统捕获该会议:http://yjsjyglxt.fafu.edu.cn/login.aspx")
# send_qqEmail(qq_mail,
# f"野生会议已出现!!!\n名字为:{context_name[0]}\n会议时间为:{context_classtime[0]}\n会议地点为:{context_location[0]}\n会议学分为:{context_grade[0]}\n剩余可报人数为:{context_people}\n请点击链接登入教育管理系统捕获该会议:http://yjsjyglxt.fafu.edu.cn/login.aspx")
#
# finally:
# print("查到会议")
elif is_notice==1:
print(n)
print("无需进行添加")
if(n<10):
n+=1
# Current time reduced to hour and minute (a datetime.time value)
current_time = datetime.now().time()
rel_time = time(current_time.hour, current_time.minute)
if DAY_START <= rel_time <= DAY_END or DAY_START1 <= rel_time <= DAY_END1:
# Release the database connection while waiting for the next crawl
if conn.open:
conn.close()
print("搜索冷却cd中")
# Interval between two crawls: 600 s in the regular window, 300 s in the
# overtime window
if DAY_START <= rel_time <= DAY_END:
sleep(600)
rep = session.get(url3).content.decode('utf-8')
num += 1
print("当前已进行%d次搜索,程序运行时间:%0.2f小时" % (num, num * 10 / 60))
elif DAY_START1 <= rel_time <= DAY_END1:
sleep(300)
rep = session.get(url3).content.decode('utf-8')
num += 0.5
print("当前已进行%d次搜索,程序运行时间:%0.2f小时" % (num, num * 10 / 60))
# Current time reduced to hour and minute (a datetime.time value)
current_time = datetime.now().time()
rel_time = time(current_time.hour, current_time.minute)
if DAY_START <= rel_time <= DAY_END or DAY_START1 <= rel_time <= DAY_END1:
print(f"Session已失效,程序重新建立session,当前session安全建立次数为{k}次,当前时间为:{rel_time}")
print("当前已进行%d次搜索,程序运行时间:%0.2f小时" % (num, num * 10 / 60))
num+=1
# send_qqEmail(qq_mail,"Session已失效,程序重新建立session,当前已进行%d次搜索,程序运行时间:%0.2f小时"% (num, num * 10 / 60))
# Outside the working windows: send the "off duty" mail once, guarded by b
elif current_time > DAY_END1 and b == True:
send_qqEmail(qq_mail,
f"已经下班啦,今日下班时间为{rel_time}" )
b = False
print(f"已经下班啦,,今日下班时间为{rel_time}")
# Current time reduced to hour and minute (a datetime.time value)
current_time = datetime.now().time()
rel_time = time(current_time.hour, current_time.minute)
print(f"session重新建立次数超过安全范围,{sleep_time / 60}分钟后重新建立session,当前时间为{rel_time}")
except Exception as e:
# Any failure counts as one restart attempt
j += 1
# Current time reduced to hour and minute (a datetime.time value)
current_time = datetime.now().time()
rel_time = time(current_time.hour, current_time.minute)
# send_qqEmail(qq_mail,
# "Session建立失败,程序被迫中断,1小时后尝试自动重启,当前已进行%d次搜索,程序运行时间:%0.2f小时,当前时间为:%s,安全重启次数为:%d次" % (
# num, num * 10 / 60, str(rel_time),safe_line))
b = True
finally:
# Wait before the next attempt: sleep_time seconds in the regular window,
# one sixth of it (plus an alert mail) in the overtime window
if DAY_START <= rel_time <= DAY_END:
print(
f"Session建立失败,程序被迫中断,{sleep_time / 60}分钟后尝试自动重启,当前已进行{num}次搜索,程序运行时间:{(num * 10) / 60}小时,当前时间为:{rel_time},安全重启次数为:{j}次")
sleep(sleep_time)
elif DAY_START1 <= rel_time <= DAY_END1:
send_qqEmail(qq_mail,
f"紧急情况\nSession建立失败,程序被迫中断\n{sleep_time / 6/60 }分钟后尝试自动重启\n当前已进行{num}次搜索,程序运行时间:{(num * 10) / 60}小时\n当前时间为:{rel_time}\n安全重启次数为:{j}次\n若无手动操作将于{sleep_time /6/60}分钟后,自动重建session")
sleep(sleep_time / 6)
# Reached once the restart counter hits safe_line: report and stop
current_time = datetime.now().time()
rel_time = time(current_time.hour, current_time.minute)
send_qqEmail(qq_mail,
"安全重启次数达到上限,程序出现不可逆错误请管理员手动修复后重新运行本程序。\n程序运行时间:%0.2f小时\n当前时间为:%s" % (
num * 10 / 60, str(rel_time)))
print("session建立失败,程序结束")
except Exception:
# Last-resort handler: report the unexpected failure by e-mail
current_time = datetime.now().time()
rel_time = time(current_time.hour, current_time.minute)
send_qqEmail(qq_mail,
"未知问题,程序出现不可逆错误请管理员手动修复后重新运行本程序。\n程序运行时间:%0.2f小时\n当前时间为:%s" % (
num * 10 / 60, str(rel_time)))