-
Notifications
You must be signed in to change notification settings - Fork 0
/
Scrapper.py
97 lines (87 loc) · 4.54 KB
/
Scrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import sys
import datetime
import pendulum
from SysUtils import SysUtils
class Scrapper:
    """Selenium helpers for the planning pages on edtmobiliteng.wigorservices.net.

    The methods drive an already-created Selenium ``browser`` to open the
    planning, locate the link of a course, resolve it to its final (Teams)
    URL and take screenshots. Command-line arguments are read directly from
    ``sys.argv`` (indices 1, 2 and 4).
    """

    # Project helper shared by every instance; this class uses its cleanUp(),
    # isAlreadySent(), saveLink() and getLinkMLB() methods.
    iAmUrWaiter = SysUtils()

    ### Load Page ###
    def loadPage(self, browser, url: str, idToFind: str) -> None:
        """Open *url* and wait up to 15 seconds for the element with id *idToFind*.

        Args:
            browser: Selenium WebDriver instance used for navigation.
            url: Address to load.
            idToFind: DOM id whose presence signals that the page is ready.

        Raises:
            SystemExit: If the element does not appear in time. The browser is
                handed to ``SysUtils.cleanUp`` before the process exits.
        """
        browser.get(url)
        try:
            WebDriverWait(browser, 15).until(
                EC.presence_of_element_located((By.ID, idToFind))
            )
        except TimeoutException as e:
            print(e)
            print("Can't load page with url : " + url + "\n")
            self.iAmUrWaiter.cleanUp(browser)
            sys.exit()

    ### Go to class's planning ###
    def goToPlanning(self, browser, date: str) -> None:
        """Load the planning page for *date*.

        ``sys.argv[1]`` and ``sys.argv[2]`` are joined with a dot to build the
        ``Tel`` query parameter.

        Args:
            browser: Selenium WebDriver instance used for navigation.
            date: Value of the ``date`` query parameter. The only caller in
                this class passes it formatted as ``MM-DD-YYYY``.
        """
        url = (
            'https://edtmobiliteng.wigorservices.net//WebPsDyn.aspx'
            '?action=posEDTBEECOME&serverid=C&Tel='
            + sys.argv[1] + '.' + sys.argv[2]
            + '&date=' + date
        )
        self.loadPage(browser, url, 'DivBody')

    ### Get course link ###
    def getLastLinkTeams(self, browser):
        """Resolve the course link shown on the current planning page.

        Args:
            browser: Selenium WebDriver instance currently on the planning page.

        Returns:
            A 4-tuple ``(urlTeams, urlMLB, courseName, nothingNew)``:

            * ``(urlTeams, urlMLB, courseName, None)`` - a link that was not
              recorded yet, for a course whose name contains ``DISTANCIEL``.
            * ``(None, urlMLB, courseName, None)`` - a link that was not
              recorded yet, for a course without ``DISTANCIEL`` in its name.
            * ``(None, None, None, True)`` - no matching link on the page, or
              the link was already recorded.
            * ``(None, None, None, None)`` - an exception was raised and logged.

        Raises:
            SystemExit: Propagated from ``loadPage`` when the linked page does
                not load (``except Exception`` does not intercept it).
        """
        try:
            links = browser.find_elements(
                By.CSS_SELECTOR,
                "a[href^='https://edtmobiliteng.wigorservices.net']",
            )
            if not links:
                return None, None, None, True

            # To get the Principal Link Teams.
            # NOTE(review): the offset 7 presumably mirrors the page layout.
            # With fewer than 7 links the index is negative and wraps around
            # (or raises IndexError, handled below) - confirm this is intended.
            crossLink = links[len(links) - 7]

            # Get parent element to retrieve course name
            parentElement = crossLink.find_element(By.XPATH, "./../..")
            courseNameElement = self.getCourseName(
                parentElement.get_attribute("innerHTML")
            )

            # Go on the link in planning, to access the real link of Teams
            self.loadPage(browser, crossLink.get_attribute('href'), 'container')
            urlTeams = browser.current_url

            # NOTE(review): the lookup uses the URL *before* the suppressPrompt
            # rewrite while saveLink() stores it *after* - verify that
            # SysUtils.isAlreadySent accounts for that.
            if self.iAmUrWaiter.isAlreadySent(urlTeams):
                print("We already have this link : " + urlTeams)
                return None, None, None, True

            # Display the alert box to open Teams directly
            urlTeams = urlTeams.replace('&suppressPrompt=true', '&suppressPrompt=false')
            self.iAmUrWaiter.saveLink(urlTeams)

            # Must be evaluated before the DISTANCIEL tag is stripped below.
            isDistancing = self.isDistancingCourse(courseNameElement)

            # innerHTML serialises a literal ampersand as the entity "&amp;",
            # so decode it back to get the plain course name.
            courseNameElement = (
                courseNameElement.replace("DISTANCIEL", "").replace("&amp;", "&")
            )
            print("courseNameElement ", courseNameElement)
            urlMLB, courseName = self.iAmUrWaiter.getLinkMLB(courseNameElement)
            print("courseName ", courseName)

            # If is not distancing we don't return the Teams link
            if not isDistancing:
                return None, urlMLB, courseName, None

            print("Course : " + courseNameElement)
            print("We got a new link : " + urlTeams)
            return urlTeams, urlMLB, courseName, None
        except Exception as e:
            # Best-effort boundary: log the failure and report "nothing usable".
            print("An error occured, method : getLastLinkTeams() -> ", e, "\n")
            return None, None, None, None

    ### Take a screenshot of the planning ###
    def takeScreenshot(self, browser, date: str) -> str:
        """Save a screenshot of the current page as ``./<sys.argv[4]>/edt_<date>.png``.

        Args:
            browser: Selenium WebDriver instance to capture.
            date: Text embedded in the file name.

        Returns:
            The path passed to ``save_screenshot``; it is returned even when
            the driver reports that writing the file failed.
        """
        nameScreenshot = "./" + sys.argv[4] + "/edt_" + date + ".png"
        # save_screenshot() reports I/O failures through its boolean result
        # instead of raising, so do not claim success blindly.
        if browser.save_screenshot(nameScreenshot):
            print("Screenshot " + nameScreenshot + " saved.\n")
        else:
            print("Screenshot " + nameScreenshot + " could not be saved.\n")
        return nameScreenshot

    ### Get the course name ###
    def getCourseName(self, stringElement: str) -> str:
        """Extract the course name from the inner HTML of a planning cell.

        The result is the text before ``<div class="Presence">`` followed by
        the text after the last ``</div>``. When the Presence block is absent
        the input is returned unchanged (both halves would otherwise be the
        whole string, doubling the name). The result is also printed.

        Args:
            stringElement: Inner HTML of the element wrapping the course link.

        Returns:
            The HTML with the Presence block removed.
        """
        marker = '<div class="Presence">'
        if marker in stringElement:
            courseName = (
                stringElement.split(marker)[0] + stringElement.split('</div>')[-1]
            )
        else:
            courseName = stringElement
        print(courseName)
        return courseName

    ### Is the course remote ? ###
    def isDistancingCourse(self, courseName: str) -> bool:
        """Return True when *courseName* contains the ``DISTANCIEL`` tag."""
        return "DISTANCIEL" in courseName

    ### Are we in a school or entreprise period next week ? ###
    def isSchoolPeriodNextWeek(self, browser):
        """Check whether the planning of the coming Monday's week lists courses.

        Loads the planning for the next Monday and looks for the text
        ``Pas de cours cette semaine``. When it is found, a screenshot of the
        planning is taken.

        Args:
            browser: Selenium WebDriver instance used for navigation.

        Returns:
            ``(False, next_week)`` when the "no course" text is present, where
            ``next_week`` is the Monday formatted as ``MM-DD-YYYY``; otherwise
            ``(True, None)``.
        """
        next_week = pendulum.now().next(pendulum.MONDAY).strftime('%m-%d-%Y')
        self.goToPlanning(browser, next_week)
        isNotSchool = browser.find_elements(
            By.XPATH, "//*[contains(text(), 'Pas de cours cette semaine')]"
        )
        if isNotSchool:
            self.takeScreenshot(browser, next_week)
            return False, next_week
        return True, None