forked from msarnacki/flashscore-scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
h2h.py
89 lines (64 loc) · 2.41 KB
/
h2h.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
def get_matches_info(matches, how_many):
    """Print one summary line per head-to-head match row.

    Each printed line has the form ``"<index>. <team1> <score> <team2>"``,
    with the index starting at 0.

    Args:
        matches: Iterable of parsed row elements. Each row must support
            ``find_all(class_='name')`` (team name elements) and
            ``find(class_='score')`` (score element), and the returned
            elements must expose a ``.text`` attribute.
        how_many: Maximum number of matches to print.

    Returns:
        None. The summaries are written to standard output.
    """
    printed = 0
    for match in matches:
        # stop once how_many matches have been printed
        if printed == how_many:
            break

        teams = match.find_all(class_='name')
        score = match.find(class_='score')

        # A row without both team names or without a score cannot be
        # summarised; skip it instead of failing with IndexError /
        # AttributeError and aborting the remaining rows.
        if len(teams) < 2 or score is None:
            continue

        print(f"{printed}. {teams[0].text} {score.text} {teams[1].text}")
        printed += 1
# Script body: open the H2H tab of one Flashscore match in a browser, expand
# the match lists, parse the rendered HTML and print the recent results of the
# home team, the away team and their mutual meetings.

# example scheduled match info from first round of next Premier League season
url_h2h = 'https://www.flashscore.com/match/j9iuZEuo/#h2h;overall'

# Start exactly one browser session; a second webdriver.Chrome() call would
# open another browser window that is never closed.
# (webdriver.Firefox() can be used here instead.)
driver = webdriver.Chrome()

try:
    driver.get(url_h2h)
    # fixed pause so the page has time to render before it is queried
    time.sleep(3)

    # closing cookies
    try:
        # find_element(By...) replaces the find_element_by_* helpers, which
        # were removed in Selenium 4.3+.
        button_cookies = driver.find_element(
            By.XPATH, "//button[@id='onetrust-accept-btn-handler']"
        )
        button_cookies.click()
    except WebDriverException:
        # Best effort: the banner is missing or not clickable. Only Selenium
        # errors are swallowed, so Ctrl+C and real bugs still propagate.
        print("cookies already closed")

    ### CLICKING 4 TIMES
    # Original author's note: this part is not necessary because even matches
    # that are not shown are in the source code.
    for _ in range(2):
        # gets list of elements with arrows
        # (arrows are always with "Show more matches")
        show_more = driver.find_elements(By.CLASS_NAME, 'arrow')
        # click the first two arrows, waiting a second after each; slicing
        # avoids an IndexError when fewer than two arrows are present
        for arrow in show_more[:2]:
            arrow.click()
            time.sleep(1)
    ###

    # get source code of page (once, after the lists were expanded)
    page = driver.page_source
    soup = BeautifulSoup(page, 'html.parser')

    # this line takes only matches from {Overall} section in H2H and not from
    # {HOMETEAM - Home} or {AWAYTEAM - Away}
    overall = soup.find(id='tab-h2h-overall')

    # get lists of matches
    #   find chooses table
    #   find_all gets every row with matches from that table
    home_matches = overall.find(class_='h2h_home').find_all(class_='highlight')
    away_matches = overall.find(class_='h2h_away').find_all(class_='highlight')
    mutual_matches = overall.find(class_='h2h_mutual').find_all(class_='highlight')

    print("Home team last matches")
    get_matches_info(home_matches, 15)

    print("Away team last matches")
    get_matches_info(away_matches, 15)

    print("VS each other last matches")
    get_matches_info(mutual_matches, 5)
finally:
    # always close the browser, even when scraping or parsing fails
    driver.quit()