-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsel_Test.py
141 lines (91 loc) · 3.08 KB
/
sel_Test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# from selenium import webdriver
from selenium import webdriver
from time import sleep
# browser = webdriver.Safari()
from selenium import webdriver
from time import sleep
from selenium.webdriver.firefox.options import Options
from collections import deque
# One FIFO queue of pending entries per search term. The order used here
# (iit, nit, iiit) is the same order used by link_list and list_bool below,
# so a single index addresses the queue, the URL and the flag of one source.
lt_iit, lt_nit, lt_iiit = deque(), deque(), deque()
lt_queue = [lt_iit, lt_nit, lt_iiit]

# Shared output batch that fill_global_list() appends to and returns.
global_list = []

# Sample entries for manual testing:
# lt_iiit.append("arun")
# lt_nit.append("manish")
# lt_iit.append("maqsood")

# Author-search result pages, one per search term.
link_iit = "https://scholar.google.com/citations?hl=en&view_op=search_authors&mauthors=iit"
link_nit = "https://scholar.google.com/citations?hl=en&view_op=search_authors&mauthors=nit"
link_iiit = "https://scholar.google.com/citations?hl=en&view_op=search_authors&mauthors=iiit"

# parsing() overwrites an entry with the browser's current URL after each
# visit, so every slot always holds the page to load next for that source.
link_list = [link_iit, link_nit, link_iiit]

# One flag per source; True while parsing() may still be called for it.
list_bool = [True] * 3

# A single Firefox session is shared by every parsing() call.
options = Options()
# options.headless = True
browser = webdriver.Firefox(options=options)
# browser.get(link_iit)
def parsing(link_index):
    """Load the current results page of one source and step to its next page.

    Opens ``link_list[link_index]`` in the shared ``browser``, looks for the
    element with class ``gs_btnPR`` (presumably the "next page" button --
    confirm against the page markup) and clicks it. The URL the browser ends
    up on is written back to ``link_list[link_index]`` so the following call
    for the same source continues from there.

    When no such element exists, ``list_bool[link_index]`` is set to False
    to mark the source as having no further pages.

    Args:
        link_index: Position of the source in ``link_list`` / ``list_bool``.

    Returns:
        None. The function works through its effects on ``browser``,
        ``link_list`` and ``list_bool``.
    """
    # Imported here because the module header does not import these names.
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.by import By

    browser.get(link_list[link_index])

    # TODO: run the scrape code here and append the results to the queue of
    # this source (presumably lt_queue[link_index]).

    try:
        # Single lookup; find_element(By, value) is available in both
        # Selenium 3 and Selenium 4.
        next_button = browser.find_element(By.CLASS_NAME, 'gs_btnPR')
    except NoSuchElementException:
        list_bool[link_index] = False
    else:
        next_button.click()
        # Give the browser a moment to navigate before reading the URL.
        sleep(1)

    link_list[link_index] = browser.current_url
#code to fill global_list
def fill_global_list():
    """Fill ``global_list`` with the next batch taken from the source queues.

    If ``global_list`` already holds items it is returned untouched.
    Otherwise items are moved out of the queues in ``lt_queue`` in
    round-robin order (one item per non-empty queue per round) until the
    batch holds at least 10 items or every queue is empty. Because a round
    is always completed, a batch can hold up to 12 items.

    After draining, ``parsing()`` is called for every source whose queue is
    empty and whose ``list_bool`` flag is still True, in the order 2, 0, 1.
    If the batch is still empty afterwards the drain/refill cycle repeats.

    Returns:
        ``global_list`` when it holds at least one item, or ``None`` when it
        is empty, every queue is empty and no ``list_bool`` flag is True.

    Note:
        While the batch is empty, termination relies on ``parsing()``
        eventually either adding items to a queue or clearing the flag of
        its source.
    """
    # A batch that has not been consumed yet is handed back as-is.
    if global_list:
        return global_list

    while True:
        # Round-robin drain: one item from each non-empty queue per round.
        while len(global_list) < 10 and any(lt_queue):
            for queue in lt_queue:
                if queue:
                    global_list.append(queue.popleft())

        # Ask for more data only for sources that ran dry and are still live.
        for index in (2, 0, 1):
            if not lt_queue[index] and list_bool[index]:
                parsing(index)

        if global_list:
            return global_list

        # Checked inside the loop so an exhausted, empty state ends the call
        # instead of cycling forever.
        if not any(list_bool) and not any(lt_queue):
            return None
# Show the collected batch. Nothing visible above calls fill_global_list()
# at module level, so this prints global_list as it stands at this point.
print(global_list)
# End the WebDriver session that was started during module setup.
browser.quit()
# from time import sleep
# from selenium.webdriver.firefox.options import Options
# from selenium.common.exceptions import NoSuchElementException
# options = Options()
# # options.headless = True
# # browser = webdriver.Safari()
# # op = webdriver.ChromeOptions()
# # op.add_argument('headless')
# link = "https://scholar.google.com/citations?hl=en&view_op=search_authors&mauthors=iit"
# browser = webdriver.Firefox(options=options)
# browser.get(link)
# # browser.maximize_window()
# # sample = "deep learning"
# # browser.find_element_by_id("keywords").send_keys(sample)
# while True:
# try:
# if browser.find_element_by_class_name('gs_btnPR'):
# browser.find_element_by_class_name('gs_btnPR').click()
# sleep(1)
# except NoSuchElementException:
# break
# # finally:
# browser.quit()