-
-
Notifications
You must be signed in to change notification settings - Fork 4.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: 🐛 Hotfix. Change selectors due to new html structure #905
Changes from all commits
91c7b17
e3b861c
b64eb03
69e45b6
b8d7ffa
3a9e5b7
8c10dcd
ed337c7
6fc96ed
be54105
72027a8
6280296
306fca6
1c1b7f3
fd69fc0
c5955bd
0330af9
e18e2e6
e038183
1f68c27
a349a4a
48481b5
f21530d
eb3e74a
218af31
3772d6b
51fe7eb
86ef242
101a42e
a2bfb04
d7aeabf
2dd187a
76b06ae
dba7b10
170c413
8411cf8
22f2c3b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -244,7 +244,6 @@ def start_applying(self): | |
def get_jobs_from_page(self, scroll=False): | ||
|
||
try: | ||
|
||
no_jobs_element = self.driver.find_element(By.CLASS_NAME, 'jobs-search-two-pane__no-results-banner--expand') | ||
if 'No matching jobs found' in no_jobs_element.text or 'unfortunately, things aren' in self.driver.page_source.lower(): | ||
logger.debug("No matching jobs found on this page, skipping.") | ||
|
@@ -255,22 +254,23 @@ def get_jobs_from_page(self, scroll=False): | |
|
||
try: | ||
# XPath query to find the ul tag with class scaffold-layout__list-container | ||
job_results_xpath_query = "//ul[contains(@class, 'scaffold-layout__list-container')]" | ||
job_results = self.driver.find_element(By.XPATH, job_results_xpath_query) | ||
jobs_xpath_query = "//ul[contains(@class, 'scaffold-layout__list-container')]" | ||
jobs_container = self.driver.find_element(By.XPATH, jobs_xpath_query) | ||
|
||
if scroll: | ||
job_results_scrolableElament = job_results.find_element(By.XPATH,"..") | ||
logger.warning(f'is scrollable: {browser_utils.is_scrollable(job_results_scrolableElament)}') | ||
jobs_container_scrolableElement = jobs_container.find_element(By.XPATH,"..") | ||
surapuramakhil marked this conversation as resolved.
Show resolved
Hide resolved
|
||
logger.warning(f'is scrollable: {browser_utils.is_scrollable(jobs_container_scrolableElement)}') | ||
|
||
browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement) | ||
browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement, step=300, reverse=True) | ||
|
||
browser_utils.scroll_slow(self.driver, job_results_scrolableElament) | ||
browser_utils.scroll_slow(self.driver, job_results_scrolableElament, step=300, reverse=True) | ||
job_element_list = jobs_container.find_elements(By.XPATH, ".//li[contains(@class, 'jobs-search-results__list-item') and contains(@class, 'ember-view')]") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would say that relying on "ember-view" instead of "data-job-id" is more likely to cause problems again, so I would revert this change. |
||
|
||
job_list_elements = job_results.find_elements(By.XPATH, ".//li[contains(@class, 'jobs-search-results__list-item') and contains(@class, 'ember-view')]") | ||
if not job_list_elements: | ||
if not job_element_list: | ||
logger.debug("No job class elements found on page, skipping.") | ||
return [] | ||
|
||
return job_list_elements | ||
return job_element_list | ||
|
||
except NoSuchElementException as e: | ||
logger.warning(f'No job results found on the page. \n expection: {traceback.format_exc()}') | ||
|
@@ -281,20 +281,9 @@ def get_jobs_from_page(self, scroll=False): | |
return [] | ||
|
||
def read_jobs(self): | ||
try: | ||
no_jobs_element = self.driver.find_element(By.CLASS_NAME, 'jobs-search-two-pane__no-results-banner--expand') | ||
if 'No matching jobs found' in no_jobs_element.text or 'unfortunately, things aren' in self.driver.page_source.lower(): | ||
raise Exception("No more jobs on this page") | ||
except NoSuchElementException: | ||
pass | ||
|
||
job_results = self.driver.find_element(By.CLASS_NAME, "jobs-search-results-list") | ||
browser_utils.scroll_slow(self.driver, job_results) | ||
browser_utils.scroll_slow(self.driver, job_results, step=300, reverse=True) | ||
job_list_elements = self.driver.find_elements(By.CLASS_NAME, 'scaffold-layout__list-container')[0].find_elements(By.CLASS_NAME, 'jobs-search-results__list-item') | ||
if not job_list_elements: | ||
raise Exception("No job class elements found on page") | ||
job_list = [self.job_tile_to_job(job_element) for job_element in job_list_elements] | ||
|
||
job_element_list = self.get_jobs_from_page() | ||
job_list = [self.job_tile_to_job(job_element) for job_element in job_element_list] | ||
for job in job_list: | ||
if self.is_blacklisted(job.title, job.company, job.link, job.location): | ||
logger.info(f"Blacklisted {job.title} at {job.company} in {job.location}, skipping...") | ||
|
@@ -307,21 +296,9 @@ def read_jobs(self): | |
continue | ||
|
||
def apply_jobs(self): | ||
try: | ||
no_jobs_element = self.driver.find_element(By.CLASS_NAME, 'jobs-search-two-pane__no-results-banner--expand') | ||
if 'No matching jobs found' in no_jobs_element.text or 'unfortunately, things aren' in self.driver.page_source.lower(): | ||
logger.debug("No matching jobs found on this page, skipping") | ||
return | ||
except NoSuchElementException: | ||
pass | ||
|
||
job_list_elements = self.get_jobs_from_page() | ||
job_element_list = self.get_jobs_from_page() | ||
|
||
if not job_list_elements: | ||
logger.debug("No job class elements found on page, skipping") | ||
return | ||
|
||
job_list = [self.job_tile_to_job(job_element) for job_element in job_list_elements] | ||
job_list = [self.job_tile_to_job(job_element) for job_element in job_element_list] | ||
|
||
for job in job_list: | ||
|
||
|
@@ -494,7 +471,7 @@ def job_tile_to_job(self, job_tile) -> Job: | |
logger.debug(f"Job link extracted: {job.link}") | ||
except NoSuchElementException: | ||
logger.warning("Job link is missing.") | ||
|
||
try: | ||
job.company = job_tile.find_element(By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span").text | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is not robust. I also advise reverting it. |
||
logger.debug(f"Job company extracted: {job.company}") | ||
|
@@ -517,11 +494,17 @@ def job_tile_to_job(self, job_tile) -> Job: | |
except NoSuchElementException: | ||
logger.warning("Job location is missing.") | ||
|
||
|
||
try: | ||
job.apply_method = job_tile.find_element(By.CLASS_NAME, 'job-card-container__apply-method').text | ||
job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__apply-method')]").text | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This doesn't work: it returns an empty array all of the time. Please revert. |
||
except NoSuchElementException as e: | ||
job.apply_method = "Applied" | ||
logger.warning(f'Apply method not found, assuming \'Applied\'. {e} {traceback.format_exc()}') | ||
try: | ||
# Fetching state when apply method is not found | ||
job_state = job_tile.find_element(By.XPATH, ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-job-state')]").text | ||
job.apply_method = "Applied" | ||
logger.warning(f'Apply method not found, state {job_state}. {e} {traceback.format_exc()}') | ||
except NoSuchElementException as e: | ||
logger.warning(f'Apply method and state not found. {e} {traceback.format_exc()}') | ||
|
||
return job | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
They are unlikely to keep this class on anything other than a ul tag, so both approaches are equivalent.