Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: 🐛 Hotfix. Change selectors due to new html structure #905

Merged
Show file tree
Hide file tree
Changes from 36 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
91c7b17
Added integration with aiml
waterstark Nov 5, 2024
e3b861c
Update README.md
surapuramakhil Nov 11, 2024
b64eb03
Add GroqAIModel support
innovatorved Nov 12, 2024
69e45b6
Merge pull request #824 from AIHawk-co/release/v11.15.2024
surapuramakhil Nov 12, 2024
b8d7ffa
README.md updated for GROQ API
innovatorved Nov 13, 2024
3a9e5b7
Update llm_manager.py
surapuramakhil Nov 13, 2024
8c10dcd
Resolve conflicts
waterstark Nov 13, 2024
ed337c7
Merge branch 'release/v11.15.2024' into integration-with-aiml
surapuramakhil Nov 13, 2024
6fc96ed
Merge branch 'release/v3.1.0' into integration-with-aiml
surapuramakhil Nov 13, 2024
be54105
Merge pull request #757 from waterstark/integration-with-aiml
surapuramakhil Nov 13, 2024
72027a8
Merge remote-tracking branch 'upstream/release/v3.1.0'
innovatorved Nov 14, 2024
6280296
groq added in constants
innovatorved Nov 14, 2024
306fca6
just groq version added in requirements
innovatorved Nov 14, 2024
1c1b7f3
Merge pull request #822 from innovatorved/main
surapuramakhil Nov 14, 2024
fd69fc0
perf: :zap: Optimize prompt for better caching
chakaponi Nov 16, 2024
c5955bd
Add AI/ML API info
OctavianTheI Nov 18, 2024
0330af9
Merge pull request #882 from OctavianTheI/patch-1
surapuramakhil Nov 18, 2024
e18e2e6
Merge branch 'release/v4.1.0' into fix/improve-prompts-caching
surapuramakhil Nov 18, 2024
e038183
Merge pull request #872 from chakaponi/fix/improve-prompts-caching
surapuramakhil Nov 18, 2024
1f68c27
"Log time in Job application result"
NamamiShanker Nov 21, 2024
a349a4a
"Refactor logging current time"
NamamiShanker Nov 21, 2024
48481b5
Add files via upload
Tgenz1213 Nov 21, 2024
f21530d
Update pull_request_template.md
Tgenz1213 Nov 21, 2024
eb3e74a
Merge pull request #910 from Tgenz1213/PR_Template
surapuramakhil Nov 22, 2024
218af31
Merge branch 'release/v4.1.0' into feature/log_time
surapuramakhil Nov 22, 2024
3772d6b
Merge pull request #907 from NamamiShanker/feature/log_time
surapuramakhil Nov 22, 2024
51fe7eb
Merge pull request #913 from AIHawk-FOSS/main
surapuramakhil Nov 22, 2024
86ef242
fix: :bug: Change selectors due to new html structure
chakaponi Nov 23, 2024
101a42e
Merge branch 'hotfix/v2024.11.23' into bug/incorrect-element-selectors
surapuramakhil Nov 23, 2024
a2bfb04
test: :white_check_mark: Fix test after removing other developer's hack
chakaponi Nov 23, 2024
d7aeabf
Merge branch 'bug/incorrect-element-selectors' of https://github.com/…
chakaponi Nov 23, 2024
2dd187a
fix: Add classes for temporary solution
chakaponi Nov 24, 2024
76b06ae
Merge branch 'bug/incorrect-element-selectors' into hotfix/v2024.11.23
chakaponi Nov 24, 2024
dba7b10
Rewrite test, merge solution with changes, deduplicate code
chakaponi Nov 24, 2024
170c413
Merge branch 'hotfix/v2024.11.23' into bug/incorrect-element-selectors
chakaponi Nov 24, 2024
8411cf8
style: :art: Fix naming back
chakaponi Nov 25, 2024
22f2c3b
review changes
surapuramakhil Nov 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 23 additions & 36 deletions src/ai_hawk/job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,6 @@ def start_applying(self):
def get_jobs_from_page(self, scroll=False):

try:

no_jobs_element = self.driver.find_element(By.CLASS_NAME, 'jobs-search-two-pane__no-results-banner--expand')
if 'No matching jobs found' in no_jobs_element.text or 'unfortunately, things aren' in self.driver.page_source.lower():
logger.debug("No matching jobs found on this page, skipping.")
Expand All @@ -254,23 +253,22 @@ def get_jobs_from_page(self, scroll=False):
pass

try:
# XPath query to find the ul tag with class scaffold-layout__list-container
job_results_xpath_query = "//ul[contains(@class, 'scaffold-layout__list-container')]"
job_results = self.driver.find_element(By.XPATH, job_results_xpath_query)
jobs_container = self.driver.find_element(By.CLASS_NAME, 'scaffold-layout__list-container')
surapuramakhil marked this conversation as resolved.
Show resolved Hide resolved

if scroll:
job_results_scrolableElament = job_results.find_element(By.XPATH,"..")
logger.warning(f'is scrollable: {browser_utils.is_scrollable(job_results_scrolableElament)}')
jobs_container_scrolableElement = jobs_container.find_element(By.XPATH,"..")
surapuramakhil marked this conversation as resolved.
Show resolved Hide resolved
logger.warning(f'is scrollable: {browser_utils.is_scrollable(jobs_container_scrolableElement)}')

browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement)
browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement, step=300, reverse=True)

browser_utils.scroll_slow(self.driver, job_results_scrolableElament)
browser_utils.scroll_slow(self.driver, job_results_scrolableElament, step=300, reverse=True)
job_element_list = jobs_container.find_elements(By.CSS_SELECTOR, 'div[data-job-id]')
surapuramakhil marked this conversation as resolved.
Show resolved Hide resolved

job_list_elements = job_results.find_elements(By.XPATH, ".//li[contains(@class, 'jobs-search-results__list-item') and contains(@class, 'ember-view')]")
if not job_list_elements:
if not job_element_list:
logger.debug("No job class elements found on page, skipping.")
return []

return job_list_elements
return job_element_list

except NoSuchElementException as e:
logger.warning(f'No job results found on the page. \n expection: {traceback.format_exc()}')
Expand All @@ -288,13 +286,14 @@ def read_jobs(self):
except NoSuchElementException:
pass

job_results = self.driver.find_element(By.CLASS_NAME, "jobs-search-results-list")
browser_utils.scroll_slow(self.driver, job_results)
browser_utils.scroll_slow(self.driver, job_results, step=300, reverse=True)
job_list_elements = self.driver.find_elements(By.CLASS_NAME, 'scaffold-layout__list-container')[0].find_elements(By.CLASS_NAME, 'jobs-search-results__list-item')
if not job_list_elements:
raise Exception("No job class elements found on page")
job_list = [self.job_tile_to_job(job_element) for job_element in job_list_elements]
jobs_container = self.driver.find_element(By.CLASS_NAME, 'scaffold-layout__list-container')
surapuramakhil marked this conversation as resolved.
Show resolved Hide resolved
browser_utils.scroll_slow(self.driver, jobs_container)
browser_utils.scroll_slow(self.driver, jobs_container, step=300, reverse=True)

job_element_list = jobs_container.find_elements(By.CSS_SELECTOR, 'div[data-job-id]')
if not job_element_list:
raise Exception("No job elements found on page")
job_list = [self.job_tile_to_job(job_element) for job_element in job_element_list]
for job in job_list:
if self.is_blacklisted(job.title, job.company, job.link, job.location):
logger.info(f"Blacklisted {job.title} at {job.company} in {job.location}, skipping...")
Expand All @@ -307,21 +306,9 @@ def read_jobs(self):
continue

def apply_jobs(self):
try:
no_jobs_element = self.driver.find_element(By.CLASS_NAME, 'jobs-search-two-pane__no-results-banner--expand')
if 'No matching jobs found' in no_jobs_element.text or 'unfortunately, things aren' in self.driver.page_source.lower():
logger.debug("No matching jobs found on this page, skipping")
return
except NoSuchElementException:
pass

job_list_elements = self.get_jobs_from_page()

if not job_list_elements:
logger.debug("No job class elements found on page, skipping")
return
job_element_list = self.get_jobs_from_page()

job_list = [self.job_tile_to_job(job_element) for job_element in job_list_elements]
job_list = [self.job_tile_to_job(job_element) for job_element in job_element_list]

for job in job_list:

Expand Down Expand Up @@ -494,9 +481,9 @@ def job_tile_to_job(self, job_tile) -> Job:
logger.debug(f"Job link extracted: {job.link}")
except NoSuchElementException:
logger.warning("Job link is missing.")

try:
job.company = job_tile.find_element(By.XPATH, ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span").text
job.company = job_tile.find_element(By.XPATH, './/span[contains(normalize-space(), " · ")]').text.split(' · ')[0].strip()
surapuramakhil marked this conversation as resolved.
Show resolved Hide resolved
logger.debug(f"Job company extracted: {job.company}")
except NoSuchElementException as e:
logger.warning(f'Job company is missing. {e} {traceback.format_exc()}')
Expand All @@ -513,12 +500,12 @@ def job_tile_to_job(self, job_tile) -> Job:
logger.warning(f"Failed to extract job ID: {e}", exc_info=True)

try:
job.location = job_tile.find_element(By.CLASS_NAME, 'job-card-container__metadata-item').text
job.location = job_tile.find_element(By.XPATH, './/span[contains(normalize-space(), " · ")]').text.split(' · ')[-1].strip()
surapuramakhil marked this conversation as resolved.
Show resolved Hide resolved
except NoSuchElementException:
logger.warning("Job location is missing.")

try:
job.apply_method = job_tile.find_element(By.CLASS_NAME, 'job-card-container__apply-method').text
job.apply_method = job_tile.find_element(By.XPATH, ".//div[contains(@class, 'job-card-container__job-insight-text') and normalize-space() = 'Easy Apply']").text
surapuramakhil marked this conversation as resolved.
Show resolved Hide resolved
except NoSuchElementException as e:
job.apply_method = "Applied"
logger.warning(f'Apply method not found, assuming \'Applied\'. {e} {traceback.format_exc()}')
Expand Down
4 changes: 2 additions & 2 deletions src/ai_hawk/linkedIn_easy_applier.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,8 +376,8 @@ def fill_up(self, job_context : JobContext) -> None:
EC.presence_of_element_located((By.CLASS_NAME, 'jobs-easy-apply-content'))
)

pb4_elements = easy_apply_content.find_elements(By.CLASS_NAME, 'pb4')
for element in pb4_elements:
input_elements = easy_apply_content.find_elements(By.CLASS_NAME, 'jobs-easy-apply-form-section__grouping')
for element in input_elements:
self._process_form_element(element, job_context)
except Exception as e:
logger.error(f"Failed to find form elements: {e}")
Expand Down
54 changes: 25 additions & 29 deletions tests/test_aihawk_job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,21 +71,33 @@ def test_get_jobs_from_page_no_jobs(mocker, job_manager):

def test_get_jobs_from_page_with_jobs(mocker, job_manager):
"""Test get_jobs_from_page when job elements are found."""
# Mock the no_jobs_element to behave correctly
mock_no_jobs_element = mocker.Mock()
mock_no_jobs_element.text = "No matching jobs found"
# Mock no_jobs_element to simulate the absence of "No matching jobs found" banner
no_jobs_element = mocker.Mock()
surapuramakhil marked this conversation as resolved.
Show resolved Hide resolved
no_jobs_element.text = "" # Empty text means "No matching jobs found" is not present

# Mocking the find_element to return the mock no_jobs_element
mocker.patch.object(job_manager.driver, 'find_element',
return_value=mock_no_jobs_element)
# Mock the driver to simulate the page source
mocker.patch.object(job_manager.driver, 'page_source', return_value="")

# Mock the page_source
mocker.patch.object(job_manager.driver, 'page_source',
return_value="some page content")
# Mock the outer find_element
container_mock = mocker.Mock()

# Ensure jobs are returned as empty list due to "No matching jobs found"
jobs = job_manager.get_jobs_from_page()
assert jobs == [] # No jobs expected due to "No matching jobs found"
# Mock the inner find_elements to return job list items
job_element_mock = mocker.Mock()
# Simulating two job items
job_elements_list = [job_element_mock, job_element_mock]

# Return the container mock, which itself returns the job elements list
container_mock.find_elements.return_value = job_elements_list
mocker.patch.object(job_manager.driver, 'find_element', side_effect=[
no_jobs_element,
container_mock
])

job_manager.get_jobs_from_page()

assert job_manager.driver.find_element.call_count == 2
assert container_mock.find_elements.call_count == 1



def test_apply_jobs_with_no_jobs(mocker, job_manager):
Expand All @@ -94,9 +106,6 @@ def test_apply_jobs_with_no_jobs(mocker, job_manager):
mock_element = mocker.Mock()
mock_element.text = "No matching jobs found"

# Mock the driver to simulate the page source
mocker.patch.object(job_manager.driver, 'page_source', return_value="")

# Mock the driver to return the mock element when find_element is called
mocker.patch.object(job_manager.driver, 'find_element',
return_value=mock_element)
Expand All @@ -111,26 +120,13 @@ def test_apply_jobs_with_no_jobs(mocker, job_manager):
def test_apply_jobs_with_jobs(mocker, job_manager):
"""Test apply_jobs when jobs are present."""

# Mock no_jobs_element to simulate the absence of "No matching jobs found" banner
no_jobs_element = mocker.Mock()
no_jobs_element.text = "" # Empty text means "No matching jobs found" is not present
mocker.patch.object(job_manager.driver, 'find_element',
return_value=no_jobs_element)

# Mock the page_source to simulate what the page looks like when jobs are present
mocker.patch.object(job_manager.driver, 'page_source',
return_value="some job content")

# Mock the outer find_elements (scaffold-layout__list-container)
container_mock = mocker.Mock()

# Mock the inner find_elements to return job list items
# Simulating two job elements
job_element_mock = mocker.Mock()
# Simulating two job items
job_elements_list = [job_element_mock, job_element_mock]

mocker.patch.object(job_manager.driver, 'find_elements',
return_value=[container_mock])

mocker.patch.object(job_manager, 'get_jobs_from_page', return_value=job_elements_list)

Expand Down
Loading