Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
WIP solve amazon captcha with 2captcha
Browse files Browse the repository at this point in the history
Use coordinate method
Only search for relevant info on page
Meatplay authored and codders committed Dec 17, 2024
1 parent 44a8037 commit 19caf9e
Showing 9 changed files with 104 additions and 14 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -35,6 +35,7 @@ gunicorn = "23.0.0"
flask-api = {editable = true, ref = "develop", git = "git+https://github.com/flask-api/flask-api.git"}
setuptools = "==75.6.0"
certifi = "==2024.12.14"
2captcha-python = "*"

[dev-packages]
exceptiongroup = "*"
19 changes: 14 additions & 5 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

63 changes: 62 additions & 1 deletion flathunter/abstract_crawler.py
Original file line number Diff line number Diff line change
@@ -5,6 +5,9 @@
from typing import Optional, Any
import json

from io import BytesIO
import base64

import backoff
import requests
# pylint: disable=unused-import
@@ -13,10 +16,11 @@
from bs4 import BeautifulSoup

from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver import Chrome
from selenium.webdriver import Chrome, Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains

from flathunter import proxies
from flathunter.captcha.captcha_solver import CaptchaUnsolvableError
@@ -196,6 +200,7 @@ def resolve_geetest(self, driver):
driver.refresh()
raise

# pylint: disable=too-many-locals
@backoff.on_exception(wait_gen=backoff.constant,
exception=CaptchaUnsolvableError,
max_tries=3)
@@ -268,6 +273,62 @@ def log_filter(log_):
driver.refresh()
raise

@backoff.on_exception(wait_gen=backoff.constant,
                      exception=CaptchaUnsolvableError,
                      max_tries=3)
def resolve_amazon(self, driver):
    """Resolve the Amazon (AWS WAF) captcha by clicking solver-provided coordinates.

    The captcha widget lives inside the shadow root of the ``awswaf-captcha``
    element. The widget is screenshotted, the image is sent to
    ``self.captcha_solver.solve_amazon``, and every returned point is clicked,
    followed by a click near the bottom-right corner of the widget and, if it
    can still be found, a click on the ``amzn-btn-verify-internal`` button.

    Args:
        driver: Selenium web driver currently showing the captcha page.

    Raises:
        CaptchaUnsolvableError: if anything goes wrong, or if the
            ``awswaf-captcha`` element is still present afterwards. The page
            is refreshed before raising so that a retry starts from a fresh
            captcha.
    """
    # Imported locally so this method does not depend on edits to the
    # module-level import list.
    # pylint: disable=import-outside-toplevel
    from selenium.common.exceptions import StaleElementReferenceException

    shadow_root_script = "return document.querySelector('awswaf-captcha').shadowRoot"
    # Subtracted from the solver's coordinates before clicking.
    # NOTE(review): presumably half the widget's width/height, converting
    # top-left based screenshot coordinates into centre-based offsets - confirm.
    offset_x, offset_y = 160, 211
    # Distance of the extra final click from the widget's bottom-right corner.
    corner_inset = 30

    try:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        sleep(3)
        widget = driver.execute_script(shadow_root_script).find_element(By.ID, "root")
        size = widget.size

        # Change the selection of the widget's <select> element.
        selector = widget.find_element(By.TAG_NAME, "select")
        selector.click()
        sleep(1)
        selector.send_keys(Keys.DOWN)
        sleep(3)

        # Look the widget up again before taking the screenshot.
        widget = driver.execute_script(shadow_root_script).find_element(By.ID, "root")
        image_b64 = base64.b64encode(widget.screenshot_as_png).decode('utf-8')

        # Send image in 2captcha service
        result = self.captcha_solver.solve_amazon(image_b64)
        logger.info(result.token)

        # The token is parsed as "<prefix>:k=<x>,k=<y>;k=<x>,k=<y>;...".
        click_points = [
            [int(pair.split('=')[1]) for pair in point.split(',')]
            for point in result.token.split(':')[1].split(';')
        ]
        click_points.append([size['width'] - corner_inset, size['height'] - corner_inset])

        actions = ActionChains(driver)
        for point in click_points:
            actions.move_to_element_with_offset(
                widget, point[0] - offset_x, point[1] - offset_y
            ).click()
            actions.perform()
            sleep(0.5)
            actions.reset_actions()
        sleep(1)

        try:
            confirm_button = widget.find_element(By.ID, "amzn-btn-verify-internal")
            actions.move_to_element_with_offset(confirm_button, 40, 15).click()
            actions.perform()
            sleep(4)
        except (NoSuchElementException, StaleElementReferenceException):
            # The button (or the whole widget) may already be gone because the
            # clicks above submitted the captcha. The presence check below is
            # the single source of truth for success.
            pass

        try:
            driver.find_element(By.TAG_NAME, "awswaf-captcha")
        except NoSuchElementException:
            logger.info("Captcha solved")
        else:
            raise CaptchaUnsolvableError()
    except Exception as ex:
        # Any failure (including the "still present" error raised above) is
        # reported as unsolvable after refreshing the page.
        driver.refresh()
        raise CaptchaUnsolvableError() from ex

@backoff.on_exception(wait_gen=backoff.constant,
exception=CaptchaUnsolvableError,
max_tries=3)
5 changes: 2 additions & 3 deletions flathunter/captcha/capmonster_solver.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""Captcha solver for CapMonster Captcha Solving Service (https://capmonster.cloud)"""
import json
from typing import Dict
from time import sleep
import backoff
@@ -8,8 +7,6 @@
from flathunter.logging import logger
from flathunter.captcha.captcha_solver import (
CaptchaSolver,
CaptchaBalanceEmpty,
CaptchaUnsolvableError,
GeetestResponse,
AwsAwfResponse,
RecaptchaResponse,
@@ -26,6 +23,8 @@ def solve_recaptcha(self, google_site_key: str, page_url: str) -> RecaptchaRespo
"""Should be implemented in subclass"""
raise NotImplementedError("Recaptcha captcha solving is not implemented for Capmonster")

# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments
def solve_awswaf(
self,
sitekey: str,
3 changes: 2 additions & 1 deletion flathunter/captcha/captcha_solver.py
Original file line number Diff line number Diff line change
@@ -22,7 +22,6 @@ class AwsAwfResponse:
"""Response from AWS WAF"""
token: str


class CaptchaSolver:
"""Interface for Captcha solvers"""

@@ -39,6 +38,8 @@ def solve_geetest(self, geetest: str, challenge: str, page_url: str) -> GeetestR
"""Should be implemented in subclass"""
raise NotImplementedError()

# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments
def solve_awswaf(
self,
sitekey: str,
2 changes: 2 additions & 0 deletions flathunter/captcha/imagetyperz_solver.py
Original file line number Diff line number Diff line change
@@ -59,6 +59,8 @@ def solve_recaptcha(self, google_site_key: str, page_url: str) -> RecaptchaRespo
)
return RecaptchaResponse(self.__retrieve_imagetyperz_result(captcha_id))

# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments
def solve_awswaf(
self,
sitekey: str,
18 changes: 16 additions & 2 deletions flathunter/captcha/twocaptcha_solver.py
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@
from time import sleep
import backoff
import requests
from twocaptcha import TwoCaptcha

from flathunter.logging import logger
from flathunter.captcha.captcha_solver import (
@@ -47,6 +48,8 @@ def solve_recaptcha(self, google_site_key: str, page_url: str) -> RecaptchaRespo
captcha_id = self.__submit_2captcha_request(params)
return RecaptchaResponse(self.__retrieve_2captcha_result(captcha_id))

# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments
def solve_awswaf(
self,
sitekey: str,
@@ -56,8 +59,19 @@ def solve_awswaf(
captcha_script: str,
page_url: str
) -> AwsAwfResponse:
"""Should be implemented at some point"""
raise NotImplementedError("AWS WAF captchas not supported for 2Captcha")
"""Using the `solve_amazon` method instead"""
raise NotImplementedError()

def solve_amazon(self, image_b64: str) -> AwsAwfResponse:
    """Solve an AWS WAF image captcha via 2Captcha's coordinates task.

    Args:
        image_b64: Base64-encoded image of the captcha.

    Returns:
        An ``AwsAwfResponse`` wrapping the ``code`` field of the 2Captcha reply.

    Raises:
        CaptchaUnsolvableError: if the 2Captcha client returns ``None``.
    """
    client = TwoCaptcha(self.api_key, defaultTimeout=60, pollingInterval=5)
    answer = client.coordinates(image_b64, lang='en')
    if answer is not None:
        return AwsAwfResponse(answer["code"])
    raise CaptchaUnsolvableError("Got None from 2captcha solve")

@backoff.on_exception(**CaptchaSolver.backoff_options)
def __submit_2captcha_request(self, params: Dict[str, str]) -> str:
5 changes: 3 additions & 2 deletions flathunter/crawler/immobilienscout.py
Original file line number Diff line number Diff line change
@@ -11,7 +11,6 @@
from flathunter.abstract_crawler import Crawler
from flathunter.logging import logger
from flathunter.chrome_wrapper import get_chrome_driver
from flathunter.captcha.twocaptcha_solver import TwoCaptchaSolver
from flathunter.exceptions import DriverLoadException

STATIC_URL_PATTERN = re.compile(r'https://www\.immobilienscout24\.de')
@@ -35,7 +34,7 @@ class Immobilienscout(Crawler):

URL_PATTERN = STATIC_URL_PATTERN

JSON_PATH_PARSER_ENTRIES = parse("$..['resultlist.realEstate']")
JSON_PATH_PARSER_ENTRIES = parse("$..['resultlistEntries']..['resultlist.realEstate']")
JSON_PATH_PARSER_IMAGES = parse("$..galleryAttachments"
"..attachment[?'@xsi.type'=='common:Picture']"
"..['@href'].`sub(/(.*\\\\.jpe?g).*/, \\\\1)`")
@@ -117,6 +116,8 @@ def get_results(self, search_url, max_pages=None):

def get_entries_from_javascript(self):
"""Get entries from JavaScript"""
if "Warum haben wir deine Anfrage blockiert?" in self.get_driver_force().page_source:
self.resolve_amazon(self.get_driver_force())
try:
result_json = self.get_driver_force().execute_script('return window.IS24.resultList;')
except JavascriptException:
2 changes: 2 additions & 0 deletions flathunter/gmaps_duration_processor.py
Original file line number Diff line number Diff line change
@@ -24,6 +24,8 @@ def process_expose(self, expose):

def get_formatted_durations(self, address):
"""Return a formatted list of GoogleMaps durations"""
if address is None:
return ""
out = ""
for duration in self.config.get('durations', []):
if 'destination' in duration and 'name' in duration:

0 comments on commit 19caf9e

Please sign in to comment.