Skip to content

Commit

Permalink
Merge pull request #20 from ThePhaseless/seleniumbase
Browse files Browse the repository at this point in the history
Seleniumbase
  • Loading branch information
ThePhaseless authored Nov 24, 2024
2 parents 462e73a + c4cb6e0 commit f141fc3
Show file tree
Hide file tree
Showing 16 changed files with 1,091 additions and 558 deletions.
26 changes: 2 additions & 24 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,27 +1,5 @@
FROM python:3.12

# Inspired by https://github.com/Hudrolax/uc-docker-alpine/
RUN apt update && apt upgrade -y && apt install -y chromium xvfb

# Install build dependencies
RUN apt update && apt upgrade -y && apt install -y\
curl \
wget \
unzip \
gnupg \
bash \
stow

# Install dependencies
RUN apt install -y \
xvfb \
x11vnc \
fluxbox \
xterm \
git \
ca-certificates \
pipx \
chromium

RUN pipx install poetry
ENV DISPLAY=:0
# RUN poetry config virtualenvs.in-project true
RUN curl -sSL https://install.python-poetry.org | python3 -
18 changes: 8 additions & 10 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,17 +1,10 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/python
{
"name": "Python 3",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"build": {
"dockerfile": "Dockerfile"
},
"runArgs": [
"-p",
"8181:8191",
"--cap-add",
"SYS_ADMIN"
],
"customizations": {
"vscode": {
"extensions": [
Expand All @@ -27,13 +20,18 @@
// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
"forwardPorts": [
5900
]
// Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "pip3 install --user -r requirements.txt",
// Configure tool-specific properties.
// "customizations": {},
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
"name": "Byparr Dev Container",
"runArgs": [
"-p",
"8181:8191"
],
"features": {
"ghcr.io/devcontainers-extra/features/act:1": {}
}
}
46 changes: 37 additions & 9 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,43 @@ env:
IMAGE_NAME: ${{ github.repository }}

jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"

- name: Set up Poetry
run: pip install poetry

- name: Setup a local virtual environment (if no poetry.toml file)
run: |
poetry config virtualenvs.create true --local
poetry config virtualenvs.in-project true --local
- uses: actions/cache@v4
name: Define a cache for the virtual environment based on the dependencies lock file
with:
path: ./.venv
key: venv-${{ hashFiles('poetry.lock') }}

- name: Install dependencies
run: |
poetry install
sudo apt update
sudo apt install -y xvfb scrot python3-tk
wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
sudo apt install -y ./google-chrome-stable_current_amd64.deb
rm ./google-chrome-stable_current_amd64.deb
- name: Run tests
run: poetry run pytest

build:
needs: test
runs-on: ubuntu-latest
permissions:
contents: read
Expand All @@ -31,14 +67,6 @@ jobs:
# with sigstore/fulcio when running outside of PRs.
id-token: write

strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
- linux/arm64/v8

steps:
- name: Checkout repository
uses: actions/checkout@v4
Expand Down Expand Up @@ -92,7 +120,7 @@ jobs:
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
platforms: ${{ matrix.platform }}
platforms: linux/amd64,linux/arm64
cache-from: type=gha
cache-to: type=gha,mode=max
build-args: GITHUB_BUILD=true
Expand Down
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -162,4 +162,10 @@ cython_debug/
#.idea/

.extentions/
core
core

# Screenshots
*.png

# Downloaded files
downloaded_files/
52 changes: 10 additions & 42 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,45 +1,11 @@
FROM python:3.12-alpine
FROM python:3.12-slim

# Inspired by https://github.com/Hudrolax/uc-docker-alpine/

ARG GITHUB_BUILD=false
ENV GITHUB_BUILD=${GITHUB_BUILD}

# Install build dependencies
RUN apk update && apk upgrade && \
apk add --no-cache --virtual .build-deps \
alpine-sdk \
curl \
wget \
unzip \
gnupg

# Install dependencies
RUN apk add --no-cache \
xvfb \
x11vnc \
fluxbox \
xterm \
libffi-dev \
openssl-dev \
zlib-dev \
bzip2-dev \
readline-dev \
git \
nss \
freetype \
freetype-dev \
harfbuzz \
ca-certificates \
ttf-freefont \
pipx \
chromium \
chromium-chromedriver

WORKDIR /app
EXPOSE 8191

# python
ENV HOME=/root
ENV \
DEBIAN_FRONTEND=noninteractive \
PYTHONUNBUFFERED=1 \
Expand All @@ -50,13 +16,15 @@ ENV \
POETRY_VIRTUALENVS_IN_PROJECT=true \
DISPLAY=:0

RUN pipx install poetry
ENV PATH="/root/.local/bin:$PATH"
WORKDIR /app
EXPOSE 8191
# System packages: Xvfb, scrot, Tk bindings for Python, curl and Chromium.
# apt-get is used instead of apt because apt's command-line interface is not
# guaranteed stable for scripted use. The package index is removed in the same
# layer so it is not persisted in the image.
RUN apt-get update && \
    apt-get install -y xvfb scrot python3-tk curl chromium && \
    rm -rf /var/lib/apt/lists/*

# Install Poetry with the official installer script.
# The script is downloaded to a file first (curl -f fails on HTTP errors) rather
# than piped into python3: with the default /bin/sh there is no pipefail, so a
# failed download would hand python3 an empty stdin and the step would pass.
RUN curl -fsSL https://install.python-poetry.org -o /tmp/install-poetry.py && \
    python3 /tmp/install-poetry.py && \
    rm /tmp/install-poetry.py
ENV PATH="${HOME}/.local/bin:$PATH"
COPY pyproject.toml poetry.lock ./
RUN poetry install

COPY fix_nodriver.py ./
RUN . /app/.venv/bin/activate && python fix_nodriver.py
COPY . .
RUN ./run_vnc.sh && . /app/.venv/bin/activate && poetry run pytest
CMD ["./entrypoint.sh"]
# Exec-form CMD does no shell processing, so the "activate, then run" command
# line must be passed to /bin/sh explicitly; as a single array element it would
# be treated as the name of an executable and the container would fail to start.
# `exec` replaces the shell with python3 so it receives stop signals directly.
CMD ["/bin/sh", "-c", ". .venv/bin/activate && exec python3 main.py"]
6 changes: 0 additions & 6 deletions entrypoint.sh

This file was deleted.

87 changes: 46 additions & 41 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
from __future__ import annotations

import asyncio
import logging
import time

import uvicorn
import uvicorn.config
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException
from fastapi.responses import RedirectResponse
from sbase import SB, BaseCase

from src.models.requests import LinkRequest, LinkResponse
import src
import src.utils
import src.utils.consts
from src.models.requests import LinkRequest, LinkResponse, Solution
from src.utils import logger
from src.utils.browser import bypass_cloudflare, new_browser
from src.utils.consts import LOG_LEVEL

app = FastAPI(debug=LOG_LEVEL == logging.DEBUG, log_level=LOG_LEVEL)
Expand All @@ -28,50 +30,53 @@ def read_root():
async def health_check():
"""Health check endpoint."""
logger.info("Health check")
browser = await new_browser()
await browser.grant_all_permissions()
page = await browser.get("https://google.com")
await page.bring_to_front()
browser.stop()
# browser: Chrome = await new_browser()
# browser.get("https://google.com")
# browser.stop()
return {"status": "ok"}


@app.post("/v1")
async def read_item(request: LinkRequest):
def read_item(request: LinkRequest):
"""Handle POST requests."""
start_time = int(time.time() * 1000)
# request.url = "https://nowsecure.nl"
logger.info(f"Request: {request}")
start_time = int(time.time() * 1000)
browser = await new_browser()
await browser.grant_all_permissions()
await asyncio.sleep(1)
page = await browser.get(request.url)
await page.bring_to_front()
timeout = request.maxTimeout
if timeout == 0:
timeout = None
try:
challenged = await asyncio.wait_for(bypass_cloudflare(page), timeout=timeout)
except asyncio.TimeoutError as e:
logger.info("Timed out bypassing Cloudflare")
browser.stop()
raise HTTPException(
detail="Timed out bypassing Cloudflare", status_code=408
) from e
except Exception as e:
logger.error(e)
browser.stop()
raise HTTPException(detail="Couldn't bypass", status_code=500) from e

logger.info(f"Got webpage: {request.url}")

response = await LinkResponse.create(
page=page,
start_timestamp=start_time,
challenged=challenged,
)

browser.stop()
response: LinkResponse

# start_time = int(time.time() * 1000)
with SB(uc=True, locale_code="en", test=False, xvfb=True, ad_block=True) as sb:
sb: BaseCase
sb.uc_open_with_reconnect(request.url)
sb.uc_gui_click_captcha()
logger.info(f"Got webpage: {request.url}")
sb.save_screenshot("screenshot.png")
logger.info(f"Got webpage: {request.url}")

source = sb.get_page_source()
source_bs = BeautifulSoup(source, "html.parser")
title_tag = source_bs.title
if title_tag is None:
raise HTTPException(status_code=500, detail="Title tag not found")

if title_tag.string in src.utils.consts.CHALLENGE_TITLES:
raise HTTPException(status_code=500, detail="Could not bypass challenge")

title = title_tag.string
logger.info(f"Title: {title}")
response = LinkResponse(
message="Success",
solution=Solution(
userAgent=sb.get_user_agent(),
url=sb.get_current_url(),
status=200,
cookies=sb.get_cookies(),
headers={},
response=source,
),
startTimestamp=start_time,
)

return response


Expand Down
Loading

0 comments on commit f141fc3

Please sign in to comment.