Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/browser initialization, improved UI #27

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
21 changes: 0 additions & 21 deletions .env.example

This file was deleted.

Empty file added .gitattributes
Empty file.
4 changes: 0 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ This project builds upon the foundation of the [browser-use](https://github.com/

<video src="https://github.com/user-attachments/assets/58c0f59e-02b4-4413-aba8-6184616bf181" controls="controls" width="500" height="300" >Your browser does not support playing this video!</video>

**Changelog**
- [x] **2025/01/06:** Thanks to @richard-devbot, a new and well-designed WebUI has been released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113).


## Environment Installation

1. **Python Version:** Ensure you have Python 3.11 or higher installed.
Expand Down
1 change: 1 addition & 0 deletions run.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python webui.py --ip 127.0.0.1 --port 7788
4 changes: 2 additions & 2 deletions src/agent/custom_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,10 +245,10 @@ async def run(self, max_steps: int = 100) -> AgentHistoryList:
if not await self._validate_output():
continue

logger.info('Task completed successfully')
logger.info('Task completed successfully')
break
else:
logger.info('❌ Failed to complete task in maximum steps')
logger.info('Task failed to complete within maximum steps')

return self.history

Expand Down
74 changes: 70 additions & 4 deletions src/browser/custom_browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,82 @@
# @ProjectName: browser-use-webui
# @FileName: browser.py

import logging
from typing import Optional
import playwright.async_api
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContextConfig, BrowserContext

from browser_use.browser.context import BrowserContext, BrowserContextConfig, BrowserSession
from .custom_context import CustomBrowserContext

logger = logging.getLogger(__name__)

class CustomBrowser(Browser):
    """Browser that owns its Playwright driver and Chromium process.

    The Chromium process is started lazily by :meth:`launch` and reused by
    every context created through :meth:`new_context`.
    """

    # Chromium flags that new_context() merges into
    # ``config.extra_chromium_args`` before the browser is launched.
    # NOTE(review): several of these ('--no-sandbox', '--disable-web-security',
    # '--disable-site-isolation-trials') weaken browser isolation; they are
    # kept because the original code applied them unconditionally.
    _NAVIGATION_ARGS = (
        '--disable-popup-blocking',
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu',
        '--disable-background-timer-throttling',
        '--disable-backgrounding-occluded-windows',
        '--disable-renderer-backgrounding',
        '--disable-background-networking',
        '--window-size=1920,1080',
        '--disable-features=IsolateOrigins,site-per-process',
        '--disable-web-security',
        '--disable-site-isolation-trials',
    )

    def __init__(self, config: BrowserConfig):
        """Store *config* and start with no driver and no browser process."""
        super().__init__(config)
        self._browser = None
        self._playwright = None

    @property
    def browser(self):
        """The launched Playwright browser, or ``None`` before launch()."""
        return self._browser

    async def launch(self):
        """Start Playwright and Chromium if they are not running yet.

        Returns:
            The Playwright browser instance (existing or newly launched).
        """
        if not self._playwright:
            # Local import keeps this method usable even if the module-level
            # import is rearranged.
            import playwright.async_api
            self._playwright = await playwright.async_api.async_playwright().start()

        if not self._browser:
            self._browser = await self._playwright.chromium.launch(
                headless=self.config.headless,
                args=self.config.extra_chromium_args or [],
                executable_path=self.config.chrome_instance_path,
            )
        return self._browser

    async def new_context(
        self, config: BrowserContextConfig = BrowserContextConfig(), context: CustomBrowserContext = None
    ) -> BrowserContext:
        """Create a browser context with settings to prevent new windows and handle navigation.

        The navigation flags are merged into ``config.extra_chromium_args``
        exactly once. The browser is relaunched only when it was started
        before those flags were present; otherwise the running browser (and
        every context already opened on it) is left untouched.

        Args:
            config: Context configuration forwarded to CustomBrowserContext.
            context: Optional pre-existing context forwarded unchanged.

        Returns:
            A CustomBrowserContext bound to this browser.
        """
        current_args = list(self.config.extra_chromium_args or [])
        missing_args = [flag for flag in self._NAVIGATION_ARGS if flag not in current_args]

        if missing_args:
            self.config.extra_chromium_args = current_args + missing_args
            # A browser launched without these flags must be restarted to
            # pick them up; this can only happen on the first call.
            if self._browser:
                await self._browser.close()
                self._browser = None

        if not self._browser:
            await self.launch()

        return CustomBrowserContext(browser=self, config=config, context=context)

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Ensure proper cleanup of resources"""
        if self._browser and not self.config.chrome_instance_path:
            await self._browser.close()
        if self._playwright:
            await self._playwright.stop()
        # Drop the handles so a later launch() starts fresh instead of
        # reusing objects whose driver has been stopped.
        self._browser = None
        self._playwright = None
225 changes: 165 additions & 60 deletions src/browser/custom_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@

import asyncio
import base64
import json
import logging
import json
import os

from playwright.async_api import Browser as PlaywrightBrowser
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from typing import Optional, Type, Dict, List
from playwright.async_api import Browser as PlaywrightBrowser, BrowserContext as PlaywrightContext
from browser_use.browser.browser import Browser
from browser_use.browser.context import BrowserContext, BrowserContextConfig, BrowserSession
from browser_use.browser.views import BrowserState, TabInfo
from browser_use.dom.views import DOMElementNode, DOMBaseNode

logger = logging.getLogger(__name__)

Expand All @@ -22,22 +24,21 @@ class CustomBrowserContext(BrowserContext):

def __init__(
    self,
    browser: Browser,
    config: BrowserContextConfig = BrowserContextConfig(),
    context: Optional[PlaywrightContext] = None,
):
    """Initialize custom browser context with proper argument order

    Args:
        browser: The owning browser wrapper, passed to the base class.
        config: Context configuration, passed to the base class.
        context: Optional already-created Playwright context to reuse
            instead of creating a new one.
    """
    super().__init__(browser, config)
    # Stored under a private name because ``context`` is exposed as a
    # read-only property on this class.
    self._context = context
    self.session = None

async def _create_context(self, browser: PlaywrightBrowser):
"""Creates a new browser context with anti-detection measures and loads cookies if available."""
if self.context:
return self.context
if self.browser.config.chrome_instance_path and len(browser.contexts) > 0:
# Connect to existing Chrome instance instead of creating new one
context = browser.contexts[0]
else:
# Original code for creating new context
if self._context:
return self._context

try:
context = await browser.new_context(
viewport=self.config.browser_window_size,
no_viewport=False,
Expand All @@ -46,51 +47,155 @@ async def _create_context(self, browser: PlaywrightBrowser):
'(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
),
java_script_enabled=True,
bypass_csp=self.config.disable_security,
ignore_https_errors=self.config.disable_security,
bypass_csp=True,
ignore_https_errors=True,
record_video_dir=self.config.save_recording_path,
record_video_size=self.config.browser_window_size # set record video size
record_video_size=self.config.browser_window_size,
)

# Create initial page
page = await context.new_page()
await page.goto("about:blank", wait_until="domcontentloaded")

# Add event listener to handle new windows
async def handle_popup(popup):
try:
url = popup.url
main_page = context.pages[0]
await main_page.goto(url)
await popup.close()
except Exception as e:
logger.error(f"Error handling popup: {str(e)}")

context.on("page", handle_popup)

# Configure tracing if path is set
if self.config.trace_path:
await context.tracing.start(screenshots=True, snapshots=True, sources=True)

# Load cookies if they exist
if self.config.cookies_file and os.path.exists(self.config.cookies_file):
try:
with open(self.config.cookies_file, 'r') as f:
cookies = json.load(f)
logger.info(f'Loaded {len(cookies)} cookies from {self.config.cookies_file}')
await context.add_cookies(cookies)
except Exception as e:
logger.error(f"Error loading cookies: {str(e)}")

# Expose anti-detection scripts
await context.add_init_script(
"""
// Webdriver property
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
});

// Languages
Object.defineProperty(navigator, 'languages', {
get: () => ['en-US', 'en']
});

// Plugins
Object.defineProperty(navigator, 'plugins', {
get: () => [1, 2, 3, 4, 5]
});

// Chrome runtime
window.chrome = { runtime: {} };

// Permissions
const originalQuery = window.navigator.permissions.query;
window.navigator.permissions.query = (parameters) => (
parameters.name === 'notifications' ?
Promise.resolve({ state: Notification.permission }) :
originalQuery(parameters)
);

// Handle window.open
window.open = (url) => {
window.location.href = url;
return null;
};
"""
)

return context

except Exception as e:
logger.error(f"Error creating browser context: {str(e)}")
raise

async def __aenter__(self):
    """Override the base context's enter to handle navigation properly

    Creates the Playwright context on first entry, then builds a session
    around the first page with an empty placeholder DOM state.

    Returns:
        This context object.
    """
    if not self._context:
        self._context = await self._create_context(self.browser.browser)

    # Create session without waiting for title
    if not self.session:
        # Get the first page or create one
        page = self._context.pages[0] if self._context.pages else await self._context.new_page()

        # Placeholder DOM tree used until a real state is computed
        empty_tree = DOMElementNode(
            is_visible=True,
            parent=None,
            tag_name="html",
            xpath="/html",
            attributes={},
            children=[],
            is_interactive=False,
            is_top_element=True,
            shadow_root=False,
            highlight_index=None,
        )

        # Reading the title can fail (e.g. while the page is navigating);
        # fall back to a fixed label rather than aborting session setup.
        try:
            title = await page.title() or "New Page"
        except Exception:
            title = "New Page"

        state = BrowserState(
            element_tree=empty_tree,
            selector_map={0: empty_tree},
            url=page.url,
            title=title,
            tabs=[TabInfo(page_id=0, url=page.url, title=title)],
            screenshot=None,
        )

        self.session = BrowserSession(
            context=self._context,
            current_page=page,
            cached_state=state,
        )

        # Redirect popups into the session page and close the popup.
        # Registered here because ``page`` only exists on this branch.
        async def handle_popup(popup):
            try:
                url = popup.url
                await page.goto(url)
                await popup.close()
            except Exception as e:
                logger.error(f"Error handling popup: {str(e)}")

        page.on("popup", handle_popup)

    return self

async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Cleanup resources properly

    Stops tracing (saving the trace when ``config.trace_path`` is set) and
    closes the context unless the browser config has a
    ``chrome_instance_path``.
    """
    playwright_context = self._context
    if playwright_context is None:
        return

    try:
        if self.config.trace_path:
            # Without ``path`` Playwright discards the recorded trace.
            # NOTE(review): assumes trace_path is a directory, following
            # browser-use's TRACE_PATH/{context_id}.zip convention - confirm.
            trace_name = f"{getattr(self, 'context_id', 'trace')}.zip"
            await playwright_context.tracing.stop(
                path=os.path.join(self.config.trace_path, trace_name)
            )
    finally:
        # Close even if stopping the trace failed, so the context is not leaked.
        if not self.browser.config.chrome_instance_path:
            await playwright_context.close()
            # Forget the closed context so a later __aenter__ creates a new one.
            self._context = None
            self.session = None

@property
def context(self) -> PlaywrightContext:
    """Read-only access to the stored Playwright context (may be ``None``)."""
    playwright_context = self._context
    return playwright_context
Loading