Skip to content

Commit

Permalink
Merge pull request #104 from vvincent1234/fix/requirements_update
Browse files Browse the repository at this point in the history
update requirements
  • Loading branch information
warmshao authored Jan 13, 2025
2 parents bd696d2 + ae47523 commit be89b90
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 41 deletions.
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
browser-use>=0.1.18
langchain-google-genai>=2.0.8
browser-use==0.1.18
langchain-google-genai==2.0.8
pyperclip
gradio
langchain-ollama
Expand Down
65 changes: 33 additions & 32 deletions src/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,38 +164,39 @@ def get_latest_files(directory: str, file_types: list = ['.webm', '.zip']) -> Di
print(f"Error getting latest {file_type} file: {e}")

return latest_files
async def capture_screenshot(browser_context) -> str:
async def capture_screenshot(browser_context):
"""Capture a JPEG screenshot of the active browser page and base64-encode it."""
try:
# Extract the Playwright browser instance
playwright_browser = browser_context.browser.playwright_browser # Ensure this is correct.

# Check if the browser instance is valid and if an existing context can be reused
if playwright_browser and playwright_browser.contexts:
playwright_context = playwright_browser.contexts[0]

# Access pages in the context
if playwright_context:
pages = playwright_context.pages

# Default to the first page, but prefer a page that is not about:blank (the last such page wins)
if pages:
active_page = pages[0]
for page in pages:
if page.url != "about:blank":
active_page = page

# Take screenshot
try:
screenshot = await active_page.screenshot(
type='jpeg',
quality=75,
scale="css"
)
encoded = base64.b64encode(screenshot).decode('utf-8')
return f'<img src="data:image/jpeg;base64,{encoded}" style="width:80vw; height:90vh ; border:1px solid #ccc;">'
except Exception as e:
return f"<h1 class='error' style='width:80vw; height:90vh'>Waiting for browser session...</h1>"
# Extract the Playwright browser instance
playwright_browser = browser_context.browser.playwright_browser # Ensure this is correct.

# Check if the browser instance is valid and if an existing context can be reused
if playwright_browser and playwright_browser.contexts:
playwright_context = playwright_browser.contexts[0]
else:
return None

# Access pages in the context
pages = None
if playwright_context:
pages = playwright_context.pages

# Default to the first page, but prefer a page that is not about:blank (the last such page wins)
if pages:
active_page = pages[0]
for page in pages:
if page.url != "about:blank":
active_page = page
else:
return None

# Take screenshot
try:
screenshot = await active_page.screenshot(
type='jpeg',
quality=75,
scale="css"
)
encoded = base64.b64encode(screenshot).decode('utf-8')
return encoded
except Exception as e:
return f"<h1 class='error' style='width:80vw; height:90vh'>Waiting for browser session...</h1>"
return None
20 changes: 13 additions & 7 deletions webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,8 @@ async def run_with_stream(
max_actions_per_step,
tool_call_in_content
):
stream_vw = 80
stream_vh = int(80 * window_h // window_w)
if not headless:
result = await run_browser_agent(
agent_type=agent_type,
Expand All @@ -433,7 +435,7 @@ async def run_with_stream(
tool_call_in_content=tool_call_in_content
)
# Add HTML content at the start of the result array
html_content = "<h1 style='width:80vw; height:90vh'>Using browser...</h1>"
html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Using browser...</h1>"
yield [html_content] + list(result)
else:
try:
Expand Down Expand Up @@ -465,17 +467,21 @@ async def run_with_stream(
)

# Initialize values for streaming
html_content = "<h1 style='width:80vw; height:90vh'>Using browser...</h1>"
html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Using browser...</h1>"
final_result = errors = model_actions = model_thoughts = ""
latest_videos = trace = None


# Periodically update the stream while the agent task is running
while not agent_task.done():
try:
html_content = await capture_screenshot(_global_browser_context)
encoded_screenshot = await capture_screenshot(_global_browser_context)
if encoded_screenshot is not None:
html_content = f'<img src="data:image/jpeg;base64,{encoded_screenshot}" style="width:{stream_vw}vw; height:{stream_vh}vh ; border:1px solid #ccc;">'
else:
html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>"
except Exception as e:
html_content = f"<h1 style='width:80vw; height:90vh'>Waiting for browser session...</h1>"
html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>"

yield [
html_content,
Expand All @@ -488,7 +494,7 @@ async def run_with_stream(
gr.update(value="Stop", interactive=True), # Re-enable stop button
gr.update(value="Run", interactive=True) # Re-enable run button
]
await asyncio.sleep(0.01)
await asyncio.sleep(0.05)

# Once the agent task completes, get the results
try:
Expand All @@ -515,7 +521,7 @@ async def run_with_stream(
except Exception as e:
import traceback
yield [
f"<h1 style='width:80vw; height:90vh'>Waiting for browser session...</h1>",
f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>",
"",
f"Error: {str(e)}\n{traceback.format_exc()}",
"",
Expand Down Expand Up @@ -740,7 +746,7 @@ def create_ui(theme_name="Ocean"):

with gr.Row():
browser_view = gr.HTML(
value="<h1 style='width:80vw; height:90vh'>Waiting for browser session...</h1>",
value="<h1 style='width:80vw; height:50vh'>Waiting for browser session...</h1>",
label="Live Browser View",
)

Expand Down

0 comments on commit be89b90

Please sign in to comment.