Merge pull request #104 from vvincent1234/fix/requirements_update

update requirements
browser-use · Jan 13, 2025 · be89b90
2 parents bd696d2 + ae47523
commit be89b90
Show file tree

Hide file tree

Showing 3 changed files with 48 additions and 41 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
-browser-use>=0.1.18
-langchain-google-genai>=2.0.8
+browser-use==0.1.18
+langchain-google-genai==2.0.8
 pyperclip
 gradio
 langchain-ollama

diff --git a/src/utils/utils.py b/src/utils/utils.py
@@ -164,38 +164,39 @@ def get_latest_files(directory: str, file_types: list = ['.webm', '.zip']) -> Di
             print(f"Error getting latest {file_type} file: {e}")
 
     return latest_files
-async def capture_screenshot(browser_context) -> str:
+async def capture_screenshot(browser_context):
     """Capture and encode a screenshot"""
-    try:
-        # Extract the Playwright browser instance
-        playwright_browser = browser_context.browser.playwright_browser  # Ensure this is correct.
-
-        # Check if the browser instance is valid and if an existing context can be reused
-        if playwright_browser and playwright_browser.contexts:
-            playwright_context = playwright_browser.contexts[0]
-
-        # Access pages in the context
-        if playwright_context:
-            pages = playwright_context.pages
-
-        # Use an existing page or create a new one if none exist
-        if pages:
-            active_page = pages[0]
-            for page in pages:
-                if page.url != "about:blank":
-                    active_page = page
-
-        # Take screenshot
-        try:
-            screenshot = await active_page.screenshot(
-                type='jpeg',
-                quality=75,
-                scale="css"
-            )
-            encoded = base64.b64encode(screenshot).decode('utf-8')
-            return f'<img src="data:image/jpeg;base64,{encoded}" style="width:80vw; height:90vh ; border:1px solid #ccc;">'
-        except Exception as e:
-            return f"<h1 class='error' style='width:80vw; height:90vh'>Waiting for browser session...</h1>"
+    # Extract the Playwright browser instance
+    playwright_browser = browser_context.browser.playwright_browser  # Ensure this is correct.
+
+    # Check if the browser instance is valid and if an existing context can be reused
+    if playwright_browser and playwright_browser.contexts:
+        playwright_context = playwright_browser.contexts[0]
+    else:
+        return None
 
+    # Access pages in the context
+    pages = None
+    if playwright_context:
+        pages = playwright_context.pages
+
+    # Use an existing page or create a new one if none exist
+    if pages:
+        active_page = pages[0]
+        for page in pages:
+            if page.url != "about:blank":
+                active_page = page
+    else:
+        return None
+
+    # Take screenshot
+    try:
+        screenshot = await active_page.screenshot(
+            type='jpeg',
+            quality=75,
+            scale="css"
+        )
+        encoded = base64.b64encode(screenshot).decode('utf-8')
+        return encoded
     except Exception as e:
-        return f"<h1 class='error' style='width:80vw; height:90vh'>Waiting for browser session...</h1>"
+        return None
diff --git a/webui.py b/webui.py
@@ -408,6 +408,8 @@ async def run_with_stream(
     max_actions_per_step,
     tool_call_in_content
 ):
+    stream_vw = 80
+    stream_vh = int(80 * window_h // window_w)
     if not headless:
         result = await run_browser_agent(
             agent_type=agent_type,
@@ -433,7 +435,7 @@ async def run_with_stream(
             tool_call_in_content=tool_call_in_content
         )
         # Add HTML content at the start of the result array
-        html_content = "<h1 style='width:80vw; height:90vh'>Using browser...</h1>"
+        html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Using browser...</h1>"
         yield [html_content] + list(result)
     else:
         try:
@@ -465,17 +467,21 @@ async def run_with_stream(
             )
 
             # Initialize values for streaming
-            html_content = "<h1 style='width:80vw; height:90vh'>Using browser...</h1>"
+            html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Using browser...</h1>"
             final_result = errors = model_actions = model_thoughts = ""
             latest_videos = trace = None
 
 
             # Periodically update the stream while the agent task is running
             while not agent_task.done():
                 try:
-                    html_content = await capture_screenshot(_global_browser_context)
+                    encoded_screenshot = await capture_screenshot(_global_browser_context)
+                    if encoded_screenshot is not None:
+                        html_content = f'<img src="data:image/jpeg;base64,{encoded_screenshot}" style="width:{stream_vw}vw; height:{stream_vh}vh ; border:1px solid #ccc;">'
+                    else:
+                        html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>"
                 except Exception as e:
-                    html_content = f"<h1 style='width:80vw; height:90vh'>Waiting for browser session...</h1>"
+                    html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>"
 
                 yield [
                     html_content,
@@ -488,7 +494,7 @@ async def run_with_stream(
                     gr.update(value="Stop", interactive=True),  # Re-enable stop button
                     gr.update(value="Run", interactive=True)    # Re-enable run button
                 ]
-                await asyncio.sleep(0.01)
+                await asyncio.sleep(0.05)
 
             # Once the agent task completes, get the results
             try:
@@ -515,7 +521,7 @@ async def run_with_stream(
         except Exception as e:
             import traceback
             yield [
-                f"<h1 style='width:80vw; height:90vh'>Waiting for browser session...</h1>",
+                f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>",
                 "",
                 f"Error: {str(e)}\n{traceback.format_exc()}",
                 "",
@@ -740,7 +746,7 @@ def create_ui(theme_name="Ocean"):
 
                 with gr.Row():
                     browser_view = gr.HTML(
-                        value="<h1 style='width:80vw; height:90vh'>Waiting for browser session...</h1>",
+                        value="<h1 style='width:80vw; height:50vh'>Waiting for browser session...</h1>",
                         label="Live Browser View",
                 )