Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Headful browsing to Agent Web Tooling #1080

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
9 changes: 7 additions & 2 deletions examples/browser/compose.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
services:
default:
image: aisiuk/inspect-web-browser-tool
init: true
build:
context: ../../src/inspect_ai/tool/_tools/_web_browser/_resources
dockerfile: Dockerfile
environment:
- HEADLESS=${INSPECT_WEB_BROWSER_TOOL_HEADLESS-True}
ports:
- "127.0.0.1:5900:5900"
39 changes: 25 additions & 14 deletions src/inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,23 +1,34 @@
# Base docker build file.

FROM python:3.12-bookworm

WORKDIR /app/web_browser

RUN apt-get update

RUN pip install --upgrade pip
# Install Python dependencies
RUN pip install --upgrade pip \
&& pip install playwright dm-env-rpc pillow bs4 lxml

# Install playwright
RUN pip install playwright
RUN playwright install
RUN playwright install-deps
# Install Playwright and its dependencies
RUN playwright install \
&& playwright install-deps

# Install other dependencies
RUN pip install dm-env-rpc pillow bs4 lxml
# Install system dependencies for headful browsing
RUN apt-get update \
&& apt-get install -y \
xvfb \
x11vnc \
fluxbox \
x11-xserver-utils \
&& rm -rf /var/lib/apt/lists/*

# Copy Python files alongside the Dockerfile
COPY *.py ./

# Run the server
CMD ["python3", "/app/web_browser/web_server.py"]
# Copy the entrypoint script
COPY entrypoint.sh /app/web_browser/entrypoint.sh
RUN chmod +x /app/web_browser/entrypoint.sh

# Set environment variables (default to headless mode) and expose 5900 by default
ENV HEADLESS="True"
ENV DISPLAY=:99
EXPOSE 5900

# Run the entrypoint script at startup
ENTRYPOINT [ "/app/web_browser/entrypoint.sh" ]
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,15 @@ def __init__(self, env: dm_env.Environment):
class EnvironmentService(dm_env_rpc_pb2_grpc.EnvironmentServicer):
"""Runs the environment as a gRPC EnvironmentServicer."""

def __init__(self, env_type: Type[dm_env.Environment]) -> None:
def __init__(
self, env_type: Type[dm_env.Environment], headless: bool = True
) -> None:
"""Initializes the environment.

Args:
env_type: A dm_env class to serve.
headless (bool): If True, web browser uses headless mode. If False, uses headful mode.
Defaults to True.
"""
self._env_type = env_type
self._envs: dict[str, dm_env.Environment] = {}
Expand All @@ -63,6 +67,7 @@ def __init__(self, env_type: Type[dm_env.Environment]) -> None:
self._browser: playwright_crawler.PlaywrightBrowser = None
self._lock = threading.Lock()
self._num_worlds = 0
self._headless = headless

def Process(
self,
Expand Down Expand Up @@ -154,7 +159,9 @@ def _handle_create_world_request(
world_name = _DEFAULT_WORLD_NAME
with self._lock:
if self._browser is None:
self._browser = playwright_crawler.PlaywrightBrowser()
self._browser = playwright_crawler.PlaywrightBrowser(
headless=self._headless
)
else:
world_name += f"_{self._num_worlds}"
self._num_worlds += 1
Expand Down
31 changes: 31 additions & 0 deletions src/inspect_ai/tool/_tools/_web_browser/_resources/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash
# Container entrypoint for the web browser tool.
#
# When HEADLESS is exactly "False", start a virtual X display (Xvfb), a window
# manager (Fluxbox) and a VNC server (x11vnc) for real-time web browser
# viewing, then launch the web server. For any other value of HEADLESS only
# the web server is started.
#
# Environment:
#   HEADLESS                 "False" enables headful mode.
#   DISPLAY                  X display to create and attach to (default :99).
#   X_SERVER_WAIT_ATTEMPTS   Max one-second polls for the X server (default 30).
set -e

if [ "$HEADLESS" = "False" ]; then

    # Use one display value everywhere so Xvfb, x11vnc and the browser agree.
    DISPLAY="${DISPLAY:-:99}"
    export DISPLAY
    max_attempts="${X_SERVER_WAIT_ATTEMPTS:-30}"

    # Start Xvfb in the background
    echo "Starting Xvfb..."
    Xvfb "$DISPLAY" -screen 0 1280x1024x24 -ac &
    xvfb_pid=$!

    # Wait for the X server to be ready BEFORE starting its clients. A fixed
    # sleep is either too short (clients fail to connect and die silently in
    # the background) or needlessly long.
    echo "Checking X server with DISPLAY=${DISPLAY}"
    attempts=0
    until xdpyinfo -display "$DISPLAY" >/dev/null 2>&1; do
        # Fail fast if Xvfb itself has already exited.
        if ! kill -0 "$xvfb_pid" 2>/dev/null; then
            echo "Xvfb exited before the X server became ready" >&2
            exit 1
        fi
        attempts=$((attempts + 1))
        # Bound the wait so a broken display setup surfaces as an error
        # instead of hanging the container forever.
        if [ "$attempts" -ge "$max_attempts" ]; then
            echo "Timed out waiting for X server on ${DISPLAY}" >&2
            exit 1
        fi
        echo "Waiting for X server..."
        sleep 1
    done
    echo "X server ready"

    # Start Fluxbox in the background
    echo "Starting Fluxbox..."
    fluxbox &

    # Start x11vnc in the background
    echo "Starting VNC server..."
    x11vnc -display "$DISPLAY" -forever -shared -verbose &
fi

# Start the web server (runs in both headless and headful modes).
# exec replaces this shell so the server receives container signals directly.
exec python3 /app/web_browser/web_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,24 @@ class PlaywrightBrowser:
HEIGHT = 1080
_playwright_api = None

def __init__(self):
"""Creates the browser."""
def __init__(self, headless: bool = True):
"""
Creates the browser.

Args:
headless (bool): Defaults to True.
If True, uses headless mode.
If False, uses headful mode.
"""
if PlaywrightBrowser._playwright_api is None:
PlaywrightBrowser._playwright_api = sync_playwright().start()

logging.info("Starting chromium in headless mode.")
logging.info(
f"Starting chromium in {'headless' if headless else 'headful'} mode."
)

self._browser = PlaywrightBrowser._playwright_api.chromium.launch(
headless=True,
headless=headless,
# Required for Gmail signup see
# https://stackoverflow.com/questions/65139098/how-to-login-to-google-account-with-playwright
args=["--disable-blink-features=AutomationControlled"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,49 @@


class TestPlaywrightCrawler(parameterized.TestCase):
def setUp(self):
self._browser = playwright_crawler.PlaywrightBrowser()
def init_crawler(self, headless):
    """Create the browser and crawler used by a test.

    Args:
      headless: Forwarded to PlaywrightBrowser as its `headless` argument.
    """
    browser = playwright_crawler.PlaywrightBrowser(headless=headless)
    # Store the browser before creating the context so tearDown can close it
    # even if context creation fails.
    self._browser = browser
    context = browser.get_new_context()
    self._crawler = playwright_crawler.PlaywrightCrawler(context)

def test_go_to_page_changes_url(self):
def tearDown(self):
    """Close the browser if this test created one, then run base teardown."""
    # `_browser` is only set once init_crawler has been called.
    browser_was_created = hasattr(self, "_browser")
    if browser_was_created:
        self._browser.close()
    super().tearDown()

@parameterized.named_parameters(
    {"testcase_name": "_HeadlessTrue", "headless": True},
    {"testcase_name": "_HeadlessFalse", "headless": False},
)
def test_go_to_page_changes_url(self, headless):
    """The crawler URL starts blank and changes after navigation."""
    self.init_crawler(headless=headless)
    crawler = self._crawler
    # A freshly created crawler has not navigated anywhere yet.
    self.assertEqual(crawler.url, "about:blank")
    crawler.go_to_page("https://www.example.com")
    self.assertEqual(crawler.url, "https://www.example.com/")

def test_go_to_page_adds_missing_protocol(self):
@parameterized.named_parameters(
    {"testcase_name": "_HeadlessTrue", "headless": True},
    {"testcase_name": "_HeadlessFalse", "headless": False},
)
def test_go_to_page_adds_missing_protocol(self, headless):
    """A URL given without a scheme ends up as an https:// URL."""
    self.init_crawler(headless=headless)
    crawler = self._crawler
    crawler.go_to_page("www.example.com")
    self.assertEqual(crawler.url, "https://www.example.com/")

def test_nodes_change_on_update(self):
@parameterized.named_parameters(
    {"testcase_name": "_HeadlessTrue", "headless": True},
    {"testcase_name": "_HeadlessFalse", "headless": False},
)
def test_nodes_change_on_update(self, headless):
    """Crawler nodes are empty after navigation and filled by update()."""
    self.init_crawler(headless=headless)
    crawler = self._crawler
    crawler.go_to_page("https://www.example.com")
    # Navigation alone does not populate the node table.
    self.assertFalse(crawler._nodes)
    crawler.update()
    self.assertTrue(crawler._nodes)

def test_render_accessibility_tree(self):
def test_render_accessibility_tree_headless(self):
self.init_crawler(headless=True)
self._crawler.go_to_page("https://www.example.com")
at_no_update = self._crawler.render(playwright_crawler.CrawlerOutputFormat.AT)
self.assertEqual(at_no_update, "<empty>")
Expand All @@ -50,7 +71,42 @@ def test_render_accessibility_tree(self):
)
)

def test_click_adjusts_to_scrolling_position(self):
def test_render_accessibility_tree_headful(self):
    """Render the accessibility tree (AT) in headful mode.

    Order of AT nodes varies more in headful browsing, so each expected node
    is matched by content rather than by position.
    """
    self.init_crawler(headless=False)
    crawler = self._crawler
    crawler.go_to_page("https://www.example.com")

    # Before update() the rendered tree is the empty placeholder.
    rendered_before = crawler.render(playwright_crawler.CrawlerOutputFormat.AT)
    self.assertEqual(rendered_before, "<empty>")

    crawler.update()

    rendered_after = crawler.render(playwright_crawler.CrawlerOutputFormat.AT)
    rendered_lines = rendered_after.splitlines()
    self.assertEqual(len(rendered_lines), 3)

    expected_fragments = (
        'RootWebArea "Example Domain" [focused: True, url: https://www.example.com/]',
        'StaticText "This domain is for use in illustrative examples in documents',
        'link "More information..." [url: https://www.iana.org/domains/example]',
    )
    for fragment in expected_fragments:
        # Each fragment must appear in at least one rendered line.
        self.assertTrue(any(fragment in line for line in rendered_lines))

def test_click_adjusts_to_scrolling_position_headless(self):
self.init_crawler(headless=True)
test_html = """
<!DOCTYPE html>
<html lang="en">
Expand Down Expand Up @@ -91,3 +147,56 @@ def test_click_adjusts_to_scrolling_position(self):
self._crawler.update()
at_after_click = self._crawler.render(playwright_crawler.CrawlerOutputFormat.AT)
self.assertIn("Text Changed!", at_after_click)

def test_click_adjusts_to_scrolling_position_headful(self):
    """Click a button that only appears after scrolling, in headful mode.

    Order of AT nodes varies more in headful browsing, so the button's node
    id is looked up by content instead of being assumed.
    """
    self.init_crawler(headless=False)
    crawler = self._crawler
    at_format = playwright_crawler.CrawlerOutputFormat.AT
    test_html = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Scrolling Test Page</title>
<style>
body { height: 3000px; }
.my-button { position: absolute; top: 1500px; }
</style>
</head>
<body>
<button class="my-button" onclick="changeText(this)">Click Me</button>
<script>
function changeText(button) {
button.textContent = "Text Changed!";
}
</script>
</body>
</html>
"""
    crawler._page.set_content(test_html)
    crawler.update()

    # The button is absent from the rendered tree until the page is scrolled.
    at_before_scroll = crawler.render(at_format)
    self.assertIn("Scrolling Test Page", at_before_scroll)
    self.assertNotIn("Click Me", at_before_scroll)

    crawler.scroll("down")
    crawler.update()
    at_after_scroll = crawler.render(at_format)
    self.assertIn("Click Me", at_after_scroll)

    # Find the button node ID dynamically: first node whose text mentions it.
    button_node_id = next(
        (
            node_id
            for node_id, node in crawler._nodes.items()
            if "Click Me" in str(node)
        ),
        None,
    )
    self.assertIsNotNone(button_node_id, "Button node was not found")

    crawler.click(button_node_id)
    crawler.update()
    at_after_click = crawler.render(at_format)
    self.assertIn("Text Changed!", at_after_click)
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Simple script to run and test the RPC server."""

from concurrent import futures
import os

import dm_env_servicer
import grpc
Expand All @@ -27,7 +28,14 @@ def main():
futures.ThreadPoolExecutor(max_workers=1),
options=options,
)
env_service = dm_env_servicer.EnvironmentService(web_environment.WebEnvironment)

# Read the HEADLESS environment variable to use headless or headful web browser.
# Defaults to true.
HEADLESS = os.getenv("HEADLESS", "True").lower() == "true"

env_service = dm_env_servicer.EnvironmentService(
web_environment.WebEnvironment, headless=HEADLESS
)
dm_env_rpc_pb2_grpc.add_EnvironmentServicer_to_server(env_service, grpc_server)

grpc_server.add_secure_port(
Expand Down