diff --git a/backend/btrixcloud/operator/crawls.py b/backend/btrixcloud/operator/crawls.py
index a39064a1b2..569494befd 100644
--- a/backend/btrixcloud/operator/crawls.py
+++ b/backend/btrixcloud/operator/crawls.py
@@ -340,6 +340,31 @@ def _load_redis(self, params, status: CrawlStatus, children):
return self.load_from_yaml("redis.yaml", params)
+ def _filter_autoclick_behavior(
+     self, behaviors: Optional[str], crawler_image: str
+ ) -> Optional[str]:
+     """Remove autoclick behavior if crawler version doesn't support it
+
+     The minimum supported image is read from the MIN_AUTOCLICK_CRAWLER_IMAGE
+     environment variable. If it is unset, or if behaviors or crawler_image
+     is empty, behaviors is returned unchanged.
+
+     When both image names share the same repository and end in a purely
+     numeric dotted tag (e.g. ":1.5.0"), the tags are compared as integer
+     tuples, so ":1.10.0" is treated as newer than ":1.5.0". In every other
+     case (non-numeric tag, no tag, different repositories) the full image
+     names are compared as plain strings.
+
+     Returns the comma-separated behaviors with any exact "autoclick" entry
+     removed if the crawler image is below the minimum, otherwise the
+     original value.
+     """
+     min_autoclick_crawler_image = os.environ.get("MIN_AUTOCLICK_CRAWLER_IMAGE")
+
+     # no minimum configured, or nothing to filter
+     if not (min_autoclick_crawler_image and behaviors and crawler_image):
+         return behaviors
+
+     behaviors_list = behaviors.split(",")
+     filtered_behaviors = [
+         behavior for behavior in behaviors_list if behavior != "autoclick"
+     ]
+     # no exact "autoclick" entry present, so there is nothing to remove or log
+     if len(filtered_behaviors) == len(behaviors_list):
+         return behaviors
+
+     def numeric_tag(image: str):
+         """Return (repository, version tuple), version is None if not numeric"""
+         repo, sep, tag = image.rpartition(":")
+         parts = tag.split(".")
+         if sep and all(part.isascii() and part.isdigit() for part in parts):
+             return repo, tuple(int(part) for part in parts)
+         return image, None
+
+     repo, version = numeric_tag(crawler_image)
+     min_repo, min_version = numeric_tag(min_autoclick_crawler_image)
+
+     if version is not None and min_version is not None and repo == min_repo:
+         # numeric comparison avoids "1.10.0" sorting before "1.5.0"
+         below_minimum = version < min_version
+     else:
+         # fall back to plain string ordering of the full image names
+         below_minimum = crawler_image < min_autoclick_crawler_image
+
+     if not below_minimum:
+         return behaviors
+
+     print(
+         "Crawler version < min_autoclick_crawler_image, removing autoclick behavior",
+         flush=True,
+     )
+     return ",".join(filtered_behaviors)
+
async def _load_crawl_configmap(self, crawl: CrawlSpec, children, params):
name = f"crawl-config-{crawl.id}"
@@ -357,7 +382,13 @@ async def _load_crawl_configmap(self, crawl: CrawlSpec, children, params):
crawlconfig = await self.crawl_config_ops.get_crawl_config(crawl.cid, crawl.oid)
- params["config"] = json.dumps(crawlconfig.get_raw_config())
+ raw_config = crawlconfig.get_raw_config()
+
+ raw_config["behaviors"] = self._filter_autoclick_behavior(
+ raw_config["behaviors"], params["crawler_image"]
+ )
+
+ params["config"] = json.dumps(raw_config)
return self.load_from_yaml("crawl_configmap.yaml", params)
diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml
index f29f7788e7..eec66c3385 100644
--- a/chart/templates/configmap.yaml
+++ b/chart/templates/configmap.yaml
@@ -61,6 +61,8 @@ data:
MIN_QA_CRAWLER_IMAGE: "{{ .Values.min_qa_crawler_image }}"
+ MIN_AUTOCLICK_CRAWLER_IMAGE: "{{ .Values.min_autoclick_crawler_image }}"
+
NUM_BROWSERS: "{{ .Values.crawler_browser_instances }}"
MAX_CRAWLER_MEMORY: "{{ .Values.max_crawler_memory }}"
diff --git a/chart/values.yaml b/chart/values.yaml
index d422f60a73..6490ad28e9 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -250,6 +250,9 @@ crawler_namespace: "crawlers"
# if set, will restrict QA to image names that are >= than this value
# min_qa_crawler_image: ""
+# if set, the autoclick behavior is only enabled for crawler images that are >= this value
+min_autoclick_crawler_image: "docker.io/webrecorder/browsertrix-crawler:1.5.0"
+
# optional: enable to use a persist volume claim for all crawls
# can be enabled to use a multi-write shared filesystem
# crawler_pv_claim: "nfs-shared-crawls"
diff --git a/frontend/src/features/crawl-workflows/workflow-editor.ts b/frontend/src/features/crawl-workflows/workflow-editor.ts
index 6ac6f4bcda..94be3248b0 100644
--- a/frontend/src/features/crawl-workflows/workflow-editor.ts
+++ b/frontend/src/features/crawl-workflows/workflow-editor.ts
@@ -1304,6 +1304,20 @@ https://archiveweb.page/images/${"logo.svg"}`}
),
false,
)}
+ ${inputCol(
+ html`
+ ${msg("Autoclick behavior")}
+ `,
+ )}
+ ${this.renderHelpTextCol(
+ msg(
+ `When enabled the browser will automatically click on links that don't navigate to other pages.`,
+ ),
+ false,
+ )}
${inputCol(html`
({
autoAddCollections: [],
description: null,
autoscrollBehavior: true,
+ autoclickBehavior: false,
userAgent: null,
crawlerChannel: "default",
proxyId: null,
@@ -286,6 +288,9 @@ export function getInitialFormState(params: {
autoscrollBehavior: params.initialWorkflow.config.behaviors
? params.initialWorkflow.config.behaviors.includes("autoscroll")
: defaultFormState.autoscrollBehavior,
+ autoclickBehavior: params.initialWorkflow.config.behaviors
+ ? params.initialWorkflow.config.behaviors.includes("autoclick")
+ : defaultFormState.autoclickBehavior,
userAgent:
params.initialWorkflow.config.userAgent ?? defaultFormState.userAgent,
crawlerChannel: