Skip to content

Commit

Permalink
chore(chrome): add initial page retry
Browse files — browse the repository at this point in the history
  • Loading branch information
j-mendez committed Feb 28, 2025
1 parent 7435488 commit 16b9bbc
Show file tree
Hide file tree
Showing 11 changed files with 130 additions and 52 deletions.
60 changes: 30 additions & 30 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion spider/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider"
version = "2.31.1"
version = "2.31.2"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
8 changes: 8 additions & 0 deletions spider/src/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2221,6 +2221,10 @@ impl Page {
StatusCode::NOT_FOUND;
}
CdpError::NoResponse => {
default_response.status_code =
StatusCode::GATEWAY_TIMEOUT;
}
CdpError::LaunchTimeout => {
default_response.status_code =
StatusCode::REQUEST_TIMEOUT;
}
Expand Down Expand Up @@ -2536,6 +2540,10 @@ impl Page {
StatusCode::NOT_FOUND;
}
CdpError::NoResponse => {
default_response.status_code =
StatusCode::GATEWAY_TIMEOUT;
}
CdpError::LaunchTimeout => {
default_response.status_code =
StatusCode::REQUEST_TIMEOUT;
}
Expand Down
82 changes: 73 additions & 9 deletions spider/src/website.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1893,6 +1893,79 @@ impl Website {
)
.await;

let mut retry_count = self.configuration.retry;

if let Some(ref final_redirect_destination) = page.final_redirect_destination {
if final_redirect_destination == "chrome-error://chromewebdata/"
&& page.status_code.is_success()
&& page.is_empty()
&& self.configuration.proxies.is_some()
{
page.error_status = Some("Invalid proxy configuration.".into());
page.should_retry = true;
page.status_code = *crate::page::UNKNOWN_STATUS_ERROR;
}
}

while page.should_retry && retry_count > 0 {
retry_count -= 1;
if let Some(timeout) = page.get_timeout() {
tokio::time::sleep(timeout).await;
}
if page.status_code == StatusCode::GATEWAY_TIMEOUT {
if let Err(elasped) = tokio::time::timeout(BACKOFF_MAX_DURATION, async {
let next_page = Page::new(
&self.url.inner(),
&client,
&chrome_page,
&self.configuration.wait_for,
&self.configuration.screenshot,
false, // we use the initial about:blank page.
&self.configuration.openai_config,
&self.configuration.execution_scripts,
&self.configuration.automation_scripts,
&self.configuration.viewport,
&self.configuration.request_timeout,
)
.await;
page.clone_from(&next_page);
})
.await
{
log::warn!("backoff timeout {elasped}");
}
} else {
let next_page = Page::new(
&self.url.inner(),
&client,
&chrome_page,
&self.configuration.wait_for,
&self.configuration.screenshot,
false, // we use the initial about:blank page.
&self.configuration.openai_config,
&self.configuration.execution_scripts,
&self.configuration.automation_scripts,
&self.configuration.viewport,
&self.configuration.request_timeout,
)
.await;
page.clone_from(&next_page);
}

// re-check the retried page's final redirect destination for the chrome proxy error.
if let Some(ref final_redirect_destination) = page.final_redirect_destination {
if final_redirect_destination == "chrome-error://chromewebdata/"
&& page.status_code.is_success()
&& page.is_empty()
&& self.configuration.proxies.is_some()
{
page.error_status = Some("Invalid proxy configuration.".into());
page.should_retry = true;
page.status_code = *crate::page::UNKNOWN_STATUS_ERROR;
}
}
}

if let Some(h) = intercept_handle {
let abort_handle = h.abort_handle();
if let Err(elasped) =
Expand Down Expand Up @@ -1947,15 +2020,6 @@ impl Website {
Default::default()
};

if let Some(ref final_redirect_destination) = page.final_redirect_destination {
if final_redirect_destination == "chrome-error://chromewebdata/"
&& page.status_code.is_success()
&& self.configuration.proxies.is_some()
{
page.error_status = Some("Invalid proxy configuration.".into());
}
}

self.initial_status_code = page.status_code;

if page.status_code == reqwest::StatusCode::FORBIDDEN {
Expand Down
2 changes: 1 addition & 1 deletion spider_chrome/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_chrome"
version = "2.31.1"
version = "2.31.2"
rust-version = "1.70"
authors = [
"j-mendez <[email protected]>"
Expand Down
1 change: 0 additions & 1 deletion spider_chrome/src/conn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ use std::task::ready;
use futures::stream::Stream;
use futures::task::{Context, Poll};
use futures::{SinkExt, StreamExt};
use tokio::io::AsyncWriteExt;
use tokio_tungstenite::tungstenite::Message as WsMessage;
use tokio_tungstenite::MaybeTlsStream;
use tokio_tungstenite::{tungstenite::protocol::WebSocketConfig, WebSocketStream};
Expand Down
Loading

0 comments on commit 16b9bbc

Please sign in to comment.