Skip to content

Commit

Permalink
Fix connections and loading issue on specific sites (#335)
Browse files Browse the repository at this point in the history
* Test sites watsons.com.sg and guardian.com.sg
* Update playwright to 1.44.0 and bump package

---------

Co-authored-by: younglim <[email protected]>
  • Loading branch information
angyonghaseyo and younglim authored May 22, 2024
1 parent 2464d09 commit 2c917ca
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 13 deletions.
3 changes: 3 additions & 0 deletions constants/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ const document = new JSDOM('').window;
const httpsAgent = new https.Agent({
// Run in environments with custom certificates
rejectUnauthorized: false,
+ keepAlive: true,
});

export const messageOptions = {
Expand Down Expand Up @@ -684,6 +685,7 @@ const getRobotsTxtViaAxios = async (robotsUrl) => {
const instance = axios.create({
httpsAgent: new https.Agent({
rejectUnauthorized: false,
+ keepAlive: true,
}),
});

Expand Down Expand Up @@ -855,6 +857,7 @@ export const getLinksFromSitemap = async (
const instance = axios.create({
httpsAgent: new https.Agent({
rejectUnauthorized: false,
+ keepAlive: true,
}),
});
data = await (await instance.get(url, { timeout: 80000 })).data;
Expand Down
2 changes: 1 addition & 1 deletion crawlers/crawlDomain.js
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ const crawlDomain = async (
}

// Ensure page navigation completes to capture final URL in a redirect chain
- await page.goto(request.url, { waitUntil: 'networkidle' });
+ await page.goto(request.url, { waitUntil: 'load' });

let finalUrl = page.url(); // Initialize with the request URL

Expand Down
20 changes: 10 additions & 10 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@govtechsg/purple-hats",
"main": "npmIndex.js",
- "version": "0.9.52",
+ "version": "0.9.53",
"type": "module",
"imports": {
"#root/*.js": "./*.js"
Expand All @@ -20,7 +20,7 @@
"lodash": "^4.17.21",
"minimatch": "^9.0.3",
"pdfjs-dist": "github:veraPDF/pdfjs-dist#v2.14.305-taggedPdf-0.1.11",
- "playwright": "1.42.1",
+ "playwright": "1.44.0",
"prettier": "^3.1.0",
"print-message": "^3.0.1",
"safe-regex": "^2.1.1",
Expand Down

0 comments on commit 2c917ca

Please sign in to comment.