Skip to content

Commit

Permalink
Log robots.txt URL to console
Browse files (browse the repository at this point in the history)
  • Loading branch information
younglim authored Dec 20, 2023
1 parent 803e0d3 commit a80adf1
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions constants/common.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -576,8 +576,13 @@ export const getUrlsFromRobotsTxt = async (url, browserToRun) => {
silentLogger.info(e);
}

if (!robotsTxt) constants.robotsTxtUrls[domain] = {};

if (!robotsTxt) {
constants.robotsTxtUrls[domain] = {};
return;
}

console.log('Found robots.txt: ', robotsUrl);

const lines = robotsTxt.split(/\r?\n/);
let shouldCapture = false;
let disallowedUrls = [], allowedUrls = [];
Expand Down Expand Up @@ -625,7 +630,6 @@ export const getUrlsFromRobotsTxt = async (url, browserToRun) => {
}

const getRobotsTxtViaPlaywright = async (robotsUrl, browser) => {
console.log('ROBOTS URL: ', robotsUrl);
const browserContext = await constants.launcher.launchPersistentContext(
'', {...getPlaywrightLaunchOptions(browser)},
);
Expand All @@ -634,7 +638,6 @@ const getRobotsTxtViaPlaywright = async (robotsUrl, browser) => {
await page.goto(robotsUrl, { waitUntil: 'networkidle', timeout: 30000 });

const robotsTxt = await page.evaluate(() => document.body.textContent);
console.log('ROBOTS TXT: ', robotsTxt);
return robotsTxt;
}

Expand Down

0 comments on commit a80adf1

Please sign in to comment.