diff --git a/CHANGELOG.md b/CHANGELOG.md index cd7d3dba36ff..f5ec8fd2c102 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,19 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + + +### Features + +* add `iframe` expansion to `parseWithCheerio` in browsers ([#2542](https://github.com/apify/crawlee/issues/2542)) ([328d085](https://github.com/apify/crawlee/commit/328d08598807782b3712bd543e394fe9a000a85d)), closes [#2507](https://github.com/apify/crawlee/issues/2507) +* add `ignoreIframes` opt-out from the Cheerio iframe expansion ([#2562](https://github.com/apify/crawlee/issues/2562)) ([474a8dc](https://github.com/apify/crawlee/commit/474a8dc06a567cde0651d385fdac9c350ddf4508)) +* Sitemap-based request list implementation ([#2498](https://github.com/apify/crawlee/issues/2498)) ([7bf8f0b](https://github.com/apify/crawlee/commit/7bf8f0bcd4cc81e02c7cc60e82dfe7a0cdd80938)) + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) diff --git a/lerna.json b/lerna.json index 3532bdbd061e..34a399ea47db 100644 --- a/lerna.json +++ b/lerna.json @@ -2,7 +2,7 @@ "packages": [ "packages/*" ], - "version": "3.10.5", + "version": "3.11.0", "command": { "version": { "conventionalCommits": true, diff --git a/packages/basic-crawler/CHANGELOG.md b/packages/basic-crawler/CHANGELOG.md index e8553a5b33bd..4023d8beb654 100644 --- a/packages/basic-crawler/CHANGELOG.md +++ b/packages/basic-crawler/CHANGELOG.md @@ -3,6 +3,17 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + + +### Features + +* Sitemap-based request list implementation ([#2498](https://github.com/apify/crawlee/issues/2498)) ([7bf8f0b](https://github.com/apify/crawlee/commit/7bf8f0bcd4cc81e02c7cc60e82dfe7a0cdd80938)) + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) diff --git a/packages/basic-crawler/package.json b/packages/basic-crawler/package.json index 642ae017b71b..b364a6eb918c 100644 --- a/packages/basic-crawler/package.json +++ b/packages/basic-crawler/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/basic", - "version": "3.10.5", + "version": "3.11.0", "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.", "engines": { "node": ">=16.0.0" @@ -48,9 +48,9 @@ "@apify/log": "^2.4.0", "@apify/timeout": "^0.3.0", "@apify/utilities": "^2.7.10", - "@crawlee/core": "3.10.5", - "@crawlee/types": "3.10.5", - "@crawlee/utils": "3.10.5", + "@crawlee/core": "^3.11.0", + "@crawlee/types": "^3.11.0", + "@crawlee/utils": "^3.11.0", "csv-stringify": "^6.2.0", "fs-extra": "^11.0.0", "got-scraping": "^4.0.0", diff --git a/packages/browser-crawler/CHANGELOG.md b/packages/browser-crawler/CHANGELOG.md index 1b0a4d1eca59..e74d85798f71 100644 --- a/packages/browser-crawler/CHANGELOG.md +++ b/packages/browser-crawler/CHANGELOG.md @@ -3,6 +3,18 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + + +### Features + +* add `iframe` expansion to `parseWithCheerio` in browsers ([#2542](https://github.com/apify/crawlee/issues/2542)) ([328d085](https://github.com/apify/crawlee/commit/328d08598807782b3712bd543e394fe9a000a85d)), closes [#2507](https://github.com/apify/crawlee/issues/2507) +* add `ignoreIframes` opt-out from the Cheerio iframe expansion ([#2562](https://github.com/apify/crawlee/issues/2562)) ([474a8dc](https://github.com/apify/crawlee/commit/474a8dc06a567cde0651d385fdac9c350ddf4508)) + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) diff --git a/packages/browser-crawler/package.json b/packages/browser-crawler/package.json index f60ea430cca7..c29fffc2c6db 100644 --- a/packages/browser-crawler/package.json +++ b/packages/browser-crawler/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/browser", - "version": "3.10.5", + "version": "3.11.0", "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.", "engines": { "node": ">=16.0.0" @@ -54,10 +54,10 @@ }, "dependencies": { "@apify/timeout": "^0.3.0", - "@crawlee/basic": "3.10.5", - "@crawlee/browser-pool": "3.10.5", - "@crawlee/types": "3.10.5", - "@crawlee/utils": "3.10.5", + "@crawlee/basic": "^3.11.0", + "@crawlee/browser-pool": "^3.11.0", + "@crawlee/types": "^3.11.0", + "@crawlee/utils": "^3.11.0", "ow": "^0.28.1", "tslib": "^2.4.0", "type-fest": "^4.0.0" diff --git a/packages/browser-pool/CHANGELOG.md b/packages/browser-pool/CHANGELOG.md index eceaad9c8f8d..93805de46aea 100644 --- a/packages/browser-pool/CHANGELOG.md +++ b/packages/browser-pool/CHANGELOG.md @@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + +**Note:** Version bump only for package @crawlee/browser-pool + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) **Note:** Version bump only for package @crawlee/browser-pool diff --git a/packages/browser-pool/package.json b/packages/browser-pool/package.json index 597ac80dca93..45bc28df0081 100644 --- a/packages/browser-pool/package.json +++ b/packages/browser-pool/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/browser-pool", - "version": "3.10.5", + "version": "3.11.0", "description": "Rotate multiple browsers using popular automation libraries such as Playwright or Puppeteer.", "engines": { "node": ">=16.0.0" @@ -38,8 +38,8 @@ "dependencies": { "@apify/log": "^2.4.0", "@apify/timeout": "^0.3.0", - "@crawlee/core": "3.10.5", - "@crawlee/types": "3.10.5", + "@crawlee/core": "^3.11.0", + "@crawlee/types": "^3.11.0", "fingerprint-generator": "^2.0.6", "fingerprint-injector": "^2.0.5", "lodash.merge": "^4.6.2", diff --git a/packages/cheerio-crawler/CHANGELOG.md b/packages/cheerio-crawler/CHANGELOG.md index b73ebf5de777..58d9f0f419a6 100644 --- a/packages/cheerio-crawler/CHANGELOG.md +++ b/packages/cheerio-crawler/CHANGELOG.md @@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + +**Note:** Version bump only for package @crawlee/cheerio + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) **Note:** Version bump only for package @crawlee/cheerio diff --git a/packages/cheerio-crawler/package.json b/packages/cheerio-crawler/package.json index bb8aa6a5ebb3..d21a1169ec30 100644 --- a/packages/cheerio-crawler/package.json +++ b/packages/cheerio-crawler/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/cheerio", - "version": "3.10.5", + "version": "3.11.0", "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.", "engines": { "node": ">=16.0.0" @@ -53,9 +53,9 @@ "access": "public" }, "dependencies": { - "@crawlee/http": "3.10.5", - "@crawlee/types": "3.10.5", - "@crawlee/utils": "3.10.5", + "@crawlee/http": "^3.11.0", + "@crawlee/types": "^3.11.0", + "@crawlee/utils": "^3.11.0", "cheerio": "^1.0.0-rc.12", "htmlparser2": "^9.0.0", "tslib": "^2.4.0" diff --git a/packages/cli/CHANGELOG.md b/packages/cli/CHANGELOG.md index 7c1b54fcf435..1fdb951d7a90 100644 --- a/packages/cli/CHANGELOG.md +++ b/packages/cli/CHANGELOG.md @@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + +**Note:** Version bump only for package @crawlee/cli + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) **Note:** Version bump only for package @crawlee/cli diff --git a/packages/cli/package.json b/packages/cli/package.json index 1947e5cdae83..f066628b67e3 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/cli", - "version": "3.10.5", + "version": "3.11.0", "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.", "engines": { "node": ">=16.0.0" @@ -51,7 +51,7 @@ "access": "public" }, "dependencies": { - "@crawlee/templates": "3.10.5", + "@crawlee/templates": "^3.11.0", "ansi-colors": "^4.1.3", "fs-extra": "^11.0.0", "inquirer": "^8.2.4", diff --git a/packages/core/CHANGELOG.md b/packages/core/CHANGELOG.md index 967235398ba1..6fe240968d6e 100644 --- a/packages/core/CHANGELOG.md +++ b/packages/core/CHANGELOG.md @@ -3,6 +3,17 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + + +### Features + +* Sitemap-based request list implementation ([#2498](https://github.com/apify/crawlee/issues/2498)) ([7bf8f0b](https://github.com/apify/crawlee/commit/7bf8f0bcd4cc81e02c7cc60e82dfe7a0cdd80938)) + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) diff --git a/packages/core/package.json b/packages/core/package.json index 04c8a78bda16..562fcc524691 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/core", - "version": "3.10.5", + "version": "3.11.0", "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.", "engines": { "node": ">=16.0.0" @@ -59,9 +59,9 @@ "@apify/pseudo_url": "^2.0.30", "@apify/timeout": "^0.3.0", "@apify/utilities": "^2.7.10", - "@crawlee/memory-storage": "3.10.5", - "@crawlee/types": "3.10.5", - "@crawlee/utils": "3.10.5", + "@crawlee/memory-storage": "^3.11.0", + "@crawlee/types": "^3.11.0", + "@crawlee/utils": "^3.11.0", "@sapphire/async-queue": "^1.5.1", "@types/tough-cookie": "^4.0.2", "@vladfrangu/async_event_emitter": "^2.2.2", diff --git a/packages/crawlee/CHANGELOG.md b/packages/crawlee/CHANGELOG.md index d8303105b168..9f071d82dcf1 100644 --- a/packages/crawlee/CHANGELOG.md +++ b/packages/crawlee/CHANGELOG.md @@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + +**Note:** Version bump only for package crawlee + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) **Note:** Version bump only for package crawlee diff --git a/packages/crawlee/package.json b/packages/crawlee/package.json index eaa50c3bdf70..1760f9472b83 100644 --- a/packages/crawlee/package.json +++ b/packages/crawlee/package.json @@ -1,6 +1,6 @@ { "name": "crawlee", - "version": "3.10.5", + "version": "3.11.0", "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.", "engines": { "node": ">=16.0.0" @@ -54,18 +54,18 @@ "access": "public" }, "dependencies": { - "@crawlee/basic": "3.10.5", - "@crawlee/browser": "3.10.5", - "@crawlee/browser-pool": "3.10.5", - "@crawlee/cheerio": "3.10.5", - "@crawlee/cli": "3.10.5", - "@crawlee/core": "3.10.5", - "@crawlee/http": "3.10.5", - "@crawlee/jsdom": "3.10.5", - "@crawlee/linkedom": "3.10.5", - "@crawlee/playwright": "3.10.5", - "@crawlee/puppeteer": "3.10.5", - "@crawlee/utils": "3.10.5", + "@crawlee/basic": "^3.11.0", + "@crawlee/browser": "^3.11.0", + "@crawlee/browser-pool": "^3.11.0", + "@crawlee/cheerio": "^3.11.0", + "@crawlee/cli": "^3.11.0", + "@crawlee/core": "^3.11.0", + "@crawlee/http": "^3.11.0", + "@crawlee/jsdom": "^3.11.0", + "@crawlee/linkedom": "^3.11.0", + "@crawlee/playwright": "^3.11.0", + "@crawlee/puppeteer": "^3.11.0", + "@crawlee/utils": "^3.11.0", "import-local": "^3.1.0", "tslib": "^2.4.0" }, diff --git a/packages/http-crawler/CHANGELOG.md b/packages/http-crawler/CHANGELOG.md index 8468ef7d7913..9fc4e46bdad6 100644 --- a/packages/http-crawler/CHANGELOG.md +++ b/packages/http-crawler/CHANGELOG.md @@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + +**Note:** Version bump only for package @crawlee/http + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) diff --git a/packages/http-crawler/package.json b/packages/http-crawler/package.json index 266f1627abfd..6b7a29c3a3c0 100644 --- a/packages/http-crawler/package.json +++ b/packages/http-crawler/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/http", - "version": "3.10.5", + "version": "3.11.0", "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.", "engines": { "node": ">=16.0.0" @@ -55,9 +55,9 @@ "dependencies": { "@apify/timeout": "^0.3.0", "@apify/utilities": "^2.7.10", - "@crawlee/basic": "3.10.5", - "@crawlee/types": "3.10.5", - "@crawlee/utils": "3.10.5", + "@crawlee/basic": "^3.11.0", + "@crawlee/types": "^3.11.0", + "@crawlee/utils": "^3.11.0", "@types/content-type": "^1.1.5", "cheerio": "^1.0.0-rc.12", "content-type": "^1.0.4", diff --git a/packages/jsdom-crawler/CHANGELOG.md b/packages/jsdom-crawler/CHANGELOG.md index 0157d92ac355..209b2fb243e7 100644 --- a/packages/jsdom-crawler/CHANGELOG.md +++ b/packages/jsdom-crawler/CHANGELOG.md @@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + +**Note:** Version bump only for package @crawlee/jsdom + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) diff --git a/packages/jsdom-crawler/package.json b/packages/jsdom-crawler/package.json index 85e5e74c6cb4..cf941f76b797 100644 --- a/packages/jsdom-crawler/package.json +++ b/packages/jsdom-crawler/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/jsdom", - "version": "3.10.5", + "version": "3.11.0", "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.", "engines": { "node": ">=16.0.0" @@ -55,9 +55,9 @@ "dependencies": { "@apify/timeout": "^0.3.0", "@apify/utilities": "^2.7.10", - "@crawlee/http": "3.10.5", - "@crawlee/types": "3.10.5", - "@crawlee/utils": "3.10.5", + "@crawlee/http": "^3.11.0", + "@crawlee/types": "^3.11.0", + "@crawlee/utils": "^3.11.0", "@types/jsdom": "^21.0.0", "cheerio": "^1.0.0-rc.12", "jsdom": "^24.0.0", diff --git a/packages/linkedom-crawler/CHANGELOG.md b/packages/linkedom-crawler/CHANGELOG.md index 406455e29cb0..aa9866c2a0ff 100644 --- a/packages/linkedom-crawler/CHANGELOG.md +++ b/packages/linkedom-crawler/CHANGELOG.md @@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + +**Note:** Version bump only for package @crawlee/linkedom + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) **Note:** Version bump only for package @crawlee/linkedom diff --git a/packages/linkedom-crawler/package.json b/packages/linkedom-crawler/package.json index 423805728e8c..b039b5c7bdff 100644 --- a/packages/linkedom-crawler/package.json +++ b/packages/linkedom-crawler/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/linkedom", - "version": "3.10.5", + "version": "3.11.0", "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.", "engines": { "node": ">=16.0.0" @@ -55,8 +55,8 @@ "dependencies": { "@apify/timeout": "^0.3.0", "@apify/utilities": "^2.7.10", - "@crawlee/http": "3.10.5", - "@crawlee/types": "3.10.5", + "@crawlee/http": "^3.11.0", + "@crawlee/types": "^3.11.0", "linkedom": "^0.18.0", "ow": "^0.28.2", "tslib": "^2.4.0" diff --git a/packages/memory-storage/CHANGELOG.md b/packages/memory-storage/CHANGELOG.md index affa9a424a31..68a146c7eb28 100644 --- a/packages/memory-storage/CHANGELOG.md +++ b/packages/memory-storage/CHANGELOG.md @@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + +**Note:** Version bump only for package @crawlee/memory-storage + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) **Note:** Version bump only for package @crawlee/memory-storage diff --git a/packages/memory-storage/package.json b/packages/memory-storage/package.json index 3006bb664739..b4c71d1633ea 100644 --- a/packages/memory-storage/package.json +++ b/packages/memory-storage/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/memory-storage", - "version": "3.10.5", + "version": "3.11.0", "description": "A simple in-memory storage implementation of the Apify API", "engines": { "node": ">= 16" @@ -49,7 +49,7 @@ }, "dependencies": { "@apify/log": "^2.4.0", - "@crawlee/types": "3.10.5", + "@crawlee/types": "^3.11.0", "@sapphire/async-queue": "^1.5.0", "@sapphire/shapeshift": "^3.0.0", "content-type": "^1.0.4", diff --git a/packages/playwright-crawler/CHANGELOG.md b/packages/playwright-crawler/CHANGELOG.md index 516370482c9d..70589f6a79a0 100644 --- a/packages/playwright-crawler/CHANGELOG.md +++ b/packages/playwright-crawler/CHANGELOG.md @@ -3,6 +3,18 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + + +### Features + +* add `iframe` expansion to `parseWithCheerio` in browsers ([#2542](https://github.com/apify/crawlee/issues/2542)) ([328d085](https://github.com/apify/crawlee/commit/328d08598807782b3712bd543e394fe9a000a85d)), closes [#2507](https://github.com/apify/crawlee/issues/2507) +* add `ignoreIframes` opt-out from the Cheerio iframe expansion ([#2562](https://github.com/apify/crawlee/issues/2562)) ([474a8dc](https://github.com/apify/crawlee/commit/474a8dc06a567cde0651d385fdac9c350ddf4508)) + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) diff --git a/packages/playwright-crawler/package.json b/packages/playwright-crawler/package.json index e406bbd143bd..01d84f339a2f 100644 --- a/packages/playwright-crawler/package.json +++ b/packages/playwright-crawler/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/playwright", - "version": "3.10.5", + "version": "3.11.0", "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.", "engines": { "node": ">=16.0.0" @@ -56,11 +56,11 @@ "@apify/datastructures": "^2.0.0", "@apify/log": "^2.4.0", "@apify/timeout": "^0.3.1", - "@crawlee/browser": "3.10.5", - "@crawlee/browser-pool": "3.10.5", - "@crawlee/core": "3.10.5", - "@crawlee/types": "3.10.5", - "@crawlee/utils": "3.10.5", + "@crawlee/browser": "^3.11.0", + "@crawlee/browser-pool": "^3.11.0", + "@crawlee/core": "^3.11.0", + "@crawlee/types": "^3.11.0", + "@crawlee/utils": "^3.11.0", "cheerio": "^1.0.0-rc.12", "idcac-playwright": "^0.1.2", "jquery": "^3.6.0", diff --git a/packages/puppeteer-crawler/CHANGELOG.md b/packages/puppeteer-crawler/CHANGELOG.md index aa39f41a831b..00880d88630e 100644 --- a/packages/puppeteer-crawler/CHANGELOG.md +++ b/packages/puppeteer-crawler/CHANGELOG.md @@ -3,6 +3,18 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + + +### Features + +* add `iframe` expansion to `parseWithCheerio` in browsers ([#2542](https://github.com/apify/crawlee/issues/2542)) ([328d085](https://github.com/apify/crawlee/commit/328d08598807782b3712bd543e394fe9a000a85d)), closes [#2507](https://github.com/apify/crawlee/issues/2507) +* add `ignoreIframes` opt-out from the Cheerio iframe expansion ([#2562](https://github.com/apify/crawlee/issues/2562)) ([474a8dc](https://github.com/apify/crawlee/commit/474a8dc06a567cde0651d385fdac9c350ddf4508)) + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) **Note:** Version bump only for package @crawlee/puppeteer diff --git a/packages/puppeteer-crawler/package.json b/packages/puppeteer-crawler/package.json index e1ba78426d5e..1087f7ca5392 100644 --- a/packages/puppeteer-crawler/package.json +++ b/packages/puppeteer-crawler/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/puppeteer", - "version": "3.10.5", + "version": "3.11.0", "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.", "engines": { "node": ">=16.0.0" @@ -55,10 +55,10 @@ "dependencies": { "@apify/datastructures": "^2.0.0", "@apify/log": "^2.4.0", - "@crawlee/browser": "3.10.5", - "@crawlee/browser-pool": "3.10.5", - "@crawlee/types": "3.10.5", - "@crawlee/utils": "3.10.5", + "@crawlee/browser": "^3.11.0", + "@crawlee/browser-pool": "^3.11.0", + "@crawlee/types": "^3.11.0", + "@crawlee/utils": "^3.11.0", "cheerio": "^1.0.0-rc.12", "devtools-protocol": "*", "idcac-playwright": "^0.1.2", diff --git a/packages/templates/CHANGELOG.md b/packages/templates/CHANGELOG.md index 00040283c7bb..1fb64e18a38a 100644 --- a/packages/templates/CHANGELOG.md +++ b/packages/templates/CHANGELOG.md @@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + +**Note:** Version bump only for package @crawlee/templates + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) **Note:** Version bump only for package @crawlee/templates diff --git a/packages/templates/package.json b/packages/templates/package.json index 5999d6b6af5a..2422eac85de9 100644 --- a/packages/templates/package.json +++ b/packages/templates/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/templates", - "version": "3.10.5", + "version": "3.11.0", "description": "Templates for the crawlee projects", "engines": { "node": ">=16.0.0" diff --git a/packages/types/CHANGELOG.md b/packages/types/CHANGELOG.md index 54e3d71dc324..d85eb8c0500b 100644 --- a/packages/types/CHANGELOG.md +++ b/packages/types/CHANGELOG.md @@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + +**Note:** Version bump only for package @crawlee/types + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) **Note:** Version bump only for package @crawlee/types diff --git a/packages/types/package.json b/packages/types/package.json index 27b2ad433672..49fa936b37cd 100644 --- a/packages/types/package.json +++ b/packages/types/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/types", - "version": "3.10.5", + "version": "3.11.0", "description": "Shared types for the crawlee projects", "engines": { "node": ">=16.0.0" diff --git a/packages/utils/CHANGELOG.md b/packages/utils/CHANGELOG.md index a4e6a6488001..2a5638c2445a 100644 --- a/packages/utils/CHANGELOG.md +++ b/packages/utils/CHANGELOG.md @@ -3,6 +3,17 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +# [3.11.0](https://github.com/apify/crawlee/compare/v3.10.5...v3.11.0) (2024-07-09) + + +### Features + +* Sitemap-based request list implementation ([#2498](https://github.com/apify/crawlee/issues/2498)) ([7bf8f0b](https://github.com/apify/crawlee/commit/7bf8f0bcd4cc81e02c7cc60e82dfe7a0cdd80938)) + + + + + ## [3.10.5](https://github.com/apify/crawlee/compare/v3.10.4...v3.10.5) (2024-06-12) **Note:** Version bump only for package @crawlee/utils diff --git a/packages/utils/package.json b/packages/utils/package.json index f672ebdbabfa..00439599e071 100644 --- a/packages/utils/package.json +++ b/packages/utils/package.json @@ -1,6 +1,6 @@ { "name": "@crawlee/utils", - "version": "3.10.5", + "version": "3.11.0", "description": "A set of shared utilities that can be used by crawlers", "engines": { "node": ">=16.0.0" @@ -49,7 +49,7 @@ "dependencies": { "@apify/log": "^2.4.0", "@apify/ps-tree": "^1.2.0", - "@crawlee/types": "3.10.5", + "@crawlee/types": "^3.11.0", "@types/sax": "^1.2.7", "cheerio": "^1.0.0-rc.12", "file-type": "^19.0.0", diff --git a/yarn.lock b/yarn.lock index 34501016341b..cbcc9eb243fd 100644 --- a/yarn.lock +++ b/yarn.lock @@ -477,16 +477,16 @@ __metadata: languageName: node linkType: hard -"@crawlee/basic@npm:3.10.5, @crawlee/basic@workspace:packages/basic-crawler": +"@crawlee/basic@npm:^3.11.0, @crawlee/basic@workspace:packages/basic-crawler": version: 0.0.0-use.local resolution: "@crawlee/basic@workspace:packages/basic-crawler" dependencies: "@apify/log": "npm:^2.4.0" "@apify/timeout": "npm:^0.3.0" "@apify/utilities": "npm:^2.7.10" - "@crawlee/core": "npm:3.10.5" - "@crawlee/types": "npm:3.10.5" - "@crawlee/utils": "npm:3.10.5" + "@crawlee/core": "npm:^3.11.0" + "@crawlee/types": "npm:^3.11.0" + "@crawlee/utils": "npm:^3.11.0" csv-stringify: "npm:^6.2.0" fs-extra: "npm:^11.0.0" got-scraping: "npm:^4.0.0" @@ -497,14 +497,14 @@ __metadata: languageName: unknown linkType: soft -"@crawlee/browser-pool@npm:3.10.5, @crawlee/browser-pool@workspace:packages/browser-pool": +"@crawlee/browser-pool@npm:^3.11.0, @crawlee/browser-pool@workspace:packages/browser-pool": version: 0.0.0-use.local resolution: "@crawlee/browser-pool@workspace:packages/browser-pool" dependencies: "@apify/log": "npm:^2.4.0" "@apify/timeout": "npm:^0.3.0" - "@crawlee/core": "npm:3.10.5" - "@crawlee/types": "npm:3.10.5" + "@crawlee/core": "npm:^3.11.0" + "@crawlee/types": "npm:^3.11.0" fingerprint-generator: "npm:^2.0.6" fingerprint-injector: "npm:^2.0.5" lodash.merge: "npm:^4.6.2" @@ -526,15 +526,15 @@ __metadata: languageName: unknown linkType: soft -"@crawlee/browser@npm:3.10.5, @crawlee/browser@workspace:packages/browser-crawler": +"@crawlee/browser@npm:^3.11.0, @crawlee/browser@workspace:packages/browser-crawler": version: 0.0.0-use.local resolution: "@crawlee/browser@workspace:packages/browser-crawler" dependencies: "@apify/timeout": "npm:^0.3.0" - "@crawlee/basic": "npm:3.10.5" - "@crawlee/browser-pool": "npm:3.10.5" - "@crawlee/types": "npm:3.10.5" - "@crawlee/utils": "npm:3.10.5" + "@crawlee/basic": "npm:^3.11.0" + "@crawlee/browser-pool": "npm:^3.11.0" + "@crawlee/types": "npm:^3.11.0" + "@crawlee/utils": "npm:^3.11.0" ow: "npm:^0.28.1" tslib: "npm:^2.4.0" type-fest: "npm:^4.0.0" @@ -549,24 +549,24 @@ __metadata: languageName: unknown linkType: soft -"@crawlee/cheerio@npm:3.10.5, @crawlee/cheerio@workspace:packages/cheerio-crawler": +"@crawlee/cheerio@npm:^3.11.0, @crawlee/cheerio@workspace:packages/cheerio-crawler": version: 0.0.0-use.local resolution: "@crawlee/cheerio@workspace:packages/cheerio-crawler" dependencies: - "@crawlee/http": "npm:3.10.5" - "@crawlee/types": "npm:3.10.5" - "@crawlee/utils": "npm:3.10.5" + "@crawlee/http": "npm:^3.11.0" + "@crawlee/types": "npm:^3.11.0" + "@crawlee/utils": "npm:^3.11.0" cheerio: "npm:^1.0.0-rc.12" htmlparser2: "npm:^9.0.0" tslib: "npm:^2.4.0" languageName: unknown linkType: soft -"@crawlee/cli@npm:3.10.5, @crawlee/cli@workspace:packages/cli": +"@crawlee/cli@npm:^3.11.0, @crawlee/cli@workspace:packages/cli": version: 0.0.0-use.local resolution: "@crawlee/cli@workspace:packages/cli" dependencies: - "@crawlee/templates": "npm:3.10.5" + "@crawlee/templates": "npm:^3.11.0" ansi-colors: "npm:^4.1.3" fs-extra: "npm:^11.0.0" inquirer: "npm:^8.2.4" @@ -578,7 +578,7 @@ __metadata: languageName: unknown linkType: soft -"@crawlee/core@npm:3.10.5, @crawlee/core@npm:^3.9.0, @crawlee/core@workspace:packages/core": +"@crawlee/core@npm:^3.11.0, @crawlee/core@npm:^3.9.0, @crawlee/core@workspace:packages/core": version: 0.0.0-use.local resolution: "@crawlee/core@workspace:packages/core" dependencies: @@ -588,9 +588,9 @@ __metadata: "@apify/pseudo_url": "npm:^2.0.30" "@apify/timeout": "npm:^0.3.0" "@apify/utilities": "npm:^2.7.10" - "@crawlee/memory-storage": "npm:3.10.5" - "@crawlee/types": "npm:3.10.5" - "@crawlee/utils": "npm:3.10.5" + "@crawlee/memory-storage": "npm:^3.11.0" + "@crawlee/types": "npm:^3.11.0" + "@crawlee/utils": "npm:^3.11.0" "@sapphire/async-queue": "npm:^1.5.1" "@types/tough-cookie": "npm:^4.0.2" "@vladfrangu/async_event_emitter": "npm:^2.2.2" @@ -609,15 +609,15 @@ __metadata: languageName: unknown linkType: soft -"@crawlee/http@npm:3.10.5, @crawlee/http@workspace:packages/http-crawler": +"@crawlee/http@npm:^3.11.0, @crawlee/http@workspace:packages/http-crawler": version: 0.0.0-use.local resolution: "@crawlee/http@workspace:packages/http-crawler" dependencies: "@apify/timeout": "npm:^0.3.0" "@apify/utilities": "npm:^2.7.10" - "@crawlee/basic": "npm:3.10.5" - "@crawlee/types": "npm:3.10.5" - "@crawlee/utils": "npm:3.10.5" + "@crawlee/basic": "npm:^3.11.0" + "@crawlee/types": "npm:^3.11.0" + "@crawlee/utils": "npm:^3.11.0" "@types/content-type": "npm:^1.1.5" cheerio: "npm:^1.0.0-rc.12" content-type: "npm:^1.0.4" @@ -630,15 +630,15 @@ __metadata: languageName: unknown linkType: soft -"@crawlee/jsdom@npm:3.10.5, @crawlee/jsdom@workspace:packages/jsdom-crawler": +"@crawlee/jsdom@npm:^3.11.0, @crawlee/jsdom@workspace:packages/jsdom-crawler": version: 0.0.0-use.local resolution: "@crawlee/jsdom@workspace:packages/jsdom-crawler" dependencies: "@apify/timeout": "npm:^0.3.0" "@apify/utilities": "npm:^2.7.10" - "@crawlee/http": "npm:3.10.5" - "@crawlee/types": "npm:3.10.5" - "@crawlee/utils": "npm:3.10.5" + "@crawlee/http": "npm:^3.11.0" + "@crawlee/types": "npm:^3.11.0" + "@crawlee/utils": "npm:^3.11.0" "@types/jsdom": "npm:^21.0.0" cheerio: "npm:^1.0.0-rc.12" jsdom: "npm:^24.0.0" @@ -647,26 +647,26 @@ __metadata: languageName: unknown linkType: soft -"@crawlee/linkedom@npm:3.10.5, @crawlee/linkedom@workspace:packages/linkedom-crawler": +"@crawlee/linkedom@npm:^3.11.0, @crawlee/linkedom@workspace:packages/linkedom-crawler": version: 0.0.0-use.local resolution: "@crawlee/linkedom@workspace:packages/linkedom-crawler" dependencies: "@apify/timeout": "npm:^0.3.0" "@apify/utilities": "npm:^2.7.10" - "@crawlee/http": "npm:3.10.5" - "@crawlee/types": "npm:3.10.5" + "@crawlee/http": "npm:^3.11.0" + "@crawlee/types": "npm:^3.11.0" linkedom: "npm:^0.18.0" ow: "npm:^0.28.2" tslib: "npm:^2.4.0" languageName: unknown linkType: soft -"@crawlee/memory-storage@npm:3.10.5, @crawlee/memory-storage@workspace:packages/memory-storage": +"@crawlee/memory-storage@npm:^3.11.0, @crawlee/memory-storage@workspace:packages/memory-storage": version: 0.0.0-use.local resolution: "@crawlee/memory-storage@workspace:packages/memory-storage" dependencies: "@apify/log": "npm:^2.4.0" - "@crawlee/types": "npm:3.10.5" + "@crawlee/types": "npm:^3.11.0" "@sapphire/async-queue": "npm:^1.5.0" "@sapphire/shapeshift": "npm:^3.0.0" content-type: "npm:^1.0.4" @@ -678,18 +678,18 @@ __metadata: languageName: unknown linkType: soft -"@crawlee/playwright@npm:3.10.5, @crawlee/playwright@workspace:packages/playwright-crawler": +"@crawlee/playwright@npm:^3.11.0, @crawlee/playwright@workspace:packages/playwright-crawler": version: 0.0.0-use.local resolution: "@crawlee/playwright@workspace:packages/playwright-crawler" dependencies: "@apify/datastructures": "npm:^2.0.0" "@apify/log": "npm:^2.4.0" "@apify/timeout": "npm:^0.3.1" - "@crawlee/browser": "npm:3.10.5" - "@crawlee/browser-pool": "npm:3.10.5" - "@crawlee/core": "npm:3.10.5" - "@crawlee/types": "npm:3.10.5" - "@crawlee/utils": "npm:3.10.5" + "@crawlee/browser": "npm:^3.11.0" + "@crawlee/browser-pool": "npm:^3.11.0" + "@crawlee/core": "npm:^3.11.0" + "@crawlee/types": "npm:^3.11.0" + "@crawlee/utils": "npm:^3.11.0" cheerio: "npm:^1.0.0-rc.12" idcac-playwright: "npm:^0.1.2" jquery: "npm:^3.6.0" @@ -707,16 +707,16 @@ __metadata: languageName: unknown linkType: soft -"@crawlee/puppeteer@npm:3.10.5, @crawlee/puppeteer@workspace:packages/puppeteer-crawler": +"@crawlee/puppeteer@npm:^3.11.0, @crawlee/puppeteer@workspace:packages/puppeteer-crawler": version: 0.0.0-use.local resolution: "@crawlee/puppeteer@workspace:packages/puppeteer-crawler" dependencies: "@apify/datastructures": "npm:^2.0.0" "@apify/log": "npm:^2.4.0" - "@crawlee/browser": "npm:3.10.5" - "@crawlee/browser-pool": "npm:3.10.5" - "@crawlee/types": "npm:3.10.5" - "@crawlee/utils": "npm:3.10.5" + "@crawlee/browser": "npm:^3.11.0" + "@crawlee/browser-pool": "npm:^3.11.0" + "@crawlee/types": "npm:^3.11.0" + "@crawlee/utils": "npm:^3.11.0" cheerio: "npm:^1.0.0-rc.12" devtools-protocol: "npm:*" idcac-playwright: "npm:^0.1.2" @@ -797,7 +797,7 @@ __metadata: languageName: unknown linkType: soft -"@crawlee/templates@npm:3.10.5, @crawlee/templates@workspace:packages/templates": +"@crawlee/templates@npm:^3.11.0, @crawlee/templates@workspace:packages/templates": version: 0.0.0-use.local resolution: "@crawlee/templates@workspace:packages/templates" dependencies: @@ -809,7 +809,7 @@ __metadata: languageName: unknown linkType: soft -"@crawlee/types@npm:3.10.5, @crawlee/types@npm:^3.3.0, @crawlee/types@npm:^3.9.0, @crawlee/types@workspace:packages/types": +"@crawlee/types@npm:^3.11.0, @crawlee/types@npm:^3.3.0, @crawlee/types@npm:^3.9.0, @crawlee/types@workspace:packages/types": version: 0.0.0-use.local resolution: "@crawlee/types@workspace:packages/types" dependencies: @@ -817,13 +817,13 @@ __metadata: languageName: unknown linkType: soft -"@crawlee/utils@npm:3.10.5, @crawlee/utils@npm:^3.9.0, @crawlee/utils@workspace:packages/utils": +"@crawlee/utils@npm:^3.11.0, @crawlee/utils@npm:^3.9.0, @crawlee/utils@workspace:packages/utils": version: 0.0.0-use.local resolution: "@crawlee/utils@workspace:packages/utils" dependencies: "@apify/log": "npm:^2.4.0" "@apify/ps-tree": "npm:^1.2.0" - "@crawlee/types": "npm:3.10.5" + "@crawlee/types": "npm:^3.11.0" "@types/sax": "npm:^1.2.7" "@types/whatwg-mimetype": "npm:^3.0.2" cheerio: "npm:^1.0.0-rc.12" @@ -4155,18 +4155,18 @@ __metadata: version: 0.0.0-use.local resolution: "crawlee@workspace:packages/crawlee" dependencies: - "@crawlee/basic": "npm:3.10.5" - "@crawlee/browser": "npm:3.10.5" - "@crawlee/browser-pool": "npm:3.10.5" - "@crawlee/cheerio": "npm:3.10.5" - "@crawlee/cli": "npm:3.10.5" - "@crawlee/core": "npm:3.10.5" - "@crawlee/http": "npm:3.10.5" - "@crawlee/jsdom": "npm:3.10.5" - "@crawlee/linkedom": "npm:3.10.5" - "@crawlee/playwright": "npm:3.10.5" - "@crawlee/puppeteer": "npm:3.10.5" - "@crawlee/utils": "npm:3.10.5" + "@crawlee/basic": "npm:^3.11.0" + "@crawlee/browser": "npm:^3.11.0" + "@crawlee/browser-pool": "npm:^3.11.0" + "@crawlee/cheerio": "npm:^3.11.0" + "@crawlee/cli": "npm:^3.11.0" + "@crawlee/core": "npm:^3.11.0" + "@crawlee/http": "npm:^3.11.0" + "@crawlee/jsdom": "npm:^3.11.0" + "@crawlee/linkedom": "npm:^3.11.0" + "@crawlee/playwright": "npm:^3.11.0" + "@crawlee/puppeteer": "npm:^3.11.0" + "@crawlee/utils": "npm:^3.11.0" import-local: "npm:^3.1.0" tslib: "npm:^2.4.0" peerDependencies: