diff --git a/package.json b/package.json index 238b1ac..c329735 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ "ci": "npm run build && npm run check-format && npm run check-exports && npm run lint && npm run test", "lint": "tsc", "test": "vitest run", + "test:watch": "vitest", "format": "prettier --write ./src", "check-format": "prettier --check ./src", "check-exports": "attw --pack .", @@ -53,6 +54,8 @@ "@arethetypeswrong/cli": "^0.16.4", "@changesets/changelog-github": "^0.5.0", "@changesets/cli": "^2.27.9", + "domhandler": "^5.0.3", + "jsdom": "^25.0.1", "prettier": "^3.3.3", "tsup": "^8.3.5", "typescript": "^5.6.3", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 488d38d..c80ccf8 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -33,6 +33,12 @@ importers: '@changesets/cli': specifier: ^2.27.9 version: 2.27.9 + domhandler: + specifier: ^5.0.3 + version: 5.0.3 + jsdom: + specifier: ^25.0.1 + version: 25.0.1 prettier: specifier: ^3.3.3 version: 3.3.3 @@ -47,7 +53,7 @@ importers: version: 5.4.10 vitest: specifier: ^2.1.3 - version: 2.1.3 + version: 2.1.3(jsdom@25.0.1) packages: @@ -593,6 +599,10 @@ packages: '@vitest/utils@2.1.3': resolution: {integrity: sha512-xpiVfDSg1RrYT0tX6czgerkpcKFmFOF/gCr30+Mve5V2kewCy4Prn1/NDMSRwaSmT7PRaOF83wu+bEtsY1wrvA==} + agent-base@7.1.1: + resolution: {integrity: sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==} + engines: {node: '>= 14'} + ansi-colors@4.1.3: resolution: {integrity: sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw==} engines: {node: '>=6'} @@ -631,6 +641,9 @@ packages: resolution: {integrity: sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==} engines: {node: '>=12'} + asynckit@0.4.0: + resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==} + balanced-match@1.0.2: resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} @@ -718,6 +731,10 @@ packages: color-name@1.1.4: resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} + combined-stream@1.0.8: + resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} + engines: {node: '>= 0.8'} + commander@10.0.1: resolution: {integrity: sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==} engines: {node: '>=14'} @@ -744,6 +761,14 @@ packages: resolution: {integrity: sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==} engines: {node: '>= 6'} + cssstyle@4.1.0: + resolution: {integrity: sha512-h66W1URKpBS5YMI/V8PyXvTMFT8SupJ1IzoIV8IeBC/ji8WVmrO8dGlTi+2dh6whmdk6BiKJLD/ZBkhWbcg6nA==} + engines: {node: '>=18'} + + data-urls@5.0.0: + resolution: {integrity: sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==} + engines: {node: '>=18'} + dataloader@1.4.0: resolution: {integrity: sha512-68s5jYdlvasItOJnCuI2Q9s4q98g0pCyL3HrcKJu8KNugUl8ahgmZYg38ysLTgQjjXX3H8CJLkAvWrclWfcalw==} @@ -756,10 +781,17 @@ packages: supports-color: optional: true + decimal.js@10.4.3: + resolution: {integrity: sha512-VBBaLc1MgL5XpzgIP7ny5Z6Nx3UrRkIViUkPUdtl9aya5amy3De1gsUUSB1g3+3sExYNjCAsAznmukyxCb1GRA==} + deep-eql@5.0.2: resolution: {integrity: sha512-h5k/5U50IJJFpzfL6nO9jaaumfjO/f2NjK/oYB2Djzm4p9L+3T9qWpZqZ2hAbLPuuYq9wrU08WQyBTL5GbPk5Q==} engines: {node: '>=6'} + delayed-stream@1.0.0: + resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} + engines: {node: '>=0.4.0'} + detect-indent@6.1.0: resolution: {integrity: sha512-reYkTUJAZb9gUuZ2RvVCNhVHdg62RHnJ7WJl8ftMi4diZ6NWlciOzQN88pUhSELEwflJht4oQDv0F0BMlwaYtA==} engines: {node: '>=8'} @@ -878,6 +910,10 @@ packages: resolution: {integrity: sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg==} engines: {node: '>=14'} + form-data@4.0.1: + resolution: {integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==} + engines: {node: '>= 6'} + fs-extra@7.0.1: resolution: {integrity: sha512-YJDaCJZEnBmcbw13fvdAM9AwNOJwOzrE4pqMqBq5nFiEqXUqHwlK4B+3pUw6JNvfSPtX05xFHtYy/1ni01eGCw==} engines: {node: '>=6 <7 || >=8'} @@ -917,9 +953,21 @@ packages: highlight.js@10.7.3: resolution: {integrity: sha512-tzcUFauisWKNHaRkN4Wjl/ZA07gENAjFl3J/c480dprkGTg5EQstgaNFqBfUqCq54kZRIEcreTsAgF/m2quD7A==} + html-encoding-sniffer@4.0.0: + resolution: {integrity: sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==} + engines: {node: '>=18'} + htmlparser2@9.1.0: resolution: {integrity: sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ==} + http-proxy-agent@7.0.2: + resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==} + engines: {node: '>= 14'} + + https-proxy-agent@7.0.5: + resolution: {integrity: sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==} + engines: {node: '>= 14'} + human-id@1.0.2: resolution: {integrity: sha512-UNopramDEhHJD+VR+ehk8rOslwSfByxPIZyJRfV739NDhN5LF1fa1MqnzKm2lGTQRjNrjK19Q5fhkgIfjlVUKw==} @@ -951,6 +999,9 @@ packages: resolution: {integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==} engines: {node: '>=0.12.0'} + is-potential-custom-element-name@1.0.1: + resolution: {integrity: sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==} + is-subdir@1.2.0: resolution: {integrity: sha512-2AT6j+gXe/1ueqbW6fLZJiIw3F8iXGJtt0yDrZaBhAZEG1raiTxKWU+IPqMCzQAXOUCKdA4UDMgacKH25XG2Cw==} engines: {node: '>=4'} @@ -976,6 +1027,15 @@ packages: resolution: {integrity: sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==} hasBin: true + jsdom@25.0.1: + resolution: {integrity: sha512-8i7LzZj7BF8uplX+ZyOlIz86V6TAsSs+np6m1kpW9u0JWi4z/1t+FzcK1aek+ybTnAC4KhBL4uXCNT0wcUIeCw==} + engines: {node: '>=18'} + peerDependencies: + canvas: ^2.11.2 + peerDependenciesMeta: + canvas: + optional: true + jsonfile@4.0.0: resolution: {integrity: sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg==} @@ -1031,6 +1091,14 @@ packages: resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==} engines: {node: '>=8.6'} + mime-db@1.52.0: + resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==} + engines: {node: '>= 0.6'} + + mime-types@2.1.35: + resolution: {integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==} + engines: {node: '>= 0.6'} + minimatch@9.0.5: resolution: {integrity: sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==} engines: {node: '>=16 || 14 >=14.17'} @@ -1070,6 +1138,9 @@ packages: nth-check@2.1.1: resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==} + nwsapi@2.2.13: + resolution: {integrity: sha512-cTGB9ptp9dY9A5VbMSe7fQBcl/tt22Vcqdq8+eN93rblOuE0aCFu4aZ2vMwct/2t+lFnosm8RkQW1I0Omb1UtQ==} + object-assign@4.1.1: resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==} engines: {node: '>=0.10.0'} @@ -1243,12 +1314,19 @@ packages: engines: {node: '>=18.0.0', npm: '>=8.0.0'} hasBin: true + rrweb-cssom@0.7.1: + resolution: {integrity: sha512-TrEMa7JGdVm0UThDJSx7ddw5nVm3UJS9o9CCIZ72B1vSyEZoziDqBYP3XIoi/12lKrJR8rE3jeFHMok2F/Mnsg==} + run-parallel@1.2.0: resolution: {integrity: sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==} safer-buffer@2.1.2: resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==} + saxes@6.0.0: + resolution: {integrity: sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==} + engines: {node: '>=v12.22.7'} + semver@7.6.3: resolution: {integrity: sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==} engines: {node: '>=10'} @@ -1341,6 +1419,9 @@ packages: resolution: {integrity: sha512-2rn0BZ+/f7puLOHZm1HOJfwBggfaHXUpPUSSG/SWM4TWp5KCfmNYwnC3hruy2rZlMnmWZ+QAGpZfchu3f3695A==} engines: {node: '>=14.18'} + symbol-tree@3.2.4: + resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==} + term-size@2.2.1: resolution: {integrity: sha512-wK0Ri4fOGjv/XPy8SBHZChl8CM7uMc5VML7SqiQ0zG7+J5Vr+RMQDoHa2CNT6KHUnTGIXH34UDMkPzAUyapBZg==} engines: {node: '>=8'} @@ -1377,6 +1458,13 @@ packages: resolution: {integrity: sha512-n1cw8k1k0x4pgA2+9XrOkFydTerNcJ1zWCO5Nn9scWHTD+5tp8dghT2x1uduQePZTZgd3Tupf+x9BxJjeJi77Q==} engines: {node: '>=14.0.0'} + tldts-core@6.1.57: + resolution: {integrity: sha512-lXnRhuQpx3zU9EONF9F7HfcRLvN1uRYUBIiKL+C/gehC/77XTU+Jye6ui86GA3rU6FjlJ0triD1Tkjt2F/2lEg==} + + tldts@6.1.57: + resolution: {integrity: sha512-Oy7yDXK8meJl8vPMOldzA+MtueAJ5BrH4l4HXwZuj2AtfoQbLjmTJmjNWPUcAo+E/ibHn7QlqMS0BOcXJFJyHQ==} + hasBin: true + tmp@0.0.33: resolution: {integrity: sha512-jRCJlojKnZ3addtTOjdIqoRuPEKBvNXcGYqzO6zWZX8KfKEpnGY5jfggJQ3EjKuu8D4bJRr0y+cYJFmYbImXGw==} engines: {node: '>=0.6.0'} @@ -1388,12 +1476,20 @@ packages: toposort@2.0.2: resolution: {integrity: sha512-0a5EOkAUp8D4moMi2W8ZF8jcga7BgZd91O/yabJCFY8az+XSzeGyTKs0Aoo897iV1Nj6guFq8orWDS96z91oGg==} + tough-cookie@5.0.0: + resolution: {integrity: sha512-FRKsF7cz96xIIeMZ82ehjC3xW2E+O2+v11udrDYewUbszngYhsGa8z6YUMMzO9QJZzzyd0nGGXnML/TReX6W8Q==} + engines: {node: '>=16'} + tr46@0.0.3: resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} tr46@1.0.1: resolution: {integrity: sha512-dTpowEjclQ7Kgx5SdBkqRzVhERQXov8/l9Ft9dVM9fmg0W0KQSVaXX9T4i6twCPNtYiZM53lpSSUAwJbFPOHxA==} + tr46@5.0.0: + resolution: {integrity: sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==} + engines: {node: '>=18'} + tree-kill@1.2.2: resolution: {integrity: sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==} hasBin: true @@ -1511,12 +1607,20 @@ packages: jsdom: optional: true + w3c-xmlserializer@5.0.0: + resolution: {integrity: sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==} + engines: {node: '>=18'} + webidl-conversions@3.0.1: resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} webidl-conversions@4.0.2: resolution: {integrity: sha512-YQ+BmxuTgd6UXZW3+ICGfyqRyHXVlD5GtQr5+qjiNW7bF0cqrzX500HVXPBOvgXb5YnzDd+h0zqyv61KUD7+Sg==} + webidl-conversions@7.0.0: + resolution: {integrity: sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==} + engines: {node: '>=12'} + whatwg-encoding@3.1.1: resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==} engines: {node: '>=18'} @@ -1525,6 +1629,10 @@ packages: resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==} engines: {node: '>=18'} + whatwg-url@14.0.0: + resolution: {integrity: sha512-1lfMEm2IEr7RIV+f4lUNPOqfFL+pO+Xw3fJSqmjX9AbXcXcYOkCe1P6+9VBZB6n94af16NfZf+sSk0JCBZC9aw==} + engines: {node: '>=18'} + whatwg-url@5.0.0: resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==} @@ -1553,6 +1661,25 @@ packages: resolution: {integrity: sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==} engines: {node: '>=12'} + ws@8.18.0: + resolution: {integrity: sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==} + engines: {node: '>=10.0.0'} + peerDependencies: + bufferutil: ^4.0.1 + utf-8-validate: '>=5.0.2' + peerDependenciesMeta: + bufferutil: + optional: true + utf-8-validate: + optional: true + + xml-name-validator@5.0.0: + resolution: {integrity: sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==} + engines: {node: '>=18'} + + xmlchars@2.2.0: + resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==} + y18n@5.0.8: resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==} engines: {node: '>=10'} @@ -2068,6 +2195,12 @@ snapshots: loupe: 3.1.2 tinyrainbow: 1.2.0 + agent-base@7.1.1: + dependencies: + debug: 4.3.7 + transitivePeerDependencies: + - supports-color + ansi-colors@4.1.3: {} ansi-escapes@7.0.0: @@ -2094,6 +2227,8 @@ snapshots: assertion-error@2.0.1: {} + asynckit@0.4.0: {} + balanced-match@1.0.2: {} better-path-resolve@1.0.0: @@ -2196,6 +2331,10 @@ snapshots: color-name@1.1.4: {} + combined-stream@1.0.8: + dependencies: + delayed-stream: 1.0.0 + commander@10.0.1: {} commander@4.1.1: {} @@ -2224,14 +2363,27 @@ snapshots: css-what@6.1.0: {} + cssstyle@4.1.0: + dependencies: + rrweb-cssom: 0.7.1 + + data-urls@5.0.0: + dependencies: + whatwg-mimetype: 4.0.0 + whatwg-url: 14.0.0 + dataloader@1.4.0: {} debug@4.3.7: dependencies: ms: 2.1.3 + decimal.js@10.4.3: {} + deep-eql@5.0.2: {} + delayed-stream@1.0.0: {} + detect-indent@6.1.0: {} dir-glob@3.0.1: @@ -2389,6 +2541,12 @@ snapshots: cross-spawn: 7.0.3 signal-exit: 4.1.0 + form-data@4.0.1: + dependencies: + asynckit: 0.4.0 + combined-stream: 1.0.8 + mime-types: 2.1.35 + fs-extra@7.0.1: dependencies: graceful-fs: 4.2.11 @@ -2434,6 +2592,10 @@ snapshots: highlight.js@10.7.3: {} + html-encoding-sniffer@4.0.0: + dependencies: + whatwg-encoding: 3.1.1 + htmlparser2@9.1.0: dependencies: domelementtype: 2.3.0 @@ -2441,6 +2603,20 @@ snapshots: domutils: 3.1.0 entities: 4.5.0 + http-proxy-agent@7.0.2: + dependencies: + agent-base: 7.1.1 + debug: 4.3.7 + transitivePeerDependencies: + - supports-color + + https-proxy-agent@7.0.5: + dependencies: + agent-base: 7.1.1 + debug: 4.3.7 + transitivePeerDependencies: + - supports-color + human-id@1.0.2: {} iconv-lite@0.4.24: @@ -2463,6 +2639,8 @@ snapshots: is-number@7.0.0: {} + is-potential-custom-element-name@1.0.1: {} + is-subdir@1.2.0: dependencies: better-path-resolve: 1.0.0 @@ -2492,6 +2670,34 @@ snapshots: argparse: 1.0.10 esprima: 4.0.1 + jsdom@25.0.1: + dependencies: + cssstyle: 4.1.0 + data-urls: 5.0.0 + decimal.js: 10.4.3 + form-data: 4.0.1 + html-encoding-sniffer: 4.0.0 + http-proxy-agent: 7.0.2 + https-proxy-agent: 7.0.5 + is-potential-custom-element-name: 1.0.1 + nwsapi: 2.2.13 + parse5: 7.2.0 + rrweb-cssom: 0.7.1 + saxes: 6.0.0 + symbol-tree: 3.2.4 + tough-cookie: 5.0.0 + w3c-xmlserializer: 5.0.0 + webidl-conversions: 7.0.0 + whatwg-encoding: 3.1.1 + whatwg-mimetype: 4.0.0 + whatwg-url: 14.0.0 + ws: 8.18.0 + xml-name-validator: 5.0.0 + transitivePeerDependencies: + - bufferutil + - supports-color + - utf-8-validate + jsonfile@4.0.0: optionalDependencies: graceful-fs: 4.2.11 @@ -2542,6 +2748,12 @@ snapshots: braces: 3.0.3 picomatch: 2.3.1 + mime-db@1.52.0: {} + + mime-types@2.1.35: + dependencies: + mime-db: 1.52.0 + minimatch@9.0.5: dependencies: brace-expansion: 2.0.1 @@ -2575,6 +2787,8 @@ snapshots: dependencies: boolbase: 1.0.0 + nwsapi@2.2.13: {} + object-assign@4.1.1: {} os-tmpdir@1.0.2: {} @@ -2712,12 +2926,18 @@ snapshots: '@rollup/rollup-win32-x64-msvc': 4.24.0 fsevents: 2.3.3 + rrweb-cssom@0.7.1: {} + run-parallel@1.2.0: dependencies: queue-microtask: 1.2.3 safer-buffer@2.1.2: {} + saxes@6.0.0: + dependencies: + xmlchars: 2.2.0 + semver@7.6.3: {} shebang-command@1.2.0: @@ -2802,6 +3022,8 @@ snapshots: has-flag: 4.0.0 supports-color: 7.2.0 + symbol-tree@3.2.4: {} + term-size@2.2.1: {} thenify-all@1.6.0: @@ -2829,6 +3051,12 @@ snapshots: tinyspy@3.0.2: {} + tldts-core@6.1.57: {} + + tldts@6.1.57: + dependencies: + tldts-core: 6.1.57 + tmp@0.0.33: dependencies: os-tmpdir: 1.0.2 @@ -2839,12 +3067,20 @@ snapshots: toposort@2.0.2: {} + tough-cookie@5.0.0: + dependencies: + tldts: 6.1.57 + tr46@0.0.3: {} tr46@1.0.1: dependencies: punycode: 2.3.1 + tr46@5.0.0: + dependencies: + punycode: 2.3.1 + tree-kill@1.2.2: {} ts-interface-checker@0.1.13: {} @@ -2915,7 +3151,7 @@ snapshots: optionalDependencies: fsevents: 2.3.3 - vitest@2.1.3: + vitest@2.1.3(jsdom@25.0.1): dependencies: '@vitest/expect': 2.1.3 '@vitest/mocker': 2.1.3(@vitest/spy@2.1.3)(vite@5.4.10) @@ -2936,6 +3172,8 @@ snapshots: vite: 5.4.10 vite-node: 2.1.3 why-is-node-running: 2.3.0 + optionalDependencies: + jsdom: 25.0.1 transitivePeerDependencies: - less - lightningcss @@ -2947,16 +3185,27 @@ snapshots: - supports-color - terser + w3c-xmlserializer@5.0.0: + dependencies: + xml-name-validator: 5.0.0 + webidl-conversions@3.0.1: {} webidl-conversions@4.0.2: {} + webidl-conversions@7.0.0: {} + whatwg-encoding@3.1.1: dependencies: iconv-lite: 0.6.3 whatwg-mimetype@4.0.0: {} + whatwg-url@14.0.0: + dependencies: + tr46: 5.0.0 + webidl-conversions: 7.0.0 + whatwg-url@5.0.0: dependencies: tr46: 0.0.3 @@ -2993,6 +3242,12 @@ snapshots: string-width: 5.1.2 strip-ansi: 7.1.0 + ws@8.18.0: {} + + xml-name-validator@5.0.0: {} + + xmlchars@2.2.0: {} + y18n@5.0.8: {} yallist@2.1.2: {} diff --git a/src/createScraper.effect.test.ts b/src/createScraper.effect.test.ts deleted file mode 100644 index 6fb0c22..0000000 --- a/src/createScraper.effect.test.ts +++ /dev/null @@ -1,188 +0,0 @@ -import { describe, test, expect } from 'vitest'; -import * as S from 'effect/Schema'; -import { createScraper } from '@/createScraper.js'; -import { EffectValidator } from '@/validators/effect.js'; -import { type SchemaFieldDefinitions } from '@/types.js'; - -const Title = S.optional(S.String).pipe( - S.withDecodingDefault(() => 'No title'), -); - -const schema = S.Struct({ - title: Title, - description: S.String, - keywords: S.Array(S.String), - views: S.Number, -}); - -const schemaWithNested = S.Struct({ - title: Title, - image: S.Struct({ - url: S.String, - width: S.Number, - height: S.Number, - }), -}); - -type FieldDefinitions = SchemaFieldDefinitions>; -type NestedFieldDefinitions = SchemaFieldDefinitions< - S.Schema.Type ->; - -const fields: FieldDefinitions = { - title: { - selector: 'title', - }, - description: { - selector: 'meta[name="description"]', - attribute: 'content', - defaultValue: 'No description', - }, - keywords: { - selector: 'meta[name="keywords"]', - attribute: 'content', - transform: (value) => value.split(','), - defaultValue: [], - }, - views: { - selector: 'meta[name="views"]', - attribute: 'content', - transform: (value) => parseInt(value, 10), - defaultValue: 0, - }, -}; - -const nestedFields: NestedFieldDefinitions = { - title: { - selector: 'title', - }, - image: { - fields: { - url: { - selector: 'meta[property="og:image"]', - attribute: 'content', - }, - width: { - selector: 'meta[property="og:image:width"]', - attribute: 'content', - transform: (value) => parseInt(value, 10), - }, - height: { - selector: 'meta[property="og:image:height"]', - attribute: 'content', - transform: (value) => parseInt(value, 10), - }, - }, - }, -}; - -const html = ` - - - - - - - Example Title - - - -`; - -const htmlWithNested = ` - - - - Example Title - - - - - - -`; - -describe('xscrape with Effect/Schema', () => { - test('extracts data from HTML', () => { - const validator = new EffectValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - const data = scraper(html); - - expect(data).toEqual({ - title: 'Example Title', - description: 'An example description.', - keywords: ['typescript', 'html', 'parsing'], - views: 1234, - }); - }); - - test('handles missing data', () => { - const validator = new EffectValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - const data = scraper(''); - - expect(data).toEqual({ - title: 'No title', - description: 'No description', - keywords: [], - views: 0, - }); - }); - - test('handles multiple values', () => { - const validator = new EffectValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - const data = scraper( - '', - ); - - expect(data).toEqual({ - title: 'No title', - description: 'No description', - keywords: ['typescript', 'html', 'parsing'], - views: 0, - }); - }); - - test('handles invalid data', () => { - const validator = new EffectValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - try { - scraper( - '', - ); - } catch (error) { - expect(error).toBeInstanceOf(Error); - } - }); - - test('extracts nested data from HTML', () => { - const validator = new EffectValidator(schemaWithNested); - const scraper = createScraper({ - fields: nestedFields, - validator, - }); - const data = scraper(htmlWithNested); - - expect(data).toEqual({ - title: 'Example Title', - image: { - url: 'https://example.se/images/c12ffe73-3227-4a4a-b8ad-a3003cdf1d70?h=708&tight=false&w=1372', - width: 1372, - height: 708, - }, - }); - }); -}); diff --git a/src/createScraper.joi.test.ts b/src/createScraper.joi.test.ts deleted file mode 100644 index da84bcb..0000000 --- a/src/createScraper.joi.test.ts +++ /dev/null @@ -1,200 +0,0 @@ -import { createScraper } from '@/createScraper.js'; -import { describe, test, expect } from 'vitest'; -import Joi from 'joi'; -import { JoiValidator } from '@/validators/joi.js'; -import { type SchemaFieldDefinitions } from '@/types.js'; - -const schema = Joi.object({ - title: Joi.string().default('No title'), - description: Joi.string().required(), - keywords: Joi.array().items(Joi.string()).default([]), - views: Joi.number().default(0), -}); - -const schemaWithNested = Joi.object({ - title: Joi.string().default('No title nested'), - image: Joi.object({ - url: Joi.string().required(), - width: Joi.number().required(), - height: Joi.number().required(), - }) - .default({ url: '', width: 0, height: 0 }) - .optional(), -}); - -type SchemaType = { - title: string; - description: string; - keywords: string[]; - views: number; -}; - -type NestedSchemaType = { - title: string; - image?: { - url: string; - width: number; - height: number; - }; -}; - -type FieldDefinitions = SchemaFieldDefinitions; -type NestedFieldDefinitions = SchemaFieldDefinitions; - -const fields: FieldDefinitions = { - title: { - selector: 'title', - }, - description: { - selector: 'meta[name="description"]', - attribute: 'content', - defaultValue: 'No description', - }, - keywords: { - selector: 'meta[name="keywords"]', - attribute: 'content', - transform: (value) => value.split(','), - defaultValue: [], - }, - views: { - selector: 'meta[name="views"]', - attribute: 'content', - transform: (value) => parseInt(value, 10), - defaultValue: 0, - }, -}; - -const nestedFields: NestedFieldDefinitions = { - title: { - selector: 'title', - }, - image: { - fields: { - url: { - selector: 'meta[property="og:image"]', - attribute: 'content', - }, - width: { - selector: 'meta[property="og:image:width"]', - attribute: 'content', - transform: (value) => parseInt(value, 10), - }, - height: { - selector: 'meta[property="og:image:height"]', - attribute: 'content', - transform: (value) => parseInt(value, 10), - }, - }, - }, -}; - -const html = ` - - - - - - - Example Title - - - -`; - -const htmlWithNested = ` - - - - Example Title - - - - - - -`; - -describe('xscrape with Joi', () => { - test('extracts data from HTML', () => { - const validator = new JoiValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - const data = scraper(html); - - expect(data).toEqual({ - title: 'Example Title', - description: 'An example description.', - keywords: ['typescript', 'html', 'parsing'], - views: 1234, - }); - }); - - test('handles missing data', () => { - const validator = new JoiValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - const data = scraper(''); - - expect(data).toEqual({ - title: 'No title', - description: 'No description', - keywords: [], - views: 0, - }); - }); - - test('handles multiple values', () => { - const validator = new JoiValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - const data = scraper( - '', - ); - - expect(data).toEqual({ - title: 'No title', - description: 'No description', - keywords: ['typescript', 'html', 'parsing'], - views: 0, - }); - }); - - test('handles invalid data', () => { - const validator = new JoiValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - try { - scraper( - '', - ); - } catch (error) { - expect(error).toBeInstanceOf(Error); - } - }); - - test('extracts nested data from HTML', () => { - const validator = new JoiValidator(schemaWithNested); - const scraper = createScraper({ - fields: nestedFields, - validator, - }); - const data = scraper(htmlWithNested); - - expect(data).toEqual({ - title: 'Example Title', - image: { - url: 'https://example.se/images/c12ffe73-3227-4a4a-b8ad-a3003cdf1d70?h=708&tight=false&w=1372', - width: 1372, - height: 708, - }, - }); - }); -}); diff --git a/src/createScraper.ts b/src/createScraper.ts deleted file mode 100644 index b465642..0000000 --- a/src/createScraper.ts +++ /dev/null @@ -1,61 +0,0 @@ -import * as cheerio from 'cheerio'; -import { type ScrapeConfig, type SchemaFieldDefinitions } from '@/types.js'; - -const extractData = ( - fields: SchemaFieldDefinitions, - $context: cheerio.CheerioAPI, -): Partial => { - const data: Partial = {}; - - for (const key in fields) { - const fieldDef = fields[key]; - - if ('fields' in fieldDef) { - const nestedData = extractData( - fieldDef.fields as SchemaFieldDefinitions, - $context, - ); - - data[key as keyof U] = nestedData as U[typeof key]; - } else { - const elements = $context(fieldDef.selector); - let values: string[] = []; - - elements.each((_, element) => { - const value = fieldDef.attribute - ? $context(element).attr(fieldDef.attribute) - : $context(element).text().trim(); - - if (value !== undefined) { - values.push(value); - } - }); - - if (values.length === 0 && fieldDef.defaultValue !== undefined) { - data[key as keyof U] = fieldDef.defaultValue as U[typeof key]; - } else if (fieldDef.multiple) { - data[key as keyof U] = values.map((value) => - fieldDef.transform ? fieldDef.transform(value) : value, - ) as U[typeof key]; - } else { - const value = values[0]; - data[key as keyof U] = ( - fieldDef.transform && value ? fieldDef.transform(value) : value - ) as U[typeof key]; - } - } - } - - return data; -}; - -export const createScraper = ({ - fields, - validator, -}: ScrapeConfig): ((html: cheerio.CheerioAPI | string) => T) => { - return (html: cheerio.CheerioAPI | string): T => { - const $ = typeof html === 'string' ? cheerio.load(html) : html; - const data = extractData(fields, $); - return validator.validate(data); - }; -}; diff --git a/src/createScraper.yup.test.ts b/src/createScraper.yup.test.ts deleted file mode 100644 index bd754c4..0000000 --- a/src/createScraper.yup.test.ts +++ /dev/null @@ -1,187 +0,0 @@ -import { createScraper } from '@/createScraper.js'; -import { describe, test, expect } from 'vitest'; -import * as yup from 'yup'; -import { YupValidator } from '@/validators/yup.js'; -import { type SchemaFieldDefinitions } from '@/types.js'; - -const schema = yup.object({ - title: yup.string().default('No title'), - description: yup.string().required(), - keywords: yup.array().of(yup.string()).default([]), - views: yup.number().default(0), -}); - -const schemaWithNested = yup.object({ - title: yup.string().default('No title nested'), - image: yup - .object({ - url: yup.string().required(), - width: yup.number().required(), - height: yup.number().required(), - }) - .default(() => ({ url: '', width: 0, height: 0 })) - .optional(), -}); - -type FieldDefinitions = SchemaFieldDefinitions>; -type NestedFieldDefinitions = SchemaFieldDefinitions< - yup.InferType ->; - -const fields: FieldDefinitions = { - title: { - selector: 'title', - }, - description: { - selector: 'meta[name="description"]', - attribute: 'content', - defaultValue: 'No description', - }, - keywords: { - selector: 'meta[name="keywords"]', - attribute: 'content', - transform: (value) => value.split(','), - defaultValue: [], - }, - views: { - selector: 'meta[name="views"]', - attribute: 'content', - transform: (value) => parseInt(value, 10), - defaultValue: 0, - }, -}; - -const nestedFields: NestedFieldDefinitions = { - title: { - selector: 'title', - }, - image: { - fields: { - url: { - selector: 'meta[property="og:image"]', - attribute: 'content', - }, - width: { - selector: 'meta[property="og:image:width"]', - attribute: 'content', - transform: (value) => parseInt(value, 10), - }, - height: { - selector: 'meta[property="og:image:height"]', - attribute: 'content', - transform: (value) => parseInt(value, 10), - }, - }, - }, -}; - -const html = ` - - - - - - - Example Title - - - -`; - -const htmlWithNested = ` - - - - Example Title - - - - - - -`; - -describe('xscrape with Yup', () => { - test('extracts data from HTML', () => { - const validator = new YupValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - const data = scraper(html); - - expect(data).toEqual({ - title: 'Example Title', - description: 'An example description.', - keywords: ['typescript', 'html', 'parsing'], - views: 1234, - }); - }); - - test('handles missing data', () => { - const validator = new YupValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - const data = scraper(''); - - expect(data).toEqual({ - title: 'No title', - description: 'No description', - keywords: [], - views: 0, - }); - }); - - test('handles multiple values', () => { - const validator = new YupValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - const data = scraper( - '', - ); - - expect(data).toEqual({ - title: 'No title', - description: 'No description', - keywords: ['typescript', 'html', 'parsing'], - views: 0, - }); - }); - - test('handles invalid data', () => { - const validator = new YupValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - try { - scraper( - '', - ); - } catch (error) { - expect(error).toBeInstanceOf(Error); - } - }); - - test('extracts nested data from HTML', () => { - const validator = new YupValidator(schemaWithNested); - const scraper = createScraper({ - fields: nestedFields, - validator, - }); - const data = scraper(htmlWithNested); - - expect(data).toEqual({ - title: 'Example Title', - image: { - url: 'https://example.se/images/c12ffe73-3227-4a4a-b8ad-a3003cdf1d70?h=708&tight=false&w=1372', - width: 1372, - height: 708, - }, - }); - }); -}); diff --git a/src/createScraper.zod.test.ts b/src/createScraper.zod.test.ts deleted file mode 100644 index a03ba09..0000000 --- a/src/createScraper.zod.test.ts +++ /dev/null @@ -1,187 +0,0 @@ -import { createScraper } from '@/createScraper.js'; -import { describe, test, expect } from 'vitest'; -import { z } from 'zod'; -import { ZodValidator } from '@/validators/zod.js'; -import { type SchemaFieldDefinitions } from '@/types.js'; - -const schema = z.object({ - title: z.string().default('No title'), - description: z.string(), - keywords: z.array(z.string()), - views: z.number(), -}); - -const schemaWithNested = z.object({ - title: z.string().default('No title nested'), - image: z - .object({ - url: z.string(), - width: z.number(), - height: z.number(), - }) - .default({ url: '', width: 0, height: 0 }) - .optional(), -}); - -type FieldDefinitions = SchemaFieldDefinitions>; -type NestedFieldDefinitions = SchemaFieldDefinitions< - z.infer ->; - -const fields: FieldDefinitions = { - title: { - selector: 'title', - }, - description: { - selector: 'meta[name="description"]', - attribute: 'content', - defaultValue: 'No description', - }, - keywords: { - selector: 'meta[name="keywords"]', - attribute: 'content', - transform: (value) => value.split(','), - defaultValue: [], - }, - views: { - selector: 'meta[name="views"]', - attribute: 'content', - transform: (value) => parseInt(value, 10), - defaultValue: 0, - }, -}; - -const nestedFields: NestedFieldDefinitions = { - title: { - selector: 'title', - }, - image: { - fields: { - url: { - selector: 'meta[property="og:image"]', - attribute: 'content', - }, - width: { - selector: 'meta[property="og:image:width"]', - attribute: 'content', - transform: (value) => parseInt(value, 10), - }, - height: { - selector: 'meta[property="og:image:height"]', - attribute: 'content', - transform: (value) => parseInt(value, 10), - }, - }, - }, -}; - -const html = ` - - - - - - - Example Title - - - -`; - -const htmlWithNested = ` - - - - Example Title - - - - - - -`; - -describe('xscrape with Zod', () => { - test('extracts data from HTML', () => { - const validator = new ZodValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - const data = scraper(html); - - expect(data).toEqual({ - title: 'Example Title', - description: 'An example description.', - keywords: ['typescript', 'html', 'parsing'], - views: 1234, - }); - }); - - test('handles missing data', () => { - const validator = new ZodValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - const data = scraper(''); - - expect(data).toEqual({ - title: 'No title', - description: 'No description', - keywords: [], - views: 0, - }); - }); - - test('handles multiple values', () => { - const validator = new ZodValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - const data = scraper( - '', - ); - - expect(data).toEqual({ - title: 'No title', - description: 'No description', - keywords: ['typescript', 'html', 'parsing'], - views: 0, - }); - }); - - test('handles invalid data', () => { - const validator = new ZodValidator(schema); - const scraper = createScraper({ - fields, - validator, - }); - try { - scraper( - '', - ); - } catch (error) { - expect(error).toBeInstanceOf(Error); - } - }); - - test('extracts nested data from HTML', () => { - const validator = new ZodValidator(schemaWithNested); - const scraper = createScraper({ - fields: nestedFields, - validator, - }); - const data = scraper(htmlWithNested); - - expect(data).toEqual({ - title: 'Example Title', - image: { - url: 'https://example.se/images/c12ffe73-3227-4a4a-b8ad-a3003cdf1d70?h=708&tight=false&w=1372', - width: 1372, - height: 708, - }, - }); - }); -}); diff --git a/src/defineScraper.ts b/src/defineScraper.ts new file mode 100644 index 0000000..0ac9dfa --- /dev/null +++ b/src/defineScraper.ts @@ -0,0 +1,61 @@ +import * as cheerio from 'cheerio'; +import { createValidator } from '@/validators.js'; +import type { + ScraperConfig, + ScraperResult, + ValidatorType, +} from '@/types/main.js'; + +/** + * Defines a scraper with the provided configuration. + * + * @template T - The shape of the extracted data. + * @template V - The type of the validator used for validation. + * @template R - The type of the result after optional transformation, defaults to T. + * + * @param config - The configuration object for the scraper. + * @returns A function that takes an HTML string and returns the scraping result, which could be + * a scraper result or a promise of a scraper result. + */ +export function defineScraper< + T extends Record, + V extends ValidatorType, + R extends T = T, +>(config: ScraperConfig): (html: string) => Promise> { + const validator = createValidator(config.validator, config.schema); + + return async (html: string): Promise> => { + try { + const $ = cheerio.load(html); + const extractedData = $.extract(config.extract); + + const validationResult = validator.validate(extractedData); + + if (!validationResult.success) { + return { error: validationResult.error }; + } + + if (!validationResult.data) { + return { + error: new Error('Validation succeeded but no data was returned'), + }; + } + + // Apply optional transformation + if (config.transform) { + try { + const transformed = await Promise.resolve( + config.transform(validationResult.data), + ); + return { data: transformed }; + } catch (error) { + return { error }; + } + } + + return { data: validationResult.data as R }; + } catch (error) { + return { error }; + } + }; +} diff --git a/src/index.ts b/src/index.ts index 46d863a..c73cd91 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,3 +1,2 @@ -export * from '@/createScraper.js'; -export * from '@/validators/index.js'; -export * from '@/types.js'; +export * from '@/defineScraper.js'; +export * from '@/types/main.js'; diff --git a/src/types.ts b/src/types.ts deleted file mode 100644 index fd57456..0000000 --- a/src/types.ts +++ /dev/null @@ -1,26 +0,0 @@ -export type ScrapeConfig = { - fields: SchemaFieldDefinitions; - validator: SchemaValidator; -}; - -export type FieldDefinition = - | { - selector: string; - attribute?: string; - transform?: (value: string) => T; - defaultValue?: T; - multiple?: boolean; - } - | NestedFieldDefinition; - -type NestedFieldDefinition = { - fields: SchemaFieldDefinitions; -}; - -export type SchemaFieldDefinitions = { - [K in keyof T]: FieldDefinition; -}; - -export interface SchemaValidator { - validate(data: unknown): T; -} diff --git a/src/types/cheerio.ts b/src/types/cheerio.ts new file mode 100644 index 0000000..1933865 --- /dev/null +++ b/src/types/cheerio.ts @@ -0,0 +1,44 @@ +import type { Element } from 'domhandler'; + +interface StyleProp { + length: number; + [key: string]: string | number; + [index: number]: string; +} + +type PropType = string | undefined | null | Element[keyof Element] | StyleProp; + +type ExtractDescriptorFn = ( + el: Element, + key: string, + obj: Record, +) => unknown; + +interface ExtractDescriptor { + selector: string; + value?: string | ExtractDescriptorFn | ExtractMap; +} + +type ExtractValue = string | ExtractDescriptor | [string | ExtractDescriptor]; + +export interface ExtractMap { + [key: string]: ExtractValue; +} + +type ExtractedValue = V extends [ + string | ExtractDescriptor, +] + ? NonNullable>[] + : V extends string + ? string | undefined + : V extends ExtractDescriptor + ? V['value'] extends ExtractMap + ? ExtractedMap | undefined + : V['value'] extends ExtractDescriptorFn + ? ReturnType | undefined + : PropType | undefined + : never; + +export type ExtractedMap = { + [key in keyof M]: ExtractedValue; +}; diff --git a/src/types/main.ts b/src/types/main.ts new file mode 100644 index 0000000..2ae321c --- /dev/null +++ b/src/types/main.ts @@ -0,0 +1,68 @@ +import { z } from 'zod'; +import { Schema } from 'effect'; +import type { ExtractMap } from './cheerio.js'; + +export type ValidatorType = 'zod' | 'effect'; + +type ZodBuilder = typeof z; +type EffectBuilder = typeof Schema; + +export type SchemaBuilder = V extends 'zod' + ? ZodBuilder + : V extends 'effect' + ? EffectBuilder + : never; + +export type SchemaFunction = ( + builder: SchemaBuilder, +) => V extends 'zod' + ? z.ZodSchema + : V extends 'effect' + ? Schema.Schema + : never; + +export type ScraperConfig< + T extends Record, + V extends ValidatorType, + R extends T = T, +> = { + validator: V; + schema: SchemaFunction; + extract: ExtractMap; + transform?: (data: T) => Promise | R; +}; + +type BaseFieldOptions = { + attribute?: string; +}; + +export type LeafFieldConfig = BaseFieldOptions & { + selector?: string; + selectorAll?: string; +} & ( + | { selector: string; selectorAll?: never } + | { selector?: never; selectorAll: string } + ); + +export type FieldConfig = T extends object + ? T extends Array + ? LeafFieldConfig + : { + fields: Fields; + } + : LeafFieldConfig; + +export type Fields = { + [K in keyof T]: FieldConfig; +}; + +export type ValidationResult = { + success: boolean; + data?: T; + error?: unknown; +}; + +export type ScraperResult = { + data?: T; + error?: unknown; +}; diff --git a/src/validators.ts b/src/validators.ts new file mode 100644 index 0000000..3e19c6a --- /dev/null +++ b/src/validators.ts @@ -0,0 +1,63 @@ +import type { + ValidationResult, + SchemaBuilder, + SchemaFunction, + ValidatorType, +} from '@/types/main.js'; +import { z } from 'zod'; +import { Effect, Schema } from 'effect'; + +class Validator implements SchemaValidator { + constructor( + private schema: unknown, + private validateFunction: (schema: unknown, data: unknown) => T, + ) {} + + validate(data: unknown): ValidationResult { + try { + const result = this.validateFunction(this.schema, data); + return { success: true, data: result }; + } catch (error) { + return { success: false, error }; + } + } +} + +interface SchemaValidator { + validate(data: unknown): ValidationResult; +} + +export function getSchemaBuilder( + type: V, +): SchemaBuilder { + switch (type) { + case 'zod': + return z as SchemaBuilder; + case 'effect': + return Schema as SchemaBuilder; + default: + throw new Error(`Unsupported validator type: ${type}`); + } +} + +export function createValidator( + type: V, + schemaFn: SchemaFunction, +): SchemaValidator { + const builder = getSchemaBuilder(type); + const schema = schemaFn(builder); + + switch (type) { + case 'zod': + return new Validator(schema, (schema, data) => + (schema as z.ZodSchema).parse(data), + ); + case 'effect': + return new Validator(schema, (schema, data) => { + const result = Schema.decodeUnknown(schema as Schema.Schema)(data); + return Effect.runSync(result); + }); + default: + throw new Error(`Unsupported validator type: ${type}`); + } +} diff --git a/src/validators/effect.ts b/src/validators/effect.ts deleted file mode 100644 index 8372699..0000000 --- a/src/validators/effect.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { type SchemaValidator } from '@/types.js'; -import * as Schema from 'effect/Schema'; -import { Effect } from 'effect'; - -export class EffectValidator implements SchemaValidator { - constructor(private schema: Schema.Schema) {} - - validate(data: unknown): A { - const result = Schema.decodeUnknown(this.schema)(data); - - return Effect.runSync(result); - } -} diff --git a/src/validators/index.ts b/src/validators/index.ts deleted file mode 100644 index d596a7b..0000000 --- a/src/validators/index.ts +++ /dev/null @@ -1,4 +0,0 @@ -export * from '@/validators/effect.js'; -export * from '@/validators/zod.js'; -export * from '@/validators/joi.js'; -export * from '@/validators/yup.js'; diff --git a/src/validators/joi.ts b/src/validators/joi.ts deleted file mode 100644 index b8a58ab..0000000 --- a/src/validators/joi.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { type SchemaValidator } from '@/types.js'; -import type { Schema, ValidationError } from 'joi'; - -export class JoiValidator implements SchemaValidator { - constructor(private schema: Schema) {} - - validate(data: unknown): T { - const { error, value } = this.schema.validate(data, { - convert: true, - stripUnknown: true, - presence: 'optional', - abortEarly: false, - }); - - if (error) { - throw new Error(this.formatError(error)); - } - - return value; - } - - private formatError(error: ValidationError): string { - return error.details.map((detail) => detail.message).join('\n'); - } -} diff --git a/src/validators/yup.ts b/src/validators/yup.ts deleted file mode 100644 index 196efaa..0000000 --- a/src/validators/yup.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { type SchemaValidator } from '@/types.js'; -import * as yup from 'yup'; - -export class YupValidator implements SchemaValidator { - constructor(private schema: yup.Schema) {} - - validate(data: unknown): T { - try { - return this.schema.validateSync(data, { - stripUnknown: true, - strict: false, - abortEarly: false, - }); - } catch (error) { - if (error instanceof yup.ValidationError) { - throw new Error(this.formatError(error)); - } - throw error; - } - } - - private formatError(error: yup.ValidationError): string { - return error.errors.join('\n'); - } -} diff --git a/src/validators/zod.ts b/src/validators/zod.ts deleted file mode 100644 index 5601f4d..0000000 --- a/src/validators/zod.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { ZodSchema } from 'zod'; -import { type SchemaValidator } from '@/types.js'; - -export class ZodValidator implements SchemaValidator { - constructor(private schema: ZodSchema) {} - - validate(data: unknown): T { - return this.schema.parse(data); - } -} diff --git a/test/__fixtures__/html.ts b/test/__fixtures__/html.ts new file mode 100644 index 0000000..a321a1f --- /dev/null +++ b/test/__fixtures__/html.ts @@ -0,0 +1,226 @@ +export const kitchenSink = ` + + + + + + + Example Title + + + +`; + +export const kitchenSinkWithNested = ` + + + + Example Title + + + + + + +`; + +export const largeKitchenSink = ` + + + + + + HTML Kitchen Sink + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Kitchen Sink Example

+

This page demonstrates a wide variety of HTML elements for styling and functional testing purposes.

+ +

Headings

+

Heading 1

+

Heading 2

+

Heading 3

+

Heading 4

+
Heading 5
+
Heading 6
+ +

Text Elements

+

This is a paragraph of text. Bold text, italic text, underlined text, and deleted text can be styled inline.

+

Here’s an example of a link and a link with a tooltip.

+ +

Lists

+

Unordered List

+
    +
  • List item 1
  • +
  • List item 2 +
      +
    • Nested item 1
    • +
    • Nested item 2
    • +
    +
  • +
  • List item 3
  • +
+ +

Ordered List

+
    +
  1. Ordered item 1
  2. +
  3. Ordered item 2
  4. +
  5. Ordered item 3
  6. +
+ +

Definition List

+
+
HTML
+
A markup language for creating web pages.
+
CSS
+
A style sheet language used to style HTML elements.
+
+ +

Forms

+
+ + + + +
+ Choose a Plan: + +
+ + +
+ + + +
+ +

Tables

+ + + + + + + + + + + + + + + + + + + + +
NameAgeCountry
John Doe28USA
Jane Doe26UK
+ +

Code

+

Inline code example: <div>Hello</div>

+
+    
+      <html>
+        <body>
+          <p>Hello World</p>
+        </body>
+      </html>
+    
+  
+ +

Images

+ Placeholder Image +
+ Placeholder Image +
A sample figure with caption.
+
+ +

Embedded Video

+ + +

Blockquotes

+
+ "This is a blockquote example. Great for long quotes." +
+ + + +`; diff --git a/test/effect.test.ts b/test/effect.test.ts new file mode 100644 index 0000000..c8211c9 --- /dev/null +++ b/test/effect.test.ts @@ -0,0 +1,110 @@ +import { describe, test, expect } from 'vitest'; +import { defineScraper } from '@/defineScraper.js'; +import { kitchenSink, kitchenSinkWithNested } from './__fixtures__/html.js'; + +describe('xscrape with Effect/Schema', () => { + test('extracts data from HTML', async () => { + const scraper = defineScraper({ + validator: 'effect', + schema: (S) => + S.Struct({ + title: S.String, + description: S.String, + keywords: S.Array(S.String), + views: S.Number, + }), + extract: { + title: 'title', + description: { + selector: 'meta[name="description"]', + value: 'content', + }, + keywords: { + selector: 'meta[name="keywords"]', + value: (el) => el.attribs['content']?.split(',') || [], + }, + views: { + selector: 'meta[name="views"]', + value: (el) => parseInt(el.attribs['content'] || '0', 10), + }, + }, + }); + const { data, error } = await scraper(kitchenSink); + + expect(error).toBeUndefined(); + + expect(data).toEqual({ + title: 'Example Title', + description: 'An example description.', + keywords: ['typescript', 'html', 'parsing'], + views: 1234, + }); + }); + + // test('handles missing data', () => { + // const validator = new EffectValidator(schema); + // const scraper = createScraper({ + // fields, + // validator, + // }); + // const data = scraper(''); + + // expect(data).toEqual({ + // title: 'No title', + // description: 'No description', + // keywords: [], + // views: 0, + // }); + // }); + + // test('handles multiple values', () => { + // const validator = new EffectValidator(schema); + // const scraper = createScraper({ + // fields, + // validator, + // }); + // const data = scraper( + // '', + // ); + + // expect(data).toEqual({ + // title: 'No title', + // description: 'No description', + // keywords: ['typescript', 'html', 'parsing'], + // views: 0, + // }); + // }); + + // test('handles invalid data', () => { + // const validator = new EffectValidator(schema); + // const scraper = createScraper({ + // fields, + // validator, + // }); + // try { + // scraper( + // '', + // ); + // } catch (error) { + // expect(error).toBeInstanceOf(Error); + // } + // }); + + // test('extracts nested data from HTML', () => { + // const validator = new EffectValidator(schemaWithNested); + // const scraper = createScraper({ + // fields: nestedFields, + // validator, + // }); + // const data = scraper(htmlWithNested); + + // expect(data).toEqual({ + // title: 'Example Title', + // image: { + // url: 'https://example.se/images/c12ffe73-3227-4a4a-b8ad-a3003cdf1d70?h=708&tight=false&w=1372', + // width: 1372, + // height: 708, + // }, + // }); + // }); +}); diff --git a/test/zod.test.ts b/test/zod.test.ts new file mode 100644 index 0000000..ffdd26c --- /dev/null +++ b/test/zod.test.ts @@ -0,0 +1,201 @@ +import { describe, test, expect } from 'vitest'; +import { defineScraper } from '@/defineScraper.js'; +import { kitchenSink, kitchenSinkWithNested } from './__fixtures__/html.js'; + +describe('xscrape with Zod', () => { + test('extracts data from HTML', async () => { + const scraper = defineScraper({ + validator: 'zod', + schema: (z) => + z.object({ + title: z.string(), + description: z.string(), + keywords: z.array(z.string()), + views: z.coerce.number(), + }), + extract: { + title: { + selector: 'title', + }, + description: { + selector: 'meta[name="description"]', + value: 'content', + }, + keywords: { + selector: 'meta[name="keywords"]', + value(el) { + return el.attribs['content']?.split(','); + }, + }, + views: { + selector: 'meta[name="views"]', + value: 'content', + }, + }, + }); + const { data, error } = await scraper(kitchenSink); + + expect(error).toBeUndefined(); + + expect(data).toEqual({ + title: 'Example Title', + description: 'An example description.', + keywords: ['typescript', 'html', 'parsing'], + views: 1234, + }); + }); + + test('handles missing data', async () => { + const scraper = defineScraper({ + validator: 'zod', + schema: (z) => + z.object({ + title: z.string().default('No title'), + description: z.string().default('No description'), + views: z.coerce.number().default(0), + }), + extract: { + title: { + selector: 'title', + }, + description: { + selector: 'meta[name="description"]', + value: 'content', + }, + views: { + selector: 'meta[name="views"]', + value: 'content', + }, + }, + }); + const { data, error } = await scraper( + '', + ); + + console.log(error); + + expect(error).toBeUndefined(); + + expect(data).toEqual({ + title: 'No title', + description: 'No description', + views: 0, + }); + }); + + test('handles multiple values', async () => { + const scraper = defineScraper({ + validator: 'zod', + schema: (z) => + z.object({ + keywords: z.array(z.string()), + }), + extract: { + keywords: { + selector: 'meta[name="keywords"]', + value(el) { + return el.attribs['content']?.split(','); + }, + }, + }, + }); + const { data, error } = await scraper( + '', + ); + + expect(error).toBeUndefined(); + + expect(data).toEqual({ + keywords: ['typescript', 'html', 'parsing'], + }); + }); + + test('handles invalid data', async () => { + const scraper = defineScraper({ + validator: 'zod', + schema: (z) => + z.object({ + title: z.string(), + description: z.string(), + keywords: z.array(z.string()), + views: z.coerce.number(), + }), + extract: { + title: { + selector: 'title', + }, + description: { + selector: 'meta[name="description"]', + value: 'content', + }, + keywords: { + selector: 'meta[name="keywords"]', + value(el) { + return el.attribs['content']?.split(','); + }, + }, + views: { + selector: 'meta[name="views"]', + value: 'content', + }, + }, + }); + try { + await scraper( + '', + ); + } catch (error) { + expect(error).toBeInstanceOf(Error); + } + }); + + test('extracts nested data from HTML', async () => { + const scraper = defineScraper({ + validator: 'zod', + schema: (z) => + z.object({ + title: z.string(), + image: z + .object({ + url: z.string().url(), + width: z.coerce.number(), + height: z.coerce.number(), + }) + .default({ url: '', width: 0, height: 0 }) + .optional(), + }), + extract: { + title: { + selector: 'title', + }, + image: { + selector: 'head', + value: { + url: { + selector: 'meta[property="og:image"]', + value: 'content', + }, + width: { + selector: 'meta[property="og:image:width"]', + value: 'content', + }, + height: { + selector: 'meta[property="og:image:height"]', + value: 'content', + }, + }, + }, + }, + }); + const { data } = await scraper(kitchenSinkWithNested); + + expect(data).toEqual({ + title: 'Example Title', + image: { + url: 'https://example.se/images/c12ffe73-3227-4a4a-b8ad-a3003cdf1d70?h=708&tight=false&w=1372', + width: 1372, + height: 708, + }, + }); + }); +}); diff --git a/vitest.config.ts b/vitest.config.ts index ddbf7ab..0dea2bd 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -1,4 +1,4 @@ -import { defineConfig } from 'vite'; +import { defineConfig } from 'vitest/config'; import path from 'path'; export default defineConfig({ @@ -7,4 +7,8 @@ export default defineConfig({ '@': path.resolve(__dirname, './src'), }, }, + test: { + globals: true, + environment: 'jsdom', + }, });