-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
test case: sitemap validator - added
- Loading branch information
Showing
9 changed files
with
245 additions
and
144 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,31 +1,11 @@ | ||
import { readFileSync } from "fs"; | ||
import { join } from "path"; | ||
import { validateXML } from "xsd-schema-validator"; | ||
import config from "../configLoader"; | ||
import { makeSitemap } from "../lib/utils"; | ||
import { Hawk } from "../lib/core"; | ||
import validateSitemap from "./utils/validate-sitemap"; | ||
|
||
/**
 * Regenerates the sitemap and checks it against the bundled XSD schema.
 *
 * The sitemap is read from `config.sitemapPath`; the schema is the
 * `sitemap-schema.xsd` file that sits next to this test file.
 *
 * @returns the validator's `valid` flag, or `false` when the validator
 *          rejects (the rejection reason is logged to the console).
 */
async function _validateSitemap(): Promise<boolean> {
  // Build a fresh sitemap before inspecting it.
  await makeSitemap(true, [], [], true);

  // Load the generated sitemap.xml as text.
  const xmlContents = readFileSync(config.sitemapPath, "utf8");

  // The schema lives alongside this file.
  const schemaLocation = join(__dirname, "sitemap-schema.xsd");

  try {
    const { valid } = await validateXML(xmlContents, schemaLocation);
    return valid;
  } catch (validationError) {
    // A rejected validation is reported and treated as "not valid".
    console.log(validationError);
    return false;
  }
}
// Shared fixtures for the sitemap test: a Hawk instance and the sample site root.
const hawkInstance = new Hawk();
const testSampleRootPath = "./test/test-sample";

test("Sitemap.xml validation", async () => {
  // Config-driven validation runs first; a failure here stops the test early.
  const configBasedResult = await _validateSitemap();
  expect(configBasedResult).toBe(true);

  // Then validate the sitemap generated for the sample site.
  const sampleSiteResult = await validateSitemap(
    testSampleRootPath,
    hawkInstance,
  );
  expect(sampleSiteResult).toBe(true);
});
File renamed without changes.
File renamed without changes.
File renamed without changes.
232 changes: 116 additions & 116 deletions
232
test/sitemap-schema.xsd → test/test-sample/sitemap-schema.xsd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,116 +1,116 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" | ||
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9" | ||
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" | ||
elementFormDefault="qualified"> | ||
<xsd:annotation> | ||
<xsd:documentation> | ||
XML Schema for Sitemap files. | ||
Last Modified 2008-03-26 | ||
</xsd:documentation> | ||
</xsd:annotation> | ||
|
||
<xsd:element name="urlset"> | ||
<xsd:annotation> | ||
<xsd:documentation> | ||
Container for a set of up to 50,000 document elements. | ||
This is the root element of the XML file. | ||
</xsd:documentation> | ||
</xsd:annotation> | ||
<xsd:complexType> | ||
<xsd:sequence> | ||
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/> | ||
<xsd:element name="url" type="tUrl" maxOccurs="unbounded"/> | ||
</xsd:sequence> | ||
</xsd:complexType> | ||
</xsd:element> | ||
|
||
<xsd:complexType name="tUrl"> | ||
<xsd:annotation> | ||
<xsd:documentation> | ||
Container for the data needed to describe a document to crawl. | ||
</xsd:documentation> | ||
</xsd:annotation> | ||
<xsd:sequence> | ||
<xsd:element name="loc" type="tLoc"/> | ||
<xsd:element name="lastmod" type="tLastmod" minOccurs="0"/> | ||
<xsd:element name="changefreq" type="tChangeFreq" minOccurs="0"/> | ||
<xsd:element name="priority" type="tPriority" minOccurs="0"/> | ||
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/> | ||
</xsd:sequence> | ||
</xsd:complexType> | ||
|
||
<xsd:simpleType name="tLoc"> | ||
<xsd:annotation> | ||
<xsd:documentation> | ||
REQUIRED: The location URI of a document. | ||
The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt). | ||
</xsd:documentation> | ||
</xsd:annotation> | ||
<xsd:restriction base="xsd:anyURI"> | ||
<xsd:minLength value="12"/> | ||
<xsd:maxLength value="2048"/> | ||
</xsd:restriction> | ||
</xsd:simpleType> | ||
|
||
<xsd:simpleType name="tLastmod"> | ||
<xsd:annotation> | ||
<xsd:documentation> | ||
OPTIONAL: The date the document was last modified. The date must conform | ||
to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime). | ||
Example: 2005-05-10 | ||
Lastmod may also contain a timestamp. | ||
Example: 2005-05-10T17:33:30+08:00 | ||
</xsd:documentation> | ||
</xsd:annotation> | ||
<xsd:union> | ||
<xsd:simpleType> | ||
<xsd:restriction base="xsd:date"/> | ||
</xsd:simpleType> | ||
<xsd:simpleType> | ||
<xsd:restriction base="xsd:dateTime"/> | ||
</xsd:simpleType> | ||
</xsd:union> | ||
</xsd:simpleType> | ||
|
||
<xsd:simpleType name="tChangeFreq"> | ||
<xsd:annotation> | ||
<xsd:documentation> | ||
OPTIONAL: Indicates how frequently the content at a particular URL is | ||
likely to change. The value "always" should be used to describe | ||
documents that change each time they are accessed. The value "never" | ||
should be used to describe archived URLs. Please note that web | ||
crawlers may not necessarily crawl pages marked "always" more often. | ||
Consider this element as a friendly suggestion and not a command. | ||
</xsd:documentation> | ||
</xsd:annotation> | ||
<xsd:restriction base="xsd:string"> | ||
<xsd:enumeration value="always"/> | ||
<xsd:enumeration value="hourly"/> | ||
<xsd:enumeration value="daily"/> | ||
<xsd:enumeration value="weekly"/> | ||
<xsd:enumeration value="monthly"/> | ||
<xsd:enumeration value="yearly"/> | ||
<xsd:enumeration value="never"/> | ||
</xsd:restriction> | ||
</xsd:simpleType> | ||
|
||
<xsd:simpleType name="tPriority"> | ||
<xsd:annotation> | ||
<xsd:documentation> | ||
OPTIONAL: The priority of a particular URL relative to other pages | ||
on the same site. The value for this element is a number between | ||
0.0 and 1.0 where 0.0 identifies the lowest priority page(s). | ||
The default priority of a page is 0.5. Priority is used to select | ||
between pages on your site. Setting a priority of 1.0 for all URLs | ||
will not help you, as the relative priority of pages on your site | ||
is what will be considered. | ||
</xsd:documentation> | ||
</xsd:annotation> | ||
<xsd:restriction base="xsd:decimal"> | ||
<xsd:minInclusive value="0.0"/> | ||
<xsd:maxInclusive value="1.0"/> | ||
</xsd:restriction> | ||
</xsd:simpleType> | ||
|
||
</xsd:schema> | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" | ||
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9" | ||
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" | ||
elementFormDefault="qualified"> | ||
<xsd:annotation> | ||
<xsd:documentation> | ||
XML Schema for Sitemap files. | ||
Last Modified 2008-03-26 | ||
</xsd:documentation> | ||
</xsd:annotation> | ||
|
||
<xsd:element name="urlset"> | ||
<xsd:annotation> | ||
<xsd:documentation> | ||
Container for a set of up to 50,000 document elements. | ||
This is the root element of the XML file. | ||
</xsd:documentation> | ||
</xsd:annotation> | ||
<xsd:complexType> | ||
<xsd:sequence> | ||
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/> | ||
<xsd:element name="url" type="tUrl" maxOccurs="unbounded"/> | ||
</xsd:sequence> | ||
</xsd:complexType> | ||
</xsd:element> | ||
|
||
<xsd:complexType name="tUrl"> | ||
<xsd:annotation> | ||
<xsd:documentation> | ||
Container for the data needed to describe a document to crawl. | ||
</xsd:documentation> | ||
</xsd:annotation> | ||
<xsd:sequence> | ||
<xsd:element name="loc" type="tLoc"/> | ||
<xsd:element name="lastmod" type="tLastmod" minOccurs="0"/> | ||
<xsd:element name="changefreq" type="tChangeFreq" minOccurs="0"/> | ||
<xsd:element name="priority" type="tPriority" minOccurs="0"/> | ||
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/> | ||
</xsd:sequence> | ||
</xsd:complexType> | ||
|
||
<xsd:simpleType name="tLoc"> | ||
<xsd:annotation> | ||
<xsd:documentation> | ||
REQUIRED: The location URI of a document. | ||
The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt). | ||
</xsd:documentation> | ||
</xsd:annotation> | ||
<xsd:restriction base="xsd:anyURI"> | ||
<xsd:minLength value="12"/> | ||
<xsd:maxLength value="2048"/> | ||
</xsd:restriction> | ||
</xsd:simpleType> | ||
|
||
<xsd:simpleType name="tLastmod"> | ||
<xsd:annotation> | ||
<xsd:documentation> | ||
OPTIONAL: The date the document was last modified. The date must conform | ||
to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime). | ||
Example: 2005-05-10 | ||
Lastmod may also contain a timestamp. | ||
Example: 2005-05-10T17:33:30+08:00 | ||
</xsd:documentation> | ||
</xsd:annotation> | ||
<xsd:union> | ||
<xsd:simpleType> | ||
<xsd:restriction base="xsd:date"/> | ||
</xsd:simpleType> | ||
<xsd:simpleType> | ||
<xsd:restriction base="xsd:dateTime"/> | ||
</xsd:simpleType> | ||
</xsd:union> | ||
</xsd:simpleType> | ||
|
||
<xsd:simpleType name="tChangeFreq"> | ||
<xsd:annotation> | ||
<xsd:documentation> | ||
OPTIONAL: Indicates how frequently the content at a particular URL is | ||
likely to change. The value "always" should be used to describe | ||
documents that change each time they are accessed. The value "never" | ||
should be used to describe archived URLs. Please note that web | ||
crawlers may not necessarily crawl pages marked "always" more often. | ||
Consider this element as a friendly suggestion and not a command. | ||
</xsd:documentation> | ||
</xsd:annotation> | ||
<xsd:restriction base="xsd:string"> | ||
<xsd:enumeration value="always"/> | ||
<xsd:enumeration value="hourly"/> | ||
<xsd:enumeration value="daily"/> | ||
<xsd:enumeration value="weekly"/> | ||
<xsd:enumeration value="monthly"/> | ||
<xsd:enumeration value="yearly"/> | ||
<xsd:enumeration value="never"/> | ||
</xsd:restriction> | ||
</xsd:simpleType> | ||
|
||
<xsd:simpleType name="tPriority"> | ||
<xsd:annotation> | ||
<xsd:documentation> | ||
OPTIONAL: The priority of a particular URL relative to other pages | ||
on the same site. The value for this element is a number between | ||
0.0 and 1.0 where 0.0 identifies the lowest priority page(s). | ||
The default priority of a page is 0.5. Priority is used to select | ||
between pages on your site. Setting a priority of 1.0 for all URLs | ||
will not help you, as the relative priority of pages on your site | ||
is what will be considered. | ||
</xsd:documentation> | ||
</xsd:annotation> | ||
<xsd:restriction base="xsd:decimal"> | ||
<xsd:minInclusive value="0.0"/> | ||
<xsd:maxInclusive value="1.0"/> | ||
</xsd:restriction> | ||
</xsd:simpleType> | ||
|
||
</xsd:schema> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
import { XMLParser } from "fast-xml-parser"; | ||
import { globSync } from "glob"; | ||
import { existsSync, readFileSync, rmSync } from "node:fs"; | ||
import { join } from "node:path"; | ||
import { validateXML } from "xsd-schema-validator"; | ||
import { type Hawk } from "../../lib/core"; | ||
|
||
/**
 * Generates a sitemap for the sample site and verifies it end to end.
 *
 * Steps performed, all relative to `testSampleRootPath`:
 *  1. build the sitemap through `hawkInstance.utils.makeSitemap`, written to
 *     `test-sitemap.xml`;
 *  2. read the file, then delete it;
 *  3. validate the XML against `sitemap-schema.xsd`;
 *  4. compare the number of `<url>` entries with the number of files matched
 *     by the `**\/*.html` glob;
 *  5. check that every listed URL maps to an existing `.html` file.
 *
 * Side effects: `hawkInstance.configurations.sitemapPath` is overwritten.
 * The working directory is switched to `testSampleRootPath` for the duration
 * of the call and restored afterwards, even on failure.
 *
 * @param testSampleRootPath - directory containing the sample site and schema.
 * @param hawkInstance - Hawk instance used to generate the sitemap.
 * @returns `true` only when every check passes; otherwise `false`, with a
 *          console message describing the failing step.
 */
export default async function validateSitemap(
  testSampleRootPath: string,
  hawkInstance: Hawk,
): Promise<boolean> {
  // Remember where we started so the chdir below does not leak into
  // whatever runs after this function in the same process.
  const originalCwd = process.cwd();
  process.chdir(testSampleRootPath);

  try {
    const lookupPattern = ["**/*.html"];
    const uploadToFTP = false;
    const expectedSitemapOutputPath = "test-sitemap.xml";

    hawkInstance.configurations.sitemapPath = expectedSitemapOutputPath;
    await hawkInstance.utils.makeSitemap(
      lookupPattern,
      [],
      false,
      uploadToFTP,
    );

    //check sitemap if exist
    if (!existsSync(expectedSitemapOutputPath)) {
      console.log("⚠️ Sitemap not found!");
      return false;
    }

    const sitemapXML: string = readFileSync(expectedSitemapOutputPath, {
      encoding: "utf8",
    });

    //delete sitemap as no longer needed (its content is already in memory)
    rmSync(expectedSitemapOutputPath, { recursive: true, force: true });

    //validate sitemap with schema
    const sitemapSchemaFile = "sitemap-schema.xsd";

    let valid = false;
    try {
      ({ valid } = await validateXML(sitemapXML, sitemapSchemaFile));
    } catch (err: unknown) {
      // The validator may reject instead of resolving with valid === false;
      // report the reason and treat it as a failed schema check.
      console.log(err);
    }

    if (!valid) {
      console.log("⚠️ Sitemap failed at schematic test");
      return false;
    }

    //check number of available routes against nof available files
    const parser = new XMLParser();
    const parsed = parser.parse(sitemapXML);

    // The parser yields a plain object (not an array) when a tag occurs only
    // once, so a sitemap with a single <url> must be normalised before use.
    const rawEntries = parsed?.urlset?.url;
    let entries: { loc: string; lastmod: string }[];
    if (Array.isArray(rawEntries)) {
      entries = rawEntries;
    } else if (rawEntries == null) {
      entries = [];
    } else {
      entries = [rawEntries];
    }

    const urls = entries.map((entry) => entry.loc);
    const availableRoutes = globSync(lookupPattern);

    const numberOfRoutesinMap = urls.length;
    const numberOfFiles = availableRoutes.length;

    if (numberOfFiles !== numberOfRoutesinMap) {
      console.log(
        `⚠️ Sitemap lists ${numberOfRoutesinMap} route(s) but ${numberOfFiles} html file(s) were found`,
      );
      return false;
    }

    //ping to all routes if any failed return false
    return _pingRoutes(urls);
  } finally {
    process.chdir(originalCwd);
  }
}
|
||
/**
 * Checks that every sitemap URL corresponds to an `.html` file under the
 * current working directory.
 *
 * The URL's pathname is mapped to `<cwd>/<pathname>.html`; the root path `/`
 * maps to `index.html`. Because `URL.pathname` is percent-encoded, the
 * decoded form of the path is also tried, so files whose names contain
 * spaces or non-ASCII characters are found.
 *
 * Checking stops at the first URL with no matching file, and that URL is
 * logged.
 *
 * @param urls - absolute URLs taken from the sitemap's `<loc>` entries.
 * @returns `true` when every URL resolves to an existing file (also for an
 *          empty list), otherwise `false`.
 * @throws TypeError if an entry is not a valid absolute URL (from `new URL`).
 */
function _pingRoutes(urls: string[]): boolean {
  return urls.every((url: string) => {
    let { pathname } = new URL(url);

    if (pathname === "/") pathname = "index";

    // Try the raw pathname first, then its decoded form when that differs.
    const candidates = [pathname];
    try {
      const decoded = decodeURIComponent(pathname);
      if (decoded !== pathname) candidates.push(decoded);
    } catch {
      // Malformed escape sequence: only the raw pathname can be checked.
    }

    const goodRoute = candidates.some((candidate) =>
      existsSync(join(process.cwd(), candidate + ".html")),
    );

    if (!goodRoute) {
      console.log(`⚠️ Ping failed on: ${url}`);
    }

    return goodRoute;
  });
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,5 +37,6 @@ | |
"node_modules", | ||
"test", | ||
"jest.config.ts", | ||
"dist" | ||
] | ||
} |
Oops, something went wrong.