diff --git a/index.js b/index.js index cc4c9ae..ef80c7e 100644 --- a/index.js +++ b/index.js @@ -50,7 +50,29 @@ function extractSections(markdown) { const sectionTitles = markdown.match(/^#+ .*$/gm) || []; const sections = sectionTitles.map(section => - section.replace(/^\W+/, '').replace(/\W+$/, '').replace(/[^\w\s-]+/g, '').replace(/\s+/g, '-').toLowerCase() + // The links are compared with the headings (simple text comparison). + // However, the links are url-encoded beforehand, so the headings + // also have to be encoded so that they can be matched. + encodeURIComponent( + section + // replace links, the links can start with "./", "/", "http://", "https://" or "#" + // and keep the value of the text ($1) + .replace(/\[(.+)\]\(((?:\.?\/|https?:\/\/|#)[\w\d./?=#-]+)\)/, "$1") + // make everything (Unicode-aware) lower case + .toLowerCase() + // remove the leading "#" characters and any white space that follows them + .replace(/^#+\s*/, '') + // remove everything that is NOT a (Unicode) Letter, (Unicode) Decimal Number, + // (Unicode) Letter Number, white space, underscore, hyphen or backtick + // https://ruby-doc.org/3.3.2/Regexp.html#class-Regexp-label-Unicode+Character+Categories + .replace(/[^\p{L}\p{Nd}\p{Nl}\s_\-`]/gu, "") + // remove any remaining "*" characters + .replace(/\*(?=.*)/gu, "") + // remove leftover backticks + .replace(/`/gu, "") + // finally, replace each remaining white-space character with '-' + .replace(/\s/gu, "-") + ) ); var uniq = {}; diff --git a/test/hash-links.md b/test/hash-links.md index 4db835b..09cd06a 100644 --- a/test/hash-links.md +++ b/test/hash-links.md @@ -14,12 +14,78 @@ The title is [Foo](#foo). The second section is [Bar](#bar). +To test a failure. Link that [does not exist](#does-not-exist). + ## Uh, oh There is no section named [Potato](#potato). There is an anchor named [Tomato](#tomato). 
-## Header with special char ✨ +## Header with special char at end ✨ + +Test [header with image](#header-with-special-char-at-end-) + +## Header with multiple special chars at end ✨✨ + +Test [header with multiple images](#header-with-multiple-special-chars-at-end-) + +## Header with special ✨ char + +Test [header with image](#header-with-special--char) + +## Header with multiple special ✨✨ chars + +Test [header with multiple images](#header-with-multiple-special--chars) + +## Header with German umlaut Ö + +Link to [German umlaut Ö](#header-with-german-umlaut-ö) + +## Header with German umlaut ö manual encoded link + +Link to [German umlaut ö manual encoded in link](#header-with-german-umlaut-%C3%B6-manual-encoded-link) + +### [Heading with a link](https://github.com/tcort/markdown-link-check) + +An [anchor link](#heading-with-a-link) to a heading. + +### [Heading with an anchor link](#foo) + +An [anchor link](#heading-with-an-anchor-link) to a heading. + +## --docker + +[--docker](#--docker) + +## Step 7 - Lint & Test + +[Step 7 - Lint \& Test](#step-7---lint--test) + +## Product Owner / Design Approval + +[Product Owner / Design Approval](#product-owner--design-approval) + +## Migrating from `<= v1.18.0` + +Whitespaces separated by special characters (no workaround) + +[migrating from <= v1.18.0](#migrating-from--v1180) + +## Client/server examples using `network.peer.* + +Consecutive whitespaces typo (easy to work around) + +[Client/server examples using `network.peer.*`](#clientserver-examples-using--networkpeer) + +## This header is [linked](#somewhere) + +This is a [link to a linked header](#this-header-is-linked) + +### Somewhere + +## L. Is the package in the Linux distro base image? + +Anchor links ending with `?`. -Test [header with image](#header-with-special-char-) +[L. 
Is the package in the Linux distro base image?](#l-is-the-package-in-the-linux-distro-base-image) diff --git a/test/markdown-link-check.test.js b/test/markdown-link-check.test.js index 9ebeaff..62f40c2 100644 --- a/test/markdown-link-check.test.js +++ b/test/markdown-link-check.test.js @@ -403,9 +403,26 @@ describe('markdown-link-check', function () { expect(result).to.eql([ { link: '#foo', statusCode: 200, err: null, status: 'alive' }, { link: '#bar', statusCode: 200, err: null, status: 'alive' }, + { link: '#does-not-exist', statusCode: 404, err: null, status: 'dead' }, { link: '#potato', statusCode: 404, err: null, status: 'dead' }, { link: '#tomato', statusCode: 404, err: null, status: 'dead' }, - { link: '#header-with-special-char-', statusCode: 404, err: null, status: 'dead' }, + { link: '#header-with-special-char-at-end-', statusCode: 200, err: null, status: 'alive' }, + { link: '#header-with-multiple-special-chars-at-end-', statusCode: 200, err: null, status: 'alive' }, + { link: '#header-with-special--char', statusCode: 200, err: null, status: 'alive' }, + { link: '#header-with-multiple-special--chars', statusCode: 200, err: null, status: 'alive' }, + { link: '#header-with-german-umlaut-%C3%B6', statusCode: 200, err: null, status: 'alive' }, + { link: '#header-with-german-umlaut-%C3%B6-manual-encoded-link', statusCode: 200, err: null, status: 'alive' }, + { link: 'https://github.com/tcort/markdown-link-check', statusCode: 200, err: null, status: 'alive' }, + { link: '#heading-with-a-link', statusCode: 200, err: null, status: 'alive' }, + { link: '#heading-with-an-anchor-link', statusCode: 200, err: null, status: 'alive' }, + { link: '#--docker', statusCode: 200, err: null, status: 'alive' }, + { link: '#step-7---lint--test', statusCode: 200, err: null, status: 'alive' }, + { link: '#product-owner--design-approval', statusCode: 200, err: null, status: 'alive' }, + { link: '#migrating-from--v1180', statusCode: 200, err: null, status: 'alive' }, + { link: 
'#clientserver-examples-using--networkpeer', statusCode: 200, err: null, status: 'alive' }, + { link: '#somewhere', statusCode: 200, err: null, status: 'alive' }, + { link: '#this-header-is-linked', statusCode: 200, err: null, status: 'alive' }, + { link: '#l-is-the-package-in-the-linux-distro-base-image', statusCode: 200, err: null, status: 'alive' }, ]); done(); });