Skip to content

Commit

Permalink
Merge pull request #328 from dklimpel/anchor_unicode_support
Browse files Browse the repository at this point in the history
feat: add support for unicode characters in anchor links
  • Loading branch information
tcort authored Nov 5, 2024
2 parents 3003004 + 8090157 commit b54367e
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 4 deletions.
24 changes: 23 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,29 @@ function extractSections(markdown) {
const sectionTitles = markdown.match(/^#+ .*$/gm) || [];

const sections = sectionTitles.map(section =>
section.replace(/^\W+/, '').replace(/\W+$/, '').replace(/[^\w\s-]+/g, '').replace(/\s+/g, '-').toLowerCase()
// The links are compared with the headings (simple text comparison).
// However, the links are url-encoded beforehand, so the headings
// have to also be encoded so that they can also be matched.
encodeURIComponent(
section
// replace links, the links can start with "./", "/", "http://", "https://" or "#"
// and keep the value of the text ($1)
.replace(/\[(.+)\]\(((?:\.?\/|https?:\/\/|#)[\w\d./?=#-]+)\)/, "$1")
// make everything (Unicode-aware) lower case
.toLowerCase()
// remove the leading "#" characters and any white space that follows them
.replace(/^#+\s*/, '')
// remove everything that is NOT a (Unicode) Letter, (Unicode) decimal Number,
// (Unicode) letter Number, white space, underscore, hyphen or backtick
// (backticks are kept here and stripped in a later step)
// https://ruby-doc.org/3.3.2/Regexp.html#class-Regexp-label-Unicode+Character+Categories
.replace(/[^\p{L}\p{Nd}\p{Nl}\s_\-`]/gu, "")
// remove sequences of *
.replace(/\*(?=.*)/gu, "")
// remove leftover backticks
.replace(/`/gu, "")
// Now replace remaining blanks with '-'
.replace(/\s/gu, "-")
)
);

var uniq = {};
Expand Down
70 changes: 68 additions & 2 deletions test/hash-links.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,78 @@ The title is [Foo](#foo).

The second section is [Bar](#bar).

To test a failure. Link that [does not exist](#does-not-exist).

## Uh, oh

There is no section named [Potato](#potato).

There is an anchor named [Tomato](#tomato).

## Header with special char ✨
## Header with special char at end ✨

Test [header with image](#header-with-special-char-at-end-)

## Header with multiple special chars at end ✨✨

Test [header with multiple images](#header-with-multiple-special-chars-at-end-)

## Header with special ✨ char

Test [header with image](#header-with-special--char)

## Header with multiple special ✨✨ chars

Test [header with multiple images](#header-with-multiple-special--chars)

## Header with German umlaut Ö

Link to [German umlaut Ö](#header-with-german-umlaut-ö)

## Header with German umlaut ö manual encoded link

Link to [German umlaut ö manual encoded in link](#header-with-german-umlaut-%C3%B6-manual-encoded-link)

### [Heading with a link](https://github.com/tcort/markdown-link-check)

An [anchor link](#heading-with-a-link) to a heading.

### [Heading with an anchor link](#foo)

An [anchor link](#heading-with-an-anchor-link) to a heading.

## --docker

[--docker](#--docker)

## Step 7 - Lint & Test

[Step 7 - Lint \& Test](#step-7---lint--test)

## Product Owner / Design Approval

[Product Owner / Design Approval](#product-owner--design-approval)

## Migrating from `<= v1.18.0`

Whitespaces separated by special characters (no workaround)

[migrating from <= v1.18.0](#migrating-from--v1180)

## Client/server examples using  `network.peer.*`

Consecutive whitespace typo (easy to work around)

[Client/server examples using `network.peer.*`](#clientserver-examples-using--networkpeer)

## This header is [linked](#somewhere)

This is a [link to a linked header](#this-header-is-linked)

### Somewhere

## L. Is the package in the Linux distro base image?

Anchor links ending with `?`.

Test [header with image](#header-with-special-char-)
[L. Is the package in the Linux distro base image?](#l-is-the-package-in-the-linux-distro-base-image)
19 changes: 18 additions & 1 deletion test/markdown-link-check.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -403,9 +403,26 @@ describe('markdown-link-check', function () {
expect(result).to.eql([
{ link: '#foo', statusCode: 200, err: null, status: 'alive' },
{ link: '#bar', statusCode: 200, err: null, status: 'alive' },
{ link: '#does-not-exist', statusCode: 404, err: null, status: 'dead' },
{ link: '#potato', statusCode: 404, err: null, status: 'dead' },
{ link: '#tomato', statusCode: 404, err: null, status: 'dead' },
{ link: '#header-with-special-char-', statusCode: 404, err: null, status: 'dead' },
{ link: '#header-with-special-char-at-end-', statusCode: 200, err: null, status: 'alive' },
{ link: '#header-with-multiple-special-chars-at-end-', statusCode: 200, err: null, status: 'alive' },
{ link: '#header-with-special--char', statusCode: 200, err: null, status: 'alive' },
{ link: '#header-with-multiple-special--chars', statusCode: 200, err: null, status: 'alive' },
{ link: '#header-with-german-umlaut-%C3%B6', statusCode: 200, err: null, status: 'alive' },
{ link: '#header-with-german-umlaut-%C3%B6-manual-encoded-link', statusCode: 200, err: null, status: 'alive' },
{ link: 'https://github.com/tcort/markdown-link-check', statusCode: 200, err: null, status: 'alive' },
{ link: '#heading-with-a-link', statusCode: 200, err: null, status: 'alive' },
{ link: '#heading-with-an-anchor-link', statusCode: 200, err: null, status: 'alive' },
{ link: '#--docker', statusCode: 200, err: null, status: 'alive' },
{ link: '#step-7---lint--test', statusCode: 200, err: null, status: 'alive' },
{ link: '#product-owner--design-approval', statusCode: 200, err: null, status: 'alive' },
{ link: '#migrating-from--v1180', statusCode: 200, err: null, status: 'alive' },
{ link: '#clientserver-examples-using--networkpeer', statusCode: 200, err: null, status: 'alive' },
{ link: '#somewhere', statusCode: 200, err: null, status: 'alive' },
{ link: '#this-header-is-linked', statusCode: 200, err: null, status: 'alive' },
{ link: '#l-is-the-package-in-the-linux-distro-base-image', statusCode: 200, err: null, status: 'alive' },
]);
done();
});
Expand Down

0 comments on commit b54367e

Please sign in to comment.