Skip to content

Commit

Permalink
normalize newlines inside parser.Parse()
Browse files Browse the repository at this point in the history
  • Loading branch information
kjk committed Mar 21, 2023
1 parent 2ced44d commit 154b583
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 28 deletions.
6 changes: 0 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@ Sample text.

func main() {
md := []byte(mdStr)
// always normalize newlines, this library only supports Unix LF newlines
md = markdown.NormalizeNewlines(md)

// create markdown parser
extensions := parser.CommonExtensions | parser.AutoHeadingIDs
Expand Down Expand Up @@ -86,10 +84,6 @@ Try it online:

For more documentation read [this guide](https://blog.kowalczyk.info/article/cxn3/advanced-markdown-processing-in-go.html)

## Always normalize newlines

The library only supports Unix newlines. For peace of mind, always normalize markdown content with `md = markdown.NormalizeNewlines(md)`.

## Sanitize untrusted content

We don't protect against malicious content. When dealing with user-provided
Expand Down
23 changes: 1 addition & 22 deletions markdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,25 +87,4 @@ func ToHTML(markdown []byte, p *parser.Parser, renderer Renderer) []byte {
// NormalizeNewlines rewrites Windows (CRLF) and old Mac (CR) line endings
// as Unix (LF) line endings. The parser understands only LF, so pass any
// markdown that may carry other line endings through this function first.
//
// The conversion compacts d in place: the returned slice shares d's backing
// array, so the bytes of d are overwritten whenever a CR is present.
func NormalizeNewlines(d []byte) []byte {
	const (
		cr = '\r' // byte 13
		lf = '\n' // byte 10
	)

	// The write position never gets ahead of the read position, so
	// appending into d[:0] safely reuses d's own storage.
	out := d[:0]
	for pos := 0; pos < len(d); pos++ {
		b := d[pos]
		if b == cr {
			// A lone CR and a CRLF pair both collapse to a single LF.
			out = append(out, lf)
			if next := pos + 1; next < len(d) && d[next] == lf {
				pos = next
			}
			continue
		}
		out = append(out, b)
	}
	return out
}
// NormalizeNewlines is an alias for parser.NormalizeNewlines.
var NormalizeNewlines = parser.NormalizeNewlines
27 changes: 27 additions & 0 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,10 @@ type Reference struct {
// You can then convert AST to html using html.Renderer, to some other format
// using a custom renderer or transform the tree.
func (p *Parser) Parse(input []byte) ast.Node {
// the code only works with Unix LF newlines, so to make life easy for
// callers, normalize newlines
input = NormalizeNewlines(input)

p.Block(input)
// Walk the tree and finish up some of unfinished blocks
for p.tip != nil {
Expand Down Expand Up @@ -895,3 +899,26 @@ func isListItem(d ast.Node) bool {
_, ok := d.(*ast.ListItem)
return ok
}

// NormalizeNewlines converts Windows (CRLF) and old Mac (CR) newlines to
// Unix (LF) newlines and returns the normalized content.
//
// The input slice is never modified. If d contains no CR byte it is returned
// as-is without allocating; otherwise the result is built in a new slice.
func NormalizeNewlines(d []byte) []byte {
	const (
		cr = '\r' // byte 13
		lf = '\n' // byte 10
	)

	// Fast path: input without any CR is already normalized.
	first := 0
	for first < len(d) && d[first] != cr {
		first++
	}
	if first == len(d) {
		return d
	}

	// Copy rather than compact in place: Parse passes its caller's input
	// through this function, and that buffer must not be rewritten.
	out := make([]byte, first, len(d))
	copy(out, d[:first])
	for i := first; i < len(d); i++ {
		c := d[i]
		if c != cr {
			out = append(out, c)
			continue
		}
		// Replace CR (Mac / Windows) with LF (Unix).
		out = append(out, lf)
		if i+1 < len(d) && d[i+1] == lf {
			// This was CRLF, so skip the LF.
			i++
		}
	}
	return out
}

0 comments on commit 154b583

Please sign in to comment.