From 9def5d2074331d6b1af80d7b394be551a0d70c53 Mon Sep 17 00:00:00 2001 From: Dan MacTough Date: Sun, 10 Dec 2017 10:21:48 -0500 Subject: [PATCH] Change default behavior to not strip html by default Added option `strip_html` to restore old behavior. Resolves #165, #243 --- LICENSE | 2 +- README.md | 10 +++++++++- lib/feedparser/index.js | 13 ++++++++++--- test/strip-html.js | 15 ++++++++++++++- 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/LICENSE b/LICENSE index aead257..24c0a7f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ ---------------------------------------------------------------------- node-feedparser is released under the MIT License -Copyright (c) 2011-2016 Dan MacTough and contributors +Copyright (c) 2011-2017 Dan MacTough and contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation diff --git a/README.md b/README.md index 80f5747..557abbf 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,14 @@ You can also check out this nice [working implementation](https://github.com/scr behavior. If you want total control over handling these errors and optionally aborting parsing the feed, use this option. +- `strip_html` - Set to `true` to override Feedparser's default behavior, which is + to pass through all substrings that look like html. In older versions, we always + stripped these html-like substrings to help users avoid inadvertently creating + XSS vulnerabilities by reflecting the value of these elements without properly + escaping them. We decided that wasn't particularly helpful because the simple + sanitization we were performing didn't address all cases and did a poor job. However, + if you were relying on the legacy behavior, you can set this option to `true`. + ## Examples See the [`examples`](examples/) directory. @@ -206,7 +214,7 @@ the original inspiration and a starting point. 
(The MIT License) -Copyright (c) 2011-2016 Dan MacTough and contributors +Copyright (c) 2011-2017 Dan MacTough and contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 'Software'), to deal in diff --git a/lib/feedparser/index.js b/lib/feedparser/index.js index 78a1c3b..f72f74a 100644 --- a/lib/feedparser/index.js +++ b/lib/feedparser/index.js @@ -72,6 +72,7 @@ function FeedParser (options) { if (!('normalize' in this.options)) this.options.normalize = true; if (!('addmeta' in this.options)) this.options.addmeta = true; if (!('resume_saxerror' in this.options)) this.options.resume_saxerror = true; + if (!('strip_html' in this.options)) this.options.strip_html = false; if ('MAX_BUFFER_LENGTH' in this.options) { sax.MAX_BUFFER_LENGTH = this.options.MAX_BUFFER_LENGTH; // set to Infinity to have unlimited buffers } else { @@ -430,6 +431,7 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) { var meta = {} , normalize = !options || (options && options.normalize) + , stripHtml = !options || (options && options.strip_html) ; if (normalize) { @@ -765,8 +767,10 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) { if (!meta.xmlurl && this.options.feedurl) { meta.xmlurl = meta.xmlUrl = this.options.feedurl; } - meta.title = meta.title && _.stripHtml(meta.title); - meta.description = meta.description && _.stripHtml(meta.description); + if (stripHtml) { + meta.title = meta.title && _.stripHtml(meta.title); + meta.description = meta.description && _.stripHtml(meta.description); + } } return meta; @@ -777,6 +781,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){ var item = {} , normalize = !options || (options && options.normalize) + , stripHtml = !options || (options && options.strip_html) ; if (normalize) { @@ -1106,7 +1111,9 @@ FeedParser.prototype.handleItem = function handleItem (node, type, 
options){ item.link = item.guid; } } - item.title = item.title && _.stripHtml(item.title); + if (stripHtml) { + item.title = item.title && _.stripHtml(item.title); + } } return item; }; diff --git a/test/strip-html.js b/test/strip-html.js index bf46254..21ba623 100644 --- a/test/strip-html.js +++ b/test/strip-html.js @@ -2,8 +2,21 @@ describe('strip html', function () { var feed = __dirname + '/feeds/title-with-angle-brackets.xml'; - it('should aggressively strip html', function (done) { + it('should NOT aggressively strip html by default', function (done) { fs.createReadStream(feed).pipe(new FeedParser()) + .once('readable', function () { + var stream = this; + assert.equal(stream.read().title, 'RSS <<< Title >>>'); + done(); + }) + .on('error', function (err) { + assert.ifError(err); + done(err); + }); + }); + + it('should aggressively strip html with option `strip_html`', function (done) { + fs.createReadStream(feed).pipe(new FeedParser({ strip_html: true })) .once('readable', function () { var stream = this; assert.equal(stream.read().title, 'RSS ');