danmactough · danmactough · Dec 10, 2017 · Dec 10, 2017
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 ----------------------------------------------------------------------
 node-feedparser is released under the MIT License
-Copyright (c) 2011-2016 Dan MacTough and contributors
+Copyright (c) 2011-2017 Dan MacTough and contributors
 
 Permission is hereby granted, free of charge, to any person
 obtaining a copy of this software and associated documentation

diff --git a/README.md b/README.md
@@ -97,6 +97,14 @@ You can also check out this nice [working demo](https://github.com/scripting/fee
   behavior. If you want total control over handling these errors and optionally
   aborting parsing the feed, use this option.
 
+- `strip_html` - Set to `true` to override Feedparser's default behavior, which is
+  to pass through all substrings that look like HTML. In older versions, we always
+  stripped these HTML-like substrings to help users avoid inadvertently creating
+  XSS vulnerabilities by reflecting the value of these elements without properly
+  escaping them. We decided that wasn't particularly helpful because the simple
+  sanitization we were performing didn't address all cases and did a poor job. However,
+  if you were relying on the legacy behavior, you can set this option to `true`.
+
 ## Examples
 
 See the [`examples`](examples/) directory.
@@ -202,7 +210,7 @@ the original inspiration and a starting point.
 
 (The MIT License)
 
-Copyright (c) 2011-2016 Dan MacTough and contributors
+Copyright (c) 2011-2017 Dan MacTough and contributors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the 'Software'), to deal in

diff --git a/lib/feedparser/index.js b/lib/feedparser/index.js
@@ -72,6 +72,7 @@ function FeedParser (options) {
   if (!('normalize' in this.options)) this.options.normalize = true;
   if (!('addmeta' in this.options)) this.options.addmeta = true;
   if (!('resume_saxerror' in this.options)) this.options.resume_saxerror = true;
+  if (!('strip_html' in this.options)) this.options.strip_html = false;
   if ('MAX_BUFFER_LENGTH' in this.options) {
     sax.MAX_BUFFER_LENGTH = this.options.MAX_BUFFER_LENGTH; // set to Infinity to have unlimited buffers
   } else {
@@ -414,6 +415,7 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) {
 
   var meta = {}
     , normalize = !options || (options && options.normalize)
+    , stripHtml = !options || (options && options.strip_html)
     ;
 
   if (normalize) {
@@ -749,8 +751,10 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) {
     if (!meta.xmlurl && this.options.feedurl) {
       meta.xmlurl = meta.xmlUrl = this.options.feedurl;
     }
-    meta.title = meta.title && _.stripHtml(meta.title);
-    meta.description = meta.description && _.stripHtml(meta.description);
+    if (stripHtml) {
+      meta.title = meta.title && _.stripHtml(meta.title);
+      meta.description = meta.description && _.stripHtml(meta.description);
+    }
   }
 
   return meta;
@@ -761,6 +765,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){
 
   var item = {}
     , normalize = !options || (options && options.normalize)
+    , stripHtml = !options || (options && options.strip_html)
     ;
 
   if (normalize) {
@@ -1090,7 +1095,9 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){
         item.link = item.guid;
       }
     }
-    item.title = item.title && _.stripHtml(item.title);
+    if (stripHtml) {
+      item.title = item.title && _.stripHtml(item.title);
+    }
   }
   return item;
 };

diff --git a/lib/utils.js b/lib/utils.js
@@ -169,7 +169,7 @@ exports.reresolve = reresolve;
  * @private
  */
 function stripHtml (str) {
-  return str.replace(/<.*?>/g, '');
+  return str.replace(/<+[^>]+?>+/g, '');
 }
 
 exports.stripHtml = stripHtml;
diff --git a/test/feeds/title-with-angle-brackets.xml b/test/feeds/title-with-angle-brackets.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0">
+<channel>
+    <title>Channel title</title>
+    <link>http://example.com/</link>
+    <description>Channel</description>
+    <item>
+      <title>RSS &lt;&lt;&lt; Title &gt;&gt;&gt;</title>
+    </item>
+</channel>
+</rss>
diff --git a/test/strip-html.js b/test/strip-html.js
@@ -0,0 +1,31 @@
+describe('strip html', function () {
+
+  var feed = __dirname + '/feeds/title-with-angle-brackets.xml';
+
+  it('should NOT aggressively strip html by default', function (done) {
+    fs.createReadStream(feed).pipe(new FeedParser())
+      .once('readable', function () {
+        var stream = this;
+        assert.equal(stream.read().title, 'RSS <<< Title >>>');
+        done();
+      })
+      .on('error', function (err) {
+        assert.ifError(err);
+        done(err);
+      });
+  });
+
+  it('should aggressively strip html with option `strip_html`', function (done) {
+    fs.createReadStream(feed).pipe(new FeedParser({ strip_html: true }))
+      .once('readable', function () {
+        var stream = this;
+        assert.equal(stream.read().title, 'RSS ');
+        done();
+      })
+      .on('error', function (err) {
+        assert.ifError(err);
+        done(err);
+      });
+  });
+
+});