Skip to content

Commit

Permalink
Merge pull request #5 from msufa/update-soup
Browse files Browse the repository at this point in the history
Update beautifulsoup4 and remove workarounds
  • Loading branch information
msufa authored Oct 24, 2018
2 parents 65c8c39 + f752c93 commit 17c4998
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 22 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup

install_requires = (
'beautifulsoup4==4.5.1',
'beautifulsoup4==4.6.3',
)

tests_require = (
Expand Down
11 changes: 0 additions & 11 deletions test/test_injector.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,3 @@ def test_injection(tmpdir):
found = soup.find_all("script")
assert found
assert tracking_tag in found[0].text


def test_removing_stray_tags():
with open(os.path.join(os.path.dirname(__file__), "data/test.html"), "r") as infile:
soup = BeautifulSoup(infile, "html.parser")
embed_tag = soup.new_tag("embed")
soup.body.append(embed_tag)

output = soup.prettify()
assert "</embed>" in output
assert "</embed>" not in injector._remove_stray_tags(output)
11 changes: 1 addition & 10 deletions tridinjector/injector.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,7 @@ def _inject_tracking_id(tracking_id, soup):

def _write_output(soup, output_filename):
with open(output_filename, 'w') as outfile:
outfile.write(_remove_stray_tags(soup.prettify()).encode('utf-8'))


def _remove_stray_tags(output):
"""
BeautifulSoup's parser is sometimes too eager to close tags, producing output like
<embed></embed>, which some HTML validators fail to accept. This function removes
the stray closing tags if found.
"""
return output.replace("</embed>", "")
outfile.write(soup.prettify().encode('utf-8'))


def main():
Expand Down

0 comments on commit 17c4998

Please sign in to comment.