Skip to content

Commit

Permalink
Fix #87: do not process hrefs pointing outside iFixit
Browse files Browse the repository at this point in the history
  • Loading branch information
benoit74 committed Oct 20, 2022
1 parent 996489a commit 0f84bae
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 21 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 0.2.3

- Do not process unrecognized hrefs, i.e. links pointing outside iFixit

# 0.2.2

- Fixed URL normalization on articles redirecting outside domain (help.ifixit.com)
Expand Down
2 changes: 1 addition & 1 deletion ifixit2zim/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-0.2.1
+0.2.3
20 changes: 0 additions & 20 deletions ifixit2zim/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,26 +241,6 @@ def _process_external_url(url, rel_prefix):

@staticmethod
def _process_unrecognized_href(url, rel_prefix):
    """Return the rewritten target for an href handled by no other rule.

    URLs that do not start with ``http://`` or ``https://`` are passed
    straight to ``Global._process_external_url``. For http(s) URLs a HEAD
    request is sent (5 second timeout); when the response ``Content-Type``
    starts with ``image/`` the result is ``rel_prefix`` followed by
    ``Global.get_image_path(url)``. In every other case, including any
    failure of the HEAD request, the URL is handed to
    ``Global._process_external_url``.
    """
    if not url.startswith(("https://", "http://")):
        return Global._process_external_url(url, rel_prefix)

    # NOTE(review): requests.head() does not follow redirects unless
    # allow_redirects=True is passed, so the Content-Type inspected here is
    # the one of the first response - confirm this is intended.
    try:
        head_headers = requests.head(url, timeout=5).headers
    except requests.exceptions.ConnectionError:
        logger.debug(f"Unable to HEAD unrecognized href (ConnectionError): {url}")
    except requests.exceptions.ReadTimeout:
        logger.debug(f"Unable to HEAD unrecognized href (ReadTimeout): {url}")
    except Exception as exc:
        # Unexpected failure: log it with its traceback, then fall back.
        logger.warning(f"Unable to HEAD unrecognized href: {url}")
        logger.exception(exc)
    else:
        # Only reached when the HEAD request succeeded.
        content_type = head_headers.get("Content-Type")
        if content_type and content_type.startswith("image/"):
            return f"{rel_prefix}{Global.get_image_path(url)}"

    # Not an image, or the probe failed: treat as a plain external link.
    return Global._process_external_url(url, rel_prefix)

def _process_href_regex_dynamics(href, rel_prefix):
Expand Down

0 comments on commit 0f84bae

Please sign in to comment.