From 4028b8a41d92fe281f77799729996c32c6c55e58 Mon Sep 17 00:00:00 2001
From: automationator <automationator@gmail.com>
Date: Mon, 27 Aug 2018 09:24:49 -0400
Subject: [PATCH] Adds support for properly finding meta-refresh URLs

---
 urlfinderlib/urlfinderlib.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/urlfinderlib/urlfinderlib.py b/urlfinderlib/urlfinderlib.py
index 5e64f72..24a2f39 100644
--- a/urlfinderlib/urlfinderlib.py
+++ b/urlfinderlib/urlfinderlib.py
@@ -140,6 +140,23 @@ def _recursive_tag_values(tag, values=[]):
     # Loop over both soups.
     for soup in soups:
 
+        # Find any meta-refresh URLs, e.g. content="0; URL=http://example.com".
+        meta_urls = []
+        meta_tags = soup.find_all('meta')
+        for meta_tag in meta_tags:
+            for key in meta_tag.attrs:
+                if key.lower() == 'content':
+                    # Split once and ignore case so "URL=" matches and a later "url=" in the target is kept.
+                    split_value = re.split('url=', meta_tag.attrs[key], maxsplit=1, flags=re.IGNORECASE)
+                    if len(split_value) == 2:
+                        url = split_value[1].strip()
+                        # Remove any quotes around the URL.
+                        if url.startswith('"') and url.endswith('"'):
+                            url = url[1:-1]
+                        if url.startswith("'") and url.endswith("'"):
+                            url = url[1:-1]
+                        meta_urls.append(url)
+
         # Hacky way to find URLs in the CSS.
         css_urls = re.compile(r'url\((.*?)\)').findall(str(soup))
         
@@ -190,6 +207,7 @@ def _recursive_tag_values(tag, values=[]):
         else:
             urls = _recursive_tag_values(soup)
             urls += css_urls
+            urls += meta_urls
 
         # As a last-ditch effort, find URLs in the visible text of the HTML. However,
         # we only want to add strings that are valid URLs as they are. What we do not