From e4b8f45d04726be368b5ccc175979066d8c40a7d Mon Sep 17 00:00:00 2001 From: MikeMeliz Date: Fri, 26 Jul 2019 18:54:21 +0300 Subject: [PATCH] Improve for gh-5 Signed-off-by: MikeMeliz --- modules/extractor.py | 6 +++--- requirements.txt | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/modules/extractor.py b/modules/extractor.py index 6da50b2..9e2a0d1 100644 --- a/modules/extractor.py +++ b/modules/extractor.py @@ -66,9 +66,9 @@ def outex(website, outputFile, outpath): def termex(website): try: print urllib2.urlopen(website).read() - except: - e = sys.exc_info()[0] - print("Error: %s" % e + "\n## Not valid URL \n## Did you forget \'http://\'?") + except (urllib2.HTTPError, urllib2.URLError) as e: + print("Error: (%s) %s" % (e, website)) + return None def extractor(website, crawl, outputFile, inputFile, outpath): diff --git a/requirements.txt b/requirements.txt index ef1634f..ce9a964 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ -beautifulsoup4==4.4.1 +beautifulsoup4==4.7.1 requests==2.21.0 -urllib3>=1.24.2