From 4b872a5c84197487b88b22f8afbb2d50f12efb59 Mon Sep 17 00:00:00 2001 From: Giovanni Cerretani Date: Sun, 14 Apr 2019 09:29:57 +0200 Subject: [PATCH] fixes #1 --- antenati.py | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/antenati.py b/antenati.py index 882577f..b73c94d 100755 --- a/antenati.py +++ b/antenati.py @@ -4,11 +4,12 @@ import sys import os import re +import click from threading import Thread def threadedDownloader(url, filename): - print "Downloading", filename + print 'Downloading', filename r = urllib3.PoolManager().request('GET', url) f = open(filename, 'wb') f.write(r.data) @@ -21,9 +22,9 @@ class ImageHTMLParser(HTMLParser.HTMLParser): def get_threads(self): return self.threads def set_filename(self, name): - self.filename = "img_archive_" + name + ".jpg" + self.filename = 'img_archive_' + name + '.jpg' def handle_starttag(self, tag, attrs): - if tag == "a": + if tag == 'a': url = attrs[0][1] t = Thread(target = threadedDownloader, args = (url, self.filename)) t.start() @@ -31,15 +32,15 @@ def handle_starttag(self, tag, attrs): class UrlHTMLParser(HTMLParser.HTMLParser): - next = "" + next = '' def set_next(self, name): self.next = name def get_next(self): return self.next def handle_starttag(self, tag, attrs): - if tag == "a": - if attrs[1][1] == "next": - self.set_next("http://dl.antenati.san.beniculturali.it" + attrs[0][1]) + if tag == 'a': + if attrs[1][1] == 'next': + self.set_next('http://dl.antenati.san.beniculturali.it' + attrs[0][1]) def main(): @@ -50,16 +51,17 @@ def main(): url_parser.set_next(sys.argv[1]) splitting = re.split('[_/?.]', url_parser.get_next()) - str_comune = splitting[10].replace('+', '_') - str_type = splitting[11].replace('+', '_') - str_year = splitting[12].replace('+', '_') - foldername = '_'.join([str_comune, str_type, str_year]) + html_element = splitting.index('html') + gallery_name_elements = splitting[10 : html_element - 3] + foldername = '_'.join(gallery_name_elements).replace('+', '_') if os.path.exists(foldername): - print("Directory " + foldername + " already exists. Please remove it.") - return - - os.mkdir(foldername) + if not click.confirm('Directory ' + foldername + ' already exists. Do you want to copy images to this directory?'): + print('Exiting') + return + else: + os.mkdir(foldername) + os.chdir(foldername) stop = False @@ -68,17 +70,19 @@ def main(): stop = True r = urllib3.PoolManager().request('GET', url_parser.get_next()) splitting = re.split('[_/?.]', url_parser.get_next()) - img_parser.set_filename(splitting[13] + "_" + splitting[14] + "_" + splitting[15]) + html_element = splitting.index('html') + file_name_elements = splitting[html_element - 3 : html_element - 1] + img_parser.set_filename('_'.join(file_name_elements)) - for line in r.data.split("\n"): - if "zoomAntenati1" in line: + for line in r.data.split('\n'): + if 'zoomAntenati1' in line: img_parser.feed(line) - if "successivo" in line: + if 'successivo' in line: stop = False url_parser.feed(line) for t in img_parser.get_threads(): t.join() -if __name__ == "__main__": +if __name__ == '__main__': main()