Skip to content

Commit

Permalink
fixes #1
Browse files Browse the repository at this point in the history
  • Loading branch information
gcerretani committed Apr 14, 2019
1 parent e8803c1 commit 4b872a5
Showing 1 changed file with 24 additions and 20 deletions.
44 changes: 24 additions & 20 deletions antenati.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
import sys
import os
import re
import click
from threading import Thread


def threadedDownloader(url, filename):
print "Downloading", filename
print 'Downloading', filename
r = urllib3.PoolManager().request('GET', url)
f = open(filename, 'wb')
f.write(r.data)
Expand All @@ -21,25 +22,25 @@ class ImageHTMLParser(HTMLParser.HTMLParser):
def get_threads(self):
    """Return the list of download threads stored in ``self.threads``."""
    return self.threads
def set_filename(self, name):
    """Set the output file name used for subsequent downloads.

    The value stored in ``self.filename`` is ``img_archive_<name>.jpg``.
    """
    self.filename = 'img_archive_' + name + '.jpg'
def handle_starttag(self, tag, attrs):
    """Start a background download for every ``<a>`` tag encountered.

    The value of the tag's first attribute is taken as the URL, and the
    download is written to ``self.filename``. Each started thread is
    appended to ``self.threads`` so the caller can join it later.
    """
    if tag != 'a' or not attrs:
        # An anchor without attributes carries no URL; indexing attrs[0]
        # would raise IndexError, so such tags are ignored.
        return
    url = attrs[0][1]
    t = Thread(target=threadedDownloader, args=(url, self.filename))
    t.start()
    self.threads.append(t)


class UrlHTMLParser(HTMLParser.HTMLParser):
    """HTML parser that records the URL of the page to visit next.

    Feed it HTML text; whenever an ``<a>`` tag whose second attribute has
    the value ``'next'`` is seen, the value of its first attribute is
    appended to the site root and stored. Read it back with ``get_next()``.
    """

    # Most recently recorded URL; empty until set_next() is called.
    next = ''

    def set_next(self, name):
        """Store *name* as the next URL."""
        self.next = name

    def get_next(self):
        """Return the most recently stored URL ('' if none was stored)."""
        return self.next

    def handle_starttag(self, tag, attrs):
        """Record the link target of an ``<a>`` tag marked as 'next'."""
        if tag != 'a' or len(attrs) < 2:
            # Anchors with fewer than two attributes cannot match the
            # positional check below; skip them instead of raising IndexError.
            return
        if attrs[1][1] == 'next':
            self.set_next('http://dl.antenati.san.beniculturali.it' + attrs[0][1])


def main():
Expand All @@ -50,16 +51,17 @@ def main():
url_parser.set_next(sys.argv[1])
splitting = re.split('[_/?.]', url_parser.get_next())

str_comune = splitting[10].replace('+', '_')
str_type = splitting[11].replace('+', '_')
str_year = splitting[12].replace('+', '_')
foldername = '_'.join([str_comune, str_type, str_year])
html_element = splitting.index('html')
gallery_name_elements = splitting[10 : html_element - 3]
foldername = '_'.join(gallery_name_elements).replace('+', '_')

if os.path.exists(foldername):
print("Directory " + foldername + " already exists. Please remove it.")
return

os.mkdir(foldername)
if not click.confirm('Directory ' + foldername + ' already exists. Do you want to copy images to this directory?'):
print('Exiting')
return
else:
os.mkdir(foldername)

os.chdir(foldername)

stop = False
Expand All @@ -68,17 +70,19 @@ def main():
stop = True
r = urllib3.PoolManager().request('GET', url_parser.get_next())
splitting = re.split('[_/?.]', url_parser.get_next())
img_parser.set_filename(splitting[13] + "_" + splitting[14] + "_" + splitting[15])
html_element = splitting.index('html')
file_name_elements = splitting[html_element - 3 : html_element - 1]
img_parser.set_filename('_'.join(file_name_elements))

for line in r.data.split("\n"):
if "zoomAntenati1" in line:
for line in r.data.split('\n'):
if 'zoomAntenati1' in line:
img_parser.feed(line)
if "successivo" in line:
if 'successivo' in line:
stop = False
url_parser.feed(line)

for t in img_parser.get_threads():
t.join()

# Run the downloader only when the file is executed as a script.
if __name__ == '__main__':
    main()

0 comments on commit 4b872a5

Please sign in to comment.