From eaf079556f4692a68428902d16a30c511f47874e Mon Sep 17 00:00:00 2001
From: Giovanni Cerretani <giovanni.cerretani@gmail.com>
Date: Sat, 12 Oct 2019 14:56:52 +0200
Subject: [PATCH] Upgrade to Python 3, mostly using the 2to3 tool

---
 README.md   |  2 +-
 antenati.py | 27 +++++++++++++++++----------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 6debe2c..c360cfd 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,6 @@ In the website, navigate to the archive you want to download. For example, for t
 
 Then, copy the link to the first page, and call the script with that link as argument:
 
-    ./antenati.py http://dl.antenati.san.beniculturali.it/v/Archivio+di+Stato+di+Firenze/Stato+civile+della+restaurazione/Montalcino+provincia+di+Siena/Nati/1832/179/005178080_00303.jpg.html
+    ./antenati.py http://dl.antenati.san.beniculturali.it/v/Archivio+di+Stato+di+Firenze/Stato+civile+della+restaurazione+1816-1860/Montalcino+provincia+di+Siena/Nati/1832/179/005178080_00303.jpg.html
 
 The results will be placed in a folder named *Montalcino_provincia_di_Siena_Nati_1832*.
\ No newline at end of file
diff --git a/antenati.py b/antenati.py
index c28261b..4c199b0 100755
--- a/antenati.py
+++ b/antenati.py
@@ -1,6 +1,13 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
+"""
+antenati.py: a tool to download data from the Portale Antenati
+"""
+
+__author__      = "Giovanni Cerretani"
+__copyright__   = "Copyright (c) 2018, MIT License"
+
 import urllib3
-import HTMLParser
+import html.parser
 import sys
 import os
 import re
@@ -16,17 +23,17 @@ def __init__ (self, pool, url, filename):
         self.filename = filename
         self.start()
     def run(self):
-        print('Downloading ' + self.filename)
+        print('Downloading', self.filename)
         r = self.pool.request_encode_url('GET', self.url)
         f = open(self.filename, 'wb')
         f.write(r.data)
         f.close()
-        print('Done ' + self.filename)
+        print('Done', self.filename)
 
 
-class ImageHTMLParser(HTMLParser.HTMLParser):
+class ImageHTMLParser(html.parser.HTMLParser):
     def __init__(self, pool):
-        HTMLParser.HTMLParser.__init__(self)
+        html.parser.HTMLParser.__init__(self)
         self.pool = pool
         self.filename = None
         self.threads = []
@@ -42,9 +49,9 @@ def handle_starttag(self, tag, attrs):
             self.threads.append(t)
 
 
-class UrlHTMLParser(HTMLParser.HTMLParser):
+class UrlHTMLParser(html.parser.HTMLParser):
     def __init__(self):
-        HTMLParser.HTMLParser.__init__(self)
+        html.parser.HTMLParser.__init__(self)
         self.next = None
     def set_next(self, next):
         self.next = next
@@ -84,14 +91,14 @@ def main():
     
     while not stop:
         stop = True
-        r = connection_pool.request_encode_url('GET', url_parser.get_next())
+        r = connection_pool.request('GET', url_parser.get_next())
         splitting = re.split('[_/?.]', url_parser.get_next())
         html_element = splitting.index('html')
         file_name_elements = splitting[html_element - 3 : html_element - 1]
         local_filename = '_'.join(file_name_elements)
         img_parser.set_filename(local_filename)
     
-        for line in r.data.split('\n'):
+        for line in r.data.decode('utf-8').split('\n'):
             if 'zoomAntenati1' in line:
                 img_parser.feed(line)
             if 'successivo' in line: