dbisUnibas · timoCasti · Mar 12, 2021
diff --git a/README.md b/README.md
@@ -3,17 +3,7 @@ Repository of the [PS-Battles dataset, an image collection for image manipulatio
 
 # Download instructions
 
-## Ubuntu
-On ubuntu, simply run the provided ```download.sh``` script. It will download all images from the provided .tsv-files and verify them with the provided sha256sum.
-
-## MacOS
-On Mac, there is no ```sha256sum``` command.
- We recommend either running the following command first: ```function sha256sum() { shasum -a 256 "$@" ; } && export -f sha256sum```
- OR you can either change the command in the ```download.sh``` script to ```shasum -a 256```
- OR you can disable checksum-verification in the provided script.
-
-## Windows
-We do not provide a download script for windows, you can use your preferred scripting language and download tool to download images using the links in the provided .tsv-files.
+Put the `downloader.py` script and the `originals.tsv` and `photoshops.tsv` files in the same directory, then run the Python script.
 
 # Bibtex
 ```

diff --git a/download.sh b/download.sh
diff --git a/downloader.py b/downloader.py
@@ -0,0 +1,107 @@
+#!/usr/bin/python
+import csv
+import os
+import requests
+import hashlib
+
+
+def download(url, destination, checksum, timeout=30):
+    """Download ``url`` to ``destination`` and compare its SHA-256 to ``checksum``.
+
+    Nothing is fetched when ``destination`` already exists. A checksum
+    mismatch is only reported on stdout; the data is still written to
+    ``destination``.
+
+    Args:
+        url: Address of the file to fetch.
+        destination: Path the downloaded bytes are written to.
+        checksum: Expected SHA-256 hex digest of the file content.
+        timeout: Value passed to ``requests.get`` as ``timeout`` so a stalled
+            server cannot block the caller forever.
+
+    Raises:
+        requests.RequestException: If the request fails, times out, or the
+            server answers with an HTTP error status.
+        OSError: If ``destination`` cannot be opened for writing.
+    """
+    if os.path.exists(destination):
+        print("file already exist skipping  :  " + destination)
+        return
+
+    response = requests.get(url, timeout=timeout)
+    # Without this check the body of a 404/500 error page would be stored
+    # under the image's file name.
+    response.raise_for_status()
+    img_data = response.content
+
+    digest = hashlib.sha256(img_data).hexdigest()
+    if digest != checksum:
+        print("hash :  " + digest + "  does not equal checksum from tsv : " + checksum)
+
+    with open(destination, 'wb') as handler:
+        handler.write(img_data)
+
+
+# Entry point: run this file directly to download all images listed in the two .tsv files.
+
+if __name__ == '__main__':
+    # Downloads every image listed in originals.tsv into originals/ and every
+    # image listed in photoshops.tsv into photoshops/<original id>/.
+    # Exits with status 127 when an input file is missing or a target
+    # directory already exists.
+
+    if not (os.path.exists("originals.tsv")):
+        print("originals.tsv does not exist")
+        exit(127)
+    if not os.path.exists("photoshops.tsv"):
+        print("photoshops.tsv does not exist")
+        exit(127)
+
+    # starting with originals
+    if os.path.exists("originals"):
+        print(" originals directory already existst")
+        exit(127)
+
+    # Read the table once and close the file immediately; blank lines (which
+    # csv.reader yields as empty lists) are dropped so row[0] cannot fail on them.
+    with open("originals.tsv", newline="") as tsv_file:
+        rows = [row for row in csv.reader(tsv_file, delimiter="\t") if row]
+    data_rows = rows[1:]  # skip header
+    total: int = len(data_rows)
+
+    os.mkdir("originals")
+
+    print("starting with downloading originals.tsv")
+    pc = 0  # last percentage that was printed
+    for done, row in enumerate(data_rows, start=1):
+        # row = id, url, end, hash, filesize, score, author, link, timestamp, width, height
+        filepath = "originals/" + row[0] + "." + row[2]  # originals/id.end
+        url = row[1]
+        checksum = row[3]
+        try:
+            download(url, filepath, checksum)
+        except Exception:
+            # Best effort: one failed image must not stop the run. Catching
+            # Exception (not everything) keeps Ctrl-C able to abort the script.
+            print("Failed Download for:"+ row[0])
+        # Real percentage of processed rows; printed once per whole percent.
+        current = done * 100 // total
+        if current > pc:
+            pc = current
+            print(pc, "% of orginals downloaded")
+
+    print("finished downloading originals")
+
+    # starting with photoshops
+    if os.path.exists("photoshops"):
+        print(" photoshops directory already exist")
+        exit(127)
+
+    with open("photoshops.tsv", newline="") as tsv_file:
+        rows = [row for row in csv.reader(tsv_file, delimiter="\t") if row]
+    data_rows = rows[1:]  # skip header
+    total = len(data_rows)
+
+    os.mkdir("photoshops")
+
+    print("starting with downloading photoshops.tsv")
+    pc = 0  # last percentage that was printed
+    for done, row in enumerate(data_rows, start=1):
+        # row = id, original, url, end, hash, filesize, score, author, link, timestamp, width, height
+        # Photoshops are grouped in one sub-directory per original image id.
+        os.makedirs("photoshops/"+row[1], exist_ok=True)
+
+        filepath = "photoshops/"+row[1]+ "/" + row[0] + "." + row[3]  # photoshops/original/id.end
+        url = row[2]
+        checksum = row[4]
+        try:
+            download(url, filepath, checksum)
+        except Exception:
+            # Best effort: one failed image must not stop the run. Catching
+            # Exception (not everything) keeps Ctrl-C able to abort the script.
+            print("Failed Download for: "+ row[0])
+        current = done * 100 // total
+        if current > pc:
+            pc = current
+            print(pc, "% of photoshops downloaded")
+
+    print("finished downloading photoshops")