From 2a24b1246d680c0859f193b23c57a9aaf756e2ae Mon Sep 17 00:00:00 2001
From: Pragati Baheti <t-prbahe@microsoft.com>
Date: Mon, 3 Jan 2022 16:41:51 +0530
Subject: [PATCH] Stanford dogs dataset upload helper files

---
 Stanford Dogs Dataset/README.md      | 25 +++++++++++++++++++++++++
 Stanford Dogs Dataset/annotations.py | 10 ++++++++++
 Stanford Dogs Dataset/upload.py      | 19 +++++++++++++++++++
 3 files changed, 54 insertions(+)
 create mode 100644 Stanford Dogs Dataset/README.md
 create mode 100644 Stanford Dogs Dataset/annotations.py
 create mode 100644 Stanford Dogs Dataset/upload.py

diff --git a/Stanford Dogs Dataset/README.md b/Stanford Dogs Dataset/README.md
new file mode 100644
index 0000000..81638e4
--- /dev/null
+++ b/Stanford Dogs Dataset/README.md	
@@ -0,0 +1,25 @@
+# Stanford Dogs Dataset
+
+## Get the Dataset
+
+```
+Download the dataset from http://vision.stanford.edu/aditya86/ImageNetDogs/
+```
+
+## Extract the annotations from .tar file
+
+```
+Run annotations.py to extract annotation.tar and images.tar into the ./annotations and ./images folders
+
+Run the following command in the annotations folder to append the .xml extension to every annotation file
+find . -type f -exec mv '{}' '{}'.xml \;
+
+```
+
+## Make the annotations compatible with the corresponding images
+
+```
+
+Run upload.py to make the bounding-box annotations compatible for upload. It pre-processes the folder and filename fields of each annotation so that they match the images.
+
+```
\ No newline at end of file
diff --git a/Stanford Dogs Dataset/annotations.py b/Stanford Dogs Dataset/annotations.py
new file mode 100644
index 0000000..1a4a25a
--- /dev/null
+++ b/Stanford Dogs Dataset/annotations.py	
@@ -0,0 +1,10 @@
+"""Extract annotation.tar and images.tar into ./annotations and ./images."""
+import tarfile
+
+# (archive file, folder to extract it to), processed in this order.
+ARCHIVES = (('annotation.tar', './annotations'), ('images.tar', './images'))
+
+for archive, target in ARCHIVES:
+    # The context manager closes the archive even if extraction fails.
+    with tarfile.open(archive) as my_tar:
+        my_tar.extractall(target)
\ No newline at end of file
diff --git a/Stanford Dogs Dataset/upload.py b/Stanford Dogs Dataset/upload.py
new file mode 100644
index 0000000..412e41a
--- /dev/null
+++ b/Stanford Dogs Dataset/upload.py	
@@ -0,0 +1,19 @@
+"""Clear <folder> and append '.jpg' to <filename> in every annotation file."""
+import os
+import xml.etree.ElementTree as ET
+
+directory = 'annotations/Annotation'
+for breed_dir in os.listdir(directory):
+    breed_path = os.path.join(directory, breed_dir)
+    for xml_name in os.listdir(breed_path):
+        xml_path = os.path.join(breed_path, xml_name)
+        # Parse the annotation file itself, not the directory that holds it.
+        mytree = ET.parse(xml_path)
+        myroot = mytree.getroot()
+        for folder in myroot.iter('folder'):
+            folder.text = ''
+        for file in myroot.iter('filename'):
+            # <filename> is stored without an extension; add '.jpg' to it.
+            file.text = file.text + '.jpg'
+
+        mytree.write(xml_path)
\ No newline at end of file