From 2a24b1246d680c0859f193b23c57a9aaf756e2ae Mon Sep 17 00:00:00 2001
From: Pragati Baheti
Date: Mon, 3 Jan 2022 16:41:51 +0530
Subject: [PATCH] Stanford dogs dataset upload helper files

---
 Stanford Dogs Dataset/README.md      | 25 +++++++++++++++++++++++++
 Stanford Dogs Dataset/annotations.py | 10 ++++++++++
 Stanford Dogs Dataset/upload.py      | 19 +++++++++++++++++++
 3 files changed, 54 insertions(+)
 create mode 100644 Stanford Dogs Dataset/README.md
 create mode 100644 Stanford Dogs Dataset/annotations.py
 create mode 100644 Stanford Dogs Dataset/upload.py

diff --git a/Stanford Dogs Dataset/README.md b/Stanford Dogs Dataset/README.md
new file mode 100644
index 0000000..81638e4
--- /dev/null
+++ b/Stanford Dogs Dataset/README.md
@@ -0,0 +1,25 @@
+# Stanford Dogs Dataset
+
+## Get the Dataset
+
+```
+Download the dataset from http://vision.stanford.edu/aditya86/ImageNetDogs/
+```
+
+## Extract the annotations from the .tar file
+
+```
+Run annotations.py to get the annotations from the tar file
+
+Run the following command in the annotations folder to convert the annotations to .xml format
+find . -type f -exec mv '{}' '{}'.xml \;
+
+```
+
+## Make the annotations compatible with the corresponding images
+
+```
+
+Run upload.py to make the bounding-box annotations compatible for upload. It pre-processes the folder and file names in the annotations so that they match the images.
+
+```
\ No newline at end of file
diff --git a/Stanford Dogs Dataset/annotations.py b/Stanford Dogs Dataset/annotations.py
new file mode 100644
index 0000000..1a4a25a
--- /dev/null
+++ b/Stanford Dogs Dataset/annotations.py
@@ -0,0 +1,14 @@
+"""Extract the Stanford Dogs annotation and image archives.
+
+Expects 'annotation.tar' and 'images.tar' in the current working directory
+and unpacks them into './annotations' and './images' respectively.
+"""
+import tarfile
+
+# NOTE(review): extractall() trusts the member paths stored in the archive;
+# only run this on the tar files downloaded from the official dataset page.
+for archive, target in (('annotation.tar', './annotations'),
+                        ('images.tar', './images')):
+    # The context manager closes the archive even if extraction fails.
+    with tarfile.open(archive) as my_tar:
+        my_tar.extractall(target)  # extract into the destination folder
\ No newline at end of file
diff --git a/Stanford Dogs Dataset/upload.py b/Stanford Dogs Dataset/upload.py
new file mode 100644
index 0000000..412e41a
--- /dev/null
+++ b/Stanford Dogs Dataset/upload.py
@@ -0,0 +1,30 @@
+"""Rewrite the extracted Stanford Dogs annotation XML files in place.
+
+For every file under 'annotations/Annotation/<class folder>/' the <folder>
+text is cleared and '.jpg' is appended to the <filename> text, so that the
+annotation names the corresponding image file.
+"""
+import os
+import xml.etree.ElementTree as ET
+
+directory = 'annotations/Annotation'
+for class_name in os.listdir(directory):
+    class_dir = os.path.join(directory, class_name)
+    if not os.path.isdir(class_dir):
+        # Skip stray files that sit next to the class folders.
+        continue
+    for annotation_name in os.listdir(class_dir):
+        annotation_path = os.path.join(class_dir, annotation_name)
+        # Parse the annotation file itself; parsing the class folder path
+        # fails because it is a directory, not an XML document.
+        mytree = ET.parse(annotation_path)
+        myroot = mytree.getroot()
+        for folder_el in myroot.iter('folder'):
+            folder_el.text = ''
+        for filename_el in myroot.iter('filename'):
+            # Leave empty <filename> elements alone and do not append the
+            # extension twice when the script is re-run.
+            if filename_el.text and not filename_el.text.endswith('.jpg'):
+                filename_el.text += '.jpg'
+
+        mytree.write(annotation_path)
\ No newline at end of file