Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
kkalkidan committed Oct 7, 2019
0 parents commit 15a9417
Show file tree
Hide file tree
Showing 115 changed files with 54,647 additions and 0 deletions.
7 changes: 7 additions & 0 deletions assignment1/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
*.swp
*.pyc
.env/*
*.ipynb_checkpoints/*

# gitignore the built release.
assignment3/*
1 change: 1 addition & 0 deletions assignment1/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Details about this assignment can be found [on the course webpage](http://cs231n.github.io/), under Assignment #1 of Spring 2019.
53 changes: 53 additions & 0 deletions assignment1/collectSubmission.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/bin/bash
#NOTE: DO NOT EDIT THIS FILE-- MAY RESULT IN INCOMPLETE SUBMISSIONS
#
# Collects the assignment-1 notebooks and code files, zips them, copies the
# zip to the Stanford myth machines over rsync, and runs the remote grading
# submit script there.

# Notebooks that must be present for a complete submission.
NOTEBOOKS="knn.ipynb
svm.ipynb
softmax.ipynb
two_layer_net.ipynb
features.ipynb"

# Python source files that must be present for a complete submission.
CODE="cs231n/classifiers/k_nearest_neighbor.py
cs231n/classifiers/linear_classifier.py
cs231n/classifiers/linear_svm.py
cs231n/classifiers/softmax.py
cs231n/classifiers/neural_net.py"

LOCAL_DIR=`pwd`
REMOTE_DIR="cs231n-2019-assignment1"
ASSIGNMENT_NO=1
ZIP_FILENAME="a1.zip"

# ANSI color escapes: red, green, bold, reset.
C_R="\e[31m"
C_G="\e[32m"
C_BLD="\e[1m"
C_E="\e[0m"

FILES=""
# BUG FIX: the original iterated over the single quoted string
# "${NOTEBOOKS} ${CODE}" (one loop pass) and tested ${F} — an unset
# variable — so the per-file existence check never actually ran.
# Iterate unquoted so word-splitting yields one filename per pass,
# and test ${FILE}.
for FILE in ${NOTEBOOKS} ${CODE}
do
    if [ ! -f "${FILE}" ]; then
        echo -e "${C_R}Required file ${FILE} not found, Exiting.${C_E}"
        # BUG FIX: was `exit 0` — a missing required file is an error,
        # so signal failure with a nonzero status.
        exit 1
    fi
    FILES="${FILES} ${LOCAL_DIR}/${FILE}"
done

echo -e "${C_BLD}### Zipping file ###${C_E}"
rm -f ${ZIP_FILENAME}
# Zip the whole assignment directory, excluding VCS data, datasets,
# checkpoints, and other non-submission files; log output instead of
# spamming the terminal.
zip -r ${ZIP_FILENAME} . -x "*.git*" "*cs231n/datasets*" "*.ipynb_checkpoints*" "*README.md" "collectSubmission.sh" "*requirements.txt" "*__pycache__*" ".env/*" > assignment_zip.log
echo ""

echo -e "${C_BLD}### Submitting to myth ###${C_E}"
echo "Type in your Stanford student ID (alphanumeric, *not* the 8-digit ID):"
read -p "Student ID: " SUID
echo ""

echo -e "${C_BLD}### Copying to ${SUID}@myth.stanford.edu:${REMOTE_DIR} ###${C_E}"
echo -e "${C_G}Note: if myth is under heavy use, this may hang: If this happens, rerun the script.${C_E}"
FILES="${FILES} ${LOCAL_DIR}/${ZIP_FILENAME}"
rsync -avP ${FILES} ${SUID}@myth.stanford.edu:${REMOTE_DIR}
echo ""

echo -e "${C_BLD}### Running remote submission script from ${SUID}@myth.stanford.edu:${REMOTE_DIR} ###${C_E}"
ssh ${SUID}@myth.stanford.edu "cd ${REMOTE_DIR} && /afs/ir/class/cs231n/grading/submit ${ASSIGNMENT_NO} ${SUID} ${ZIP_FILENAME} && exit"
Empty file added assignment1/cs231n/__init__.py
Empty file.
2 changes: 2 additions & 0 deletions assignment1/cs231n/classifiers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from cs231n.classifiers.k_nearest_neighbor import *
from cs231n.classifiers.linear_classifier import *
190 changes: 190 additions & 0 deletions assignment1/cs231n/classifiers/k_nearest_neighbor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
from builtins import range
from builtins import object
import numpy as np
from past.builtins import xrange


class KNearestNeighbor(object):
    """ a kNN classifier with L2 distance """

    def __init__(self):
        pass

    def train(self, X, y):
        """
        Train the classifier. For k-nearest neighbors this is just
        memorizing the training data.

        Inputs:
        - X: A numpy array of shape (num_train, D) containing the training data
          consisting of num_train samples each of dimension D.
        - y: A numpy array of shape (N,) containing the training labels, where
          y[i] is the label for X[i].
        """
        self.X_train = X
        self.y_train = y

    def predict(self, X, k=1, num_loops=0):
        """
        Predict labels for test data using this classifier.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data consisting
          of num_test samples each of dimension D.
        - k: The number of nearest neighbors that vote for the predicted labels.
        - num_loops: Determines which implementation to use to compute distances
          between training points and testing points (0, 1, or 2 explicit loops;
          all three produce the same distance matrix).

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels for the
          test data, where y[i] is the predicted label for the test point X[i].

        Raises:
        - ValueError: if num_loops is not 0, 1, or 2.
        """
        if num_loops == 0:
            dists = self.compute_distances_no_loops(X)
        elif num_loops == 1:
            dists = self.compute_distances_one_loop(X)
        elif num_loops == 2:
            dists = self.compute_distances_two_loops(X)
        else:
            raise ValueError('Invalid value %d for num_loops' % num_loops)

        return self.predict_labels(dists, k=k)

    def compute_distances_two_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using a nested loop over both the training data and the
        test data.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data.

        Returns:
        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
          is the Euclidean distance between the ith test point and the jth training
          point.
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            for j in range(num_train):
                # L2 distance over the feature dimension; vectorized within
                # the pair so no loop over D is needed.
                dists[i, j] = np.sqrt(np.sum((X[i] - self.X_train[j]) ** 2))
        return dists

    def compute_distances_one_loop(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using a single loop over the test data.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            # Broadcast X[i] (shape (D,)) against X_train (num_train, D) and
            # reduce over the feature axis to fill one row at a time.
            dists[i, :] = np.sqrt(np.sum(np.square(self.X_train - X[i]), axis=1))
        return dists

    def compute_distances_no_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using no explicit loops.

        Uses the expansion ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2 so the whole
        distance matrix is a single matrix multiply plus two broadcast sums.

        Input / Output: Same as compute_distances_two_loops
        """
        # ||a||^2 per test point, kept as a column (num_test, 1) so it
        # broadcasts across the training axis.
        test_sq = np.sum(np.square(X), axis=1, keepdims=True)
        # ||b||^2 per training point, shape (num_train,) -> broadcasts as a row.
        train_sq = np.sum(np.square(self.X_train), axis=1)
        # Cross term a.b for every (test, train) pair, shape (num_test, num_train).
        cross = X.dot(self.X_train.T)
        # BUG FIX: floating-point cancellation can make the expansion slightly
        # negative when a test point (nearly) coincides with a training point,
        # and sqrt of a negative yields NaN. Clamp at zero before the sqrt.
        sq_dists = np.maximum(test_sq - 2 * cross + train_sq, 0)
        return np.sqrt(sq_dists)

    def predict_labels(self, dists, k=1):
        """
        Given a matrix of distances between test points and training points,
        predict a label for each test point.

        Inputs:
        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
          gives the distance betwen the ith test point and the jth training point.
        - k: Number of nearest neighbors that vote for each prediction.

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels for the
          test data, where y[i] is the predicted label for the test point X[i].

        NOTE(review): np.bincount requires non-negative integer labels in
        self.y_train — this matches the CIFAR-10 class indices the assignment
        uses, but would fail on float or negative labels.
        """
        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            # Indices of the k nearest training points for test point i.
            k_nearest_idx = np.argsort(dists[i])[:k]
            # Labels of those k neighbors.
            closest_y = self.y_train[k_nearest_idx]
            # Majority vote. np.argmax returns the FIRST maximum, so ties are
            # broken in favor of the smaller label, as required.
            y_pred[i] = np.argmax(np.bincount(closest_y))

        return y_pred
Loading

0 comments on commit 15a9417

Please sign in to comment.