Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
kkalkidan committed Oct 7, 2019
0 parents commit 15a9417
Show file tree
Hide file tree
Showing 115 changed files with 54,647 additions and 0 deletions.
7 changes: 7 additions & 0 deletions assignment1/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
*.swp
*.pyc
.env/*
*.ipynb_checkpoints/*

# gitignore the built release.
assignment3/*
1 change: 1 addition & 0 deletions assignment1/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Details about this assignment can be found [on the course webpage](http://cs231n.github.io/), under Assignment #1 of Spring 2019.
53 changes: 53 additions & 0 deletions assignment1/collectSubmission.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/bin/bash
#NOTE: DO NOT EDIT THIS FILE-- MAY RESULT IN INCOMPLETE SUBMISSIONS
#
# Collects the assignment-1 notebooks and code files, zips them, copies the
# zip to the Stanford myth machines over rsync, and runs the remote grading
# submit script there.

# Notebooks that must be present for a complete submission.
NOTEBOOKS="knn.ipynb
svm.ipynb
softmax.ipynb
two_layer_net.ipynb
features.ipynb"

# Python source files that must be present for a complete submission.
CODE="cs231n/classifiers/k_nearest_neighbor.py
cs231n/classifiers/linear_classifier.py
cs231n/classifiers/linear_svm.py
cs231n/classifiers/softmax.py
cs231n/classifiers/neural_net.py"

LOCAL_DIR=`pwd`
REMOTE_DIR="cs231n-2019-assignment1"
ASSIGNMENT_NO=1
ZIP_FILENAME="a1.zip"

# ANSI color escapes: red, green, bold, reset.
C_R="\e[31m"
C_G="\e[32m"
C_BLD="\e[1m"
C_E="\e[0m"

FILES=""
# BUG FIX: the original iterated over the single quoted string
# "${NOTEBOOKS} ${CODE}" (one loop pass) and tested ${F} — an unset
# variable — so the per-file existence check never actually ran.
# Iterate unquoted so word-splitting yields one filename per pass,
# and test ${FILE}.
for FILE in ${NOTEBOOKS} ${CODE}
do
    if [ ! -f "${FILE}" ]; then
        echo -e "${C_R}Required file ${FILE} not found, Exiting.${C_E}"
        # BUG FIX: was `exit 0` — a missing required file is an error,
        # so signal failure with a nonzero status.
        exit 1
    fi
    FILES="${FILES} ${LOCAL_DIR}/${FILE}"
done

echo -e "${C_BLD}### Zipping file ###${C_E}"
rm -f ${ZIP_FILENAME}
# Zip the whole assignment directory, excluding VCS data, datasets,
# checkpoints, and other non-submission files; log output instead of
# spamming the terminal.
zip -r ${ZIP_FILENAME} . -x "*.git*" "*cs231n/datasets*" "*.ipynb_checkpoints*" "*README.md" "collectSubmission.sh" "*requirements.txt" "*__pycache__*" ".env/*" > assignment_zip.log
echo ""

echo -e "${C_BLD}### Submitting to myth ###${C_E}"
echo "Type in your Stanford student ID (alphanumeric, *not* the 8-digit ID):"
read -p "Student ID: " SUID
echo ""

echo -e "${C_BLD}### Copying to ${SUID}@myth.stanford.edu:${REMOTE_DIR} ###${C_E}"
echo -e "${C_G}Note: if myth is under heavy use, this may hang: If this happens, rerun the script.${C_E}"
FILES="${FILES} ${LOCAL_DIR}/${ZIP_FILENAME}"
rsync -avP ${FILES} ${SUID}@myth.stanford.edu:${REMOTE_DIR}
echo ""

echo -e "${C_BLD}### Running remote submission script from ${SUID}@myth.stanford.edu:${REMOTE_DIR} ###${C_E}"
ssh ${SUID}@myth.stanford.edu "cd ${REMOTE_DIR} && /afs/ir/class/cs231n/grading/submit ${ASSIGNMENT_NO} ${SUID} ${ZIP_FILENAME} && exit"
Empty file added assignment1/cs231n/__init__.py
Empty file.
2 changes: 2 additions & 0 deletions assignment1/cs231n/classifiers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from cs231n.classifiers.k_nearest_neighbor import *
from cs231n.classifiers.linear_classifier import *
190 changes: 190 additions & 0 deletions assignment1/cs231n/classifiers/k_nearest_neighbor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
from builtins import range
from builtins import object
import numpy as np
from past.builtins import xrange


class KNearestNeighbor(object):
    """ a kNN classifier with L2 distance """

    def __init__(self):
        pass

    def train(self, X, y):
        """
        Train the classifier. For k-nearest neighbors this is just
        memorizing the training data.

        Inputs:
        - X: A numpy array of shape (num_train, D) containing the training data
          consisting of num_train samples each of dimension D.
        - y: A numpy array of shape (N,) containing the training labels, where
          y[i] is the label for X[i].
        """
        self.X_train = X
        self.y_train = y

    def predict(self, X, k=1, num_loops=0):
        """
        Predict labels for test data using this classifier.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data consisting
          of num_test samples each of dimension D.
        - k: The number of nearest neighbors that vote for the predicted labels.
        - num_loops: Determines which implementation to use to compute distances
          between training points and testing points (0, 1, or 2 explicit loops;
          all three produce the same distance matrix).

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels for the
          test data, where y[i] is the predicted label for the test point X[i].

        Raises:
        - ValueError: if num_loops is not 0, 1, or 2.
        """
        if num_loops == 0:
            dists = self.compute_distances_no_loops(X)
        elif num_loops == 1:
            dists = self.compute_distances_one_loop(X)
        elif num_loops == 2:
            dists = self.compute_distances_two_loops(X)
        else:
            raise ValueError('Invalid value %d for num_loops' % num_loops)

        return self.predict_labels(dists, k=k)

    def compute_distances_two_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using a nested loop over both the training data and the
        test data.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data.

        Returns:
        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
          is the Euclidean distance between the ith test point and the jth training
          point.
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            for j in range(num_train):
                # L2 distance over the feature dimension; vectorized within
                # the pair so no loop over D is needed.
                dists[i, j] = np.sqrt(np.sum((X[i] - self.X_train[j]) ** 2))
        return dists

    def compute_distances_one_loop(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using a single loop over the test data.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            # Broadcast X[i] (shape (D,)) against X_train (num_train, D) and
            # reduce over the feature axis to fill one row at a time.
            dists[i, :] = np.sqrt(np.sum(np.square(self.X_train - X[i]), axis=1))
        return dists

    def compute_distances_no_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using no explicit loops.

        Uses the expansion ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2 so the whole
        distance matrix is a single matrix multiply plus two broadcast sums.

        Input / Output: Same as compute_distances_two_loops
        """
        # ||a||^2 per test point, kept as a column (num_test, 1) so it
        # broadcasts across the training axis.
        test_sq = np.sum(np.square(X), axis=1, keepdims=True)
        # ||b||^2 per training point, shape (num_train,) -> broadcasts as a row.
        train_sq = np.sum(np.square(self.X_train), axis=1)
        # Cross term a.b for every (test, train) pair, shape (num_test, num_train).
        cross = X.dot(self.X_train.T)
        # BUG FIX: floating-point cancellation can make the expansion slightly
        # negative when a test point (nearly) coincides with a training point,
        # and sqrt of a negative yields NaN. Clamp at zero before the sqrt.
        sq_dists = np.maximum(test_sq - 2 * cross + train_sq, 0)
        return np.sqrt(sq_dists)

    def predict_labels(self, dists, k=1):
        """
        Given a matrix of distances between test points and training points,
        predict a label for each test point.

        Inputs:
        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
          gives the distance betwen the ith test point and the jth training point.
        - k: Number of nearest neighbors that vote for each prediction.

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels for the
          test data, where y[i] is the predicted label for the test point X[i].

        NOTE(review): np.bincount requires non-negative integer labels in
        self.y_train — this matches the CIFAR-10 class indices the assignment
        uses, but would fail on float or negative labels.
        """
        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            # Indices of the k nearest training points for test point i.
            k_nearest_idx = np.argsort(dists[i])[:k]
            # Labels of those k neighbors.
            closest_y = self.y_train[k_nearest_idx]
            # Majority vote. np.argmax returns the FIRST maximum, so ties are
            # broken in favor of the smaller label, as required.
            y_pred[i] = np.argmax(np.bincount(closest_y))

        return y_pred
Loading

0 comments on commit 15a9417

Please sign in to comment.