Skip to content

Commit

Permalink
init commit: Eigen vectors, LR from ground up and Decision trees
Browse files Browse the repository at this point in the history
  • Loading branch information
arjunprakash027 committed Dec 8, 2024
1 parent cac305c commit 36e57b8
Show file tree
Hide file tree
Showing 20 changed files with 6,273 additions and 0 deletions.
Binary file added Image_Compression/data/d1.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
772 changes: 772 additions & 0 deletions Image_Compression/notebooks/clustering.ipynb

Large diffs are not rendered by default.

792 changes: 792 additions & 0 deletions Image_Compression/notebooks/first_attempt.ipynb

Large diffs are not rendered by default.

48 changes: 48 additions & 0 deletions logistic_regression_ground_up/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"files.associations": {
"vector": "cpp",
"iostream": "cpp",
"array": "cpp",
"atomic": "cpp",
"bit": "cpp",
"*.tcc": "cpp",
"cctype": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"compare": "cpp",
"concepts": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdint": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"deque": "cpp",
"string": "cpp",
"unordered_map": "cpp",
"exception": "cpp",
"algorithm": "cpp",
"functional": "cpp",
"iterator": "cpp",
"memory": "cpp",
"memory_resource": "cpp",
"numeric": "cpp",
"random": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"utility": "cpp",
"initializer_list": "cpp",
"iosfwd": "cpp",
"istream": "cpp",
"limits": "cpp",
"new": "cpp",
"numbers": "cpp",
"ostream": "cpp",
"stdexcept": "cpp",
"streambuf": "cpp",
"typeinfo": "cpp"
}
}
22 changes: 22 additions & 0 deletions logistic_regression_ground_up/LinearModels.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#include <pybind11/pybind11.h>
#include <pybind11/stl.h> // Required for automatic conversion between Python list and C++ vector
#include "logisticRegression.hpp"

// pybind11 entry point: defines the Python extension module `LinearModels`.
PYBIND11_MODULE(LinearModels, module_handle) {
    // Docstring shown by help(LinearModels) in Python.
    module_handle.doc() = "LinearModels Module written in CPP interface in python";
    // Expose the static C++ routine as LinearModels.logreg_fit(X, y, lr, epochs).
    module_handle.def("logreg_fit",&LogisticRegression::fit,"Function to fit a logistic regression model");
}














Binary file added logistic_regression_ground_up/LinearModels.o
Binary file not shown.
Binary file added logistic_regression_ground_up/LinearModels.so
Binary file not shown.
12 changes: 12 additions & 0 deletions logistic_regression_ground_up/archive/array_utilities.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include <iostream>
#include "array_utilities.hpp"
#include <vector>

// Returns the sum of every element in vec (0 for an empty vector).
int array_utilities::sum(const std::vector<int>& vec){
    int total = 0;
    for (const int value : vec) {
        total += value;
    }
    return total;
}

12 changes: 12 additions & 0 deletions logistic_regression_ground_up/archive/array_utilities.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#ifndef ARRAY_UTILITIES_H
#define ARRAY_UTILITIES_H

#include <string>
#include <vector>

// Small collection of static array helpers; implemented in array_utilities.cpp
// and exposed to Python through py_bind.cpp.
class array_utilities {
public:
    // Returns the sum of all elements in vec (0 for an empty vector).
    static int sum(const std::vector<int>& vec);
};

#endif
14 changes: 14 additions & 0 deletions logistic_regression_ground_up/archive/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include <iostream>
#include <vector>
#include "array_utilities.hpp"

int main() {
    // Demo driver: sum a small hard-coded vector via the helper class.
    const std::vector<int> sample_values = {1,4,6,7};
    const int sample_total = array_utilities::sum(sample_values);
    std::cout << "Sum value = " << sample_total << std::endl;
    return 0;
}




18 changes: 18 additions & 0 deletions logistic_regression_ground_up/archive/makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Build the archive demo: compiles array_utilities + main into ./main.
CXX = g++

CXXFLAGS = -Wall -Wextra -std=c++11

SRCS = array_utilities.cpp main.cpp

OBJS = $(SRCS:.cpp=.o)

TARGET = main

# Link step.
$(TARGET) : $(OBJS)
	$(CXX) $(OBJS) -o $(TARGET)

# Compile each translation unit.
%.o: %.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@

# `clean` is not a file; mark it phony so a file named "clean" cannot mask it.
.PHONY: clean
clean:
	rm -f $(OBJS)
22 changes: 22 additions & 0 deletions logistic_regression_ground_up/archive/py_bind.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#include <pybind11/pybind11.h>
#include <pybind11/stl.h> // Required for automatic conversion between Python list and C++ vector
#include "array_utilities.hpp"

// pybind11 entry point: defines the Python extension module `example`.
PYBIND11_MODULE(example, module_handle) {
    // Docstring shown by help(example) in Python.
    module_handle.doc() = "Vector Addition Module";
    // Expose the static C++ routine as example.sumcpp(list_of_ints).
    module_handle.def("sumcpp",&array_utilities::sum,"Function to add all values in a array");
}














137 changes: 137 additions & 0 deletions logistic_regression_ground_up/logisticRegression.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#include <iostream>
#include "logisticRegression.hpp"
#include <vector>
#include <stdexcept>
#include <cmath>

// Validates the training data before fitting:
//  - every feature column in X_Train must have exactly one value per target;
//  - every target in Y_Train must be exactly 0 or 1.
// Returns 0 on success; throws std::runtime_error otherwise.
int checks(const std::vector<std::vector<double> >& X_Train, const std::vector<double>& Y_Train) {

    std::size_t YSize = Y_Train.size();
    std::size_t XDim = X_Train.size();

    // Initial dimensionality check
    for (std::size_t i=0; i < XDim; ++i) {
        if (YSize != X_Train[i].size()){
            throw std::runtime_error("Error : Input and Output Features are of different sizes");
        }
    }

    // Binomial check. BUGFIX: iterate with double, not std::size_t — the old
    // std::size_t loop variable truncated fractional targets (e.g. 0.5 -> 0),
    // silently accepting non-binary labels.
    for (double target_val : Y_Train) {
        if (target_val != 0.0 && target_val != 1.0) {
            throw std::runtime_error("Error : Target variable can only be binary (0 or 1)");
        }
    }

    return 0;
}

// Computes the linear combination Z[r] = sum_f X_Train[f][r] * W[f] for every
// record r. X_Train is column-major: one inner vector per feature.
// Assumes W.size() >= X_Train.size() and all columns have equal length
// (enforced upstream by checks()).
std::vector<double> calculate_linear_output (const std::vector<std::vector<double> >& X_Train, const std::vector<double>& W) {
    // BUGFIX: with no feature columns the old X_Train[0] access was
    // undefined behaviour; there is nothing to compute, so return empty.
    if (X_Train.empty()) {
        return std::vector<double>();
    }

    std::size_t RecordSize = X_Train[0].size();
    std::size_t FieldSize = X_Train.size();

    // Declaring the output variable where the output gets saved to
    std::vector<double> Z(RecordSize, 0.0);

    for (std::size_t records = 0; records < RecordSize; ++records) {
        for (std::size_t fields = 0; fields < FieldSize; ++fields) {
            Z[records] += X_Train[fields][records] * W[fields];
        }
    }

    return Z;
}

// Squashes each linear score in Z onto (0, 1) in place via the logistic
// function 1 / (1 + e^-z). Always returns 0.
int sigmoid_function (std::vector<double>& Z) {

    for (double& score : Z) {
        score = 1 / (1 + std::exp(-score));
    }

    return 0;
}

// Mean binary cross-entropy between predicted probabilities Z and binary
// labels Actual. Assumes Z.size() == Actual.size().
double log_loss (const std::vector<double>& Z, const std::vector<double>& Actual) {
    std::size_t num_elements = Z.size();

    // BUGFIX: avoid 0/0 -> NaN on empty input.
    if (num_elements == 0) {
        return 0.0;
    }

    // BUGFIX: clamp probabilities away from exact 0/1 so std::log never
    // returns -inf (the old code produced NaN/inf for confident predictions).
    const double eps = 1e-15;
    double loss = 0.0;

    for (std::size_t outs = 0; outs < num_elements; ++outs) {

        double actual_outcome = Actual[outs];
        double predicted_prob = Z[outs];
        if (predicted_prob < eps) {
            predicted_prob = eps;
        } else if (predicted_prob > 1.0 - eps) {
            predicted_prob = 1.0 - eps;
        }
        loss += - ((actual_outcome * std::log(predicted_prob)) + ((1.0 - actual_outcome) * std::log(1.0 - predicted_prob)));
    }

    return loss / num_elements;
}

// Gradient of the mean log-loss w.r.t. each weight:
// gradients[f] = (1/YSize) * sum_r (Z[r] - Y[r]) * X[f][r].
// Xdim is the feature count, YSize the record count; X is column-major.
std::vector<double> calculate_gradient (const std::size_t& Xdim, const std::size_t& YSize, const std::vector<double> Z, const std::vector<std::vector<double> >& X, const std::vector<double>& Y) {

    std::vector<double> gradients(Xdim, 0.0);

    // Feature-major traversal: each gradient entry still accumulates its
    // record contributions in the same order as before.
    for (std::size_t feature = 0; feature < Xdim; ++feature) {
        for (std::size_t record = 0; record < YSize; ++record) {
            gradients[feature] += ((Z[record] - Y[record]) * X[feature][record]) / YSize;
        }
    }

    return gradients;
}

// Gradient-descent step: moves every weight against its gradient entry,
// scaled by the learning rate alpha. Mutates W in place; always returns 0.
// Assumes gradient.size() >= W.size().
int update_weights (double& alpha,std::vector<double>& W, const std::vector<double>& gradient) {

    std::size_t idx = 0;
    for (double& current_weight : W) {
        current_weight -= alpha * gradient[idx];
        ++idx;
    }

    return 0;
}

int LogisticRegression::fit(const std::vector<std::vector<double> >& X_Train, const std::vector<double>& Y_Train, double learning_rate, std::size_t epochs) {

std::size_t YSize = Y_Train.size();
std::size_t XDim = X_Train.size();

// Initial dimentionality and binomial checks
checks(X_Train,Y_Train);
// Print dimensions of X_Train
std::cout << "X_Train dimensions: " << XDim << "x" << (X_Train.empty() ? 0 : X_Train[0].size()) << std::endl;
// Print number of elements in Y_Train
std::cout << "Y_Train size: " << YSize << std::endl;


// Initializing the weights for linear function
std::vector<double> w(XDim, 0.5);


for (std::size_t iter = 0; iter < epochs; ++iter) {
// Calculate the linear estimator value (z = summation(x*w))
std::vector<double> Z(YSize, 0.0);
Z = calculate_linear_output(X_Train,w);
sigmoid_function(Z);

// Calculate the gradient of loss function
std::vector<double> gradient(XDim, 0.0);
gradient = calculate_gradient(XDim,YSize,Z,X_Train,Y_Train);

// Update weights
update_weights(learning_rate, w, gradient);

// Calculate the log loss (Binary cross entropy)
double error = 0.0;
error = log_loss(Z,Y_Train);

std::cout << "Epoch: " << iter + 1 << ", Error: " << error << ", Weights: ";
for (const auto& weight : w) {
std::cout << weight << " ";
}
std::cout << ", Gradient: ";
for (const auto& grad : gradient) {
std::cout << grad << " ";
}
std::cout << std::endl;
}


return 0;
}
19 changes: 19 additions & 0 deletions logistic_regression_ground_up/logisticRegression.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Include guard renamed: the old ARRAY_UTILITIES_HPP was copy-pasted from
// archive/array_utilities.hpp and did not describe this header.
#ifndef LOGISTIC_REGRESSION_HPP
#define LOGISTIC_REGRESSION_HPP

#include <cstddef> // for std::size_t used in the declarations below
#include <string>
#include <vector>

// Logistic regression trained with full-batch gradient descent; implemented
// in logisticRegression.cpp and exposed to Python via pybind11.
class LogisticRegression {
public:
    // Fits on column-major X_Train (one inner vector per feature) and binary
    // Y_Train. Returns 0 on success; throws std::runtime_error on bad input.
    static int fit (const std::vector<std::vector<double> >& X_Train, const std::vector<double>& Y_Train, double learning_rate, std::size_t epochs);
};

// Free helper functions used by fit().
int checks(const std::vector<std::vector<double> >& X_Train, const std::vector<double>& Y_Train);
std::vector<double> calculate_linear_output (const std::vector<std::vector<double> >& X_Train, const std::vector<double>& W);
int sigmoid_function (std::vector<double>& Z);
double log_loss (const std::vector<double>& Z, const std::vector<double>& Actual);
// BUGFIX: this declaration now matches the definition in
// logisticRegression.cpp; the old one declared a different three-argument
// overload that was never defined.
std::vector<double> calculate_gradient (const std::size_t& Xdim, const std::size_t& YSize, const std::vector<double> Z, const std::vector<std::vector<double> >& X, const std::vector<double>& Y);
int update_weights (double& alpha,std::vector<double>& W, const std::vector<double>& gradient);

#endif
Binary file not shown.
19 changes: 19 additions & 0 deletions logistic_regression_ground_up/makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Compiler and flags
CXX = g++
CXXFLAGS = -O3 -Wall -shared -std=c++11 -fPIC
# pybind11 / Python header and extension-suffix discovery at make time.
PYBIND_FLAGS = `python3 -m pybind11 --includes`
PYTHON_CONFIG = `python3-config --extension-suffix`

# Source files
SRCS = logisticRegression.cpp LinearModels.cpp

# Output file name (e.g. LinearModels.cpython-313-x86_64-linux-gnu.so)
TARGET = LinearModels$(PYTHON_CONFIG)

# Rule to create the shared object
$(TARGET): $(SRCS)
	$(CXX) $(CXXFLAGS) $(PYBIND_FLAGS) $(SRCS) -o $(TARGET)

# `clean` is not a file; mark it phony so a file named "clean" cannot mask it.
.PHONY: clean

# Clean rule to remove generated files
clean:
	rm -f $(TARGET)
34 changes: 34 additions & 0 deletions logistic_regression_ground_up/makefile_mac
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Define variables
# NOTE(review): the paths below are Homebrew/macOS(arm64)-specific and
# hard-code Python 3.13 — update PYTHON_VERSION when the interpreter changes.
PYTHON_VERSION = 3.13
PYTHON_INCLUDE_DIR = /opt/homebrew/opt/python@$(PYTHON_VERSION)/Frameworks/Python.framework/Versions/$(PYTHON_VERSION)/include/python$(PYTHON_VERSION)
PYTHON_LIB_DIR = /opt/homebrew/opt/python@$(PYTHON_VERSION)/Frameworks/Python.framework/Versions/$(PYTHON_VERSION)/lib/python$(PYTHON_VERSION)/config-$(PYTHON_VERSION)-darwin
PYTHON_LIB = /opt/homebrew/opt/python@$(PYTHON_VERSION)/Frameworks/Python.framework/Versions/$(PYTHON_VERSION)/lib/libpython$(PYTHON_VERSION).dylib

# Set compiler flags for C++ (for arm64 architecture)
# NOTE(review): -isystem hard-codes the MacOSX15.1 SDK path — confirm it
# matches the SDK installed with the local Command Line Tools.
CXXFLAGS = -std=c++17 -Wall -g -O3 -fPIC -arch arm64 -I$(PYTHON_INCLUDE_DIR) -I/opt/homebrew/lib/python$(PYTHON_VERSION)/site-packages/pybind11/include -isystem /Library/Developer/CommandLineTools/SDKs/MacOSX15.1.sdk/usr/include/c++/v1/
LDFLAGS = -L$(PYTHON_LIB_DIR) -lpython$(PYTHON_VERSION) -framework CoreFoundation -ldl

# Define source and object files
SRCS = LinearModels.cpp logisticRegression.cpp
OBJS = $(SRCS:.cpp=.o)

# Output shared library name
TARGET = LinearModels.so

# Default target to build
all: $(TARGET)

# Rule for building the shared library
$(TARGET): $(OBJS)
	$(CXX) $(LDFLAGS) -shared -o $@ $^

# Rule for compiling .cpp files to .o object files
%.o: %.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@

# Clean object files and the shared library
clean:
	rm -f $(OBJS) $(TARGET)

# Phony targets
.PHONY: all clean
3 changes: 3 additions & 0 deletions logistic_regression_ground_up/testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Smoke test for the compiled pybind11 LinearModels extension module.
import LinearModels as linear_models

# Fit on a tiny dataset: 2 feature columns x 4 records, binary targets.
print(linear_models.logreg_fit([[2,3,4,6],[2,3,4,5]],[0,1,0,1],0.1,100))
Loading

0 comments on commit 36e57b8

Please sign in to comment.