
Commit

nn works, but not as anticipated
shivendrra committed Aug 8, 2024
1 parent a14ac18 commit 7f863d1
Showing 3 changed files with 222 additions and 0 deletions.
67 changes: 67 additions & 0 deletions csrc/main.c
@@ -0,0 +1,67 @@
#include "nn.h"
#include "scalar.h"
#include <stdio.h>
#include <stdlib.h>

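// Tiny training demo: fit a 3-input MLP to four hand-written samples
// using a squared-error loss and plain gradient descent.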
int main() {
  double xs[4][3] = {
    {2.0, 3.0, -1.0},
    {3.0, 0.0, -0.5},
    {0.5, 1.0, 1.0},
    {1.0, 1.0, -1.0}
  };
  double ys[] = {1.0, -1.0, -1.0, 1.0};

  size_t layers[] = {4, 4, 1};
  MLP* model = init_mlp(3, layers, 3);

  int epochs = 100;
  double learning_rate = 0.01;

  for (int k = 0; k < epochs; ++k) {
    scalar loss = initialize_scalars(0.0, NULL, 0);

    for (int i = 0; i < 4; ++i) {
      tensor input = (tensor)malloc(3 * sizeof(scalar));
      for (int j = 0; j < 3; ++j) {
        input[j] = initialize_scalars(xs[i][j], NULL, 0);
      }
      tensor output = mlp_forward(model, input);
      scalar target = initialize_scalars(ys[i], NULL, 0);

      // accumulate squared error: loss += (output - target)^2
      scalar error = sub_val(output[0], target);
      scalar error_squared = mul_val(error, error);
      loss = add_val(loss, error_squared);

      // free only the pointer arrays; the Scalar nodes they held are
      // still referenced by the computation graph
      free(input);
      free(output);
    }

    zero_grad(model);
    backward(loss);

    // SGD step over a flat view of every weight and bias
    size_t param_count;
    tensor params = mlp_parameters(model, &param_count);
    for (size_t i = 0; i < param_count; ++i) {
      params[i]->data -= learning_rate * params[i]->grad;
    }
    free(params);

    printf("Epoch %d -> Loss: %f\n", k, loss->data);
  }

  printf("Final outputs:\n");
  for (int i = 0; i < 4; ++i) {
    tensor input = (tensor)malloc(3 * sizeof(scalar));
    for (int j = 0; j < 3; ++j) {
      input[j] = initialize_scalars(xs[i][j], NULL, 0);
    }
    tensor output = mlp_forward(model, input);
    printf("[%d] %f\n", i, output[0]->data);

    free(input);
    free(output);
  }

  mlp_free(model);

  return 0;
}
119 changes: 119 additions & 0 deletions csrc/nn.c
@@ -0,0 +1,119 @@
#include "nn.h"
#include "scalar.h"
#include <stdlib.h>
#include <stdio.h>

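// Allocate a neuron with n_in weights drawn uniformly from [-1, 1] and a zero bias.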
Neuron* init_neuron(const size_t n_in, int nonlin) {
  Neuron* neuron = (Neuron*)malloc(sizeof(Neuron));
  neuron->wei = (scalar*)malloc(n_in * sizeof(scalar));
  neuron->bias = initialize_scalars(0.0, NULL, 0);
  neuron->n_in = n_in;
  neuron->nonlin = nonlin;

  for (size_t i = 0; i < n_in; ++i) {
    neuron->wei[i] = initialize_scalars((double)rand() / RAND_MAX * 2 - 1, NULL, 0);
  }
  return neuron;
}

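// Compute w . x + b as a chain of autograd nodes; applies ReLU when nonlin is set.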
scalar neuron_forward(Neuron* neuron, tensor inputs) {
  // start from the bias node itself (not a copy of its value), so that
  // backward() can propagate gradients into the bias
  scalar sum = neuron->bias;
  for (size_t i = 0; i < neuron->n_in; ++i) {
    scalar prod = mul_val(neuron->wei[i], inputs[i]);
    // do not free prod or the old sum here: they remain children of the
    // new node and backward() still needs them
    sum = add_val(sum, prod);
  }
  if (neuron->nonlin) {
    return relu(sum);
  }
  return sum;
}

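// Build a layer of n_out neurons, each taking n_in inputs.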
Layer* init_layer(const size_t n_in, const size_t n_out, int nonlin) {
  Layer* layer = (Layer*)malloc(sizeof(Layer));
  layer->neurons = (Neuron*)malloc(n_out * sizeof(Neuron));
  layer->n_neurons = n_out;

  for (size_t i = 0; i < n_out; ++i) {
    Neuron* tmp = init_neuron(n_in, nonlin);
    layer->neurons[i] = *tmp;  // copy into the contiguous array
    free(tmp);                 // release the temporary wrapper, not its contents
  }
  return layer;
}

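// Run every neuron in the layer; returns a freshly malloc'd array of outputs.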
tensor layer_forward(Layer* layer, tensor inputs) {
  tensor outputs = (tensor)malloc(layer->n_neurons * sizeof(scalar));
  for (size_t i = 0; i < layer->n_neurons; ++i) {
    outputs[i] = neuron_forward(&layer->neurons[i], inputs);
  }
  return outputs;
}

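// Stack n_layers fully connected layers; n_out[i] is the width of layer i,
// and the final layer is linear (no ReLU).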
MLP* init_mlp(const size_t n_in, const size_t* n_out, size_t n_layers) {
  MLP* mlp = (MLP*)malloc(sizeof(MLP));
  mlp->layers = (Layer*)malloc(n_layers * sizeof(Layer));
  mlp->n_layers = n_layers;

  for (size_t i = 0; i < n_layers; ++i) {
    size_t fan_in = (i == 0) ? n_in : n_out[i - 1];
    Layer* tmp = init_layer(fan_in, n_out[i], i != n_layers - 1);
    mlp->layers[i] = *tmp;
    free(tmp);
  }
  return mlp;
}

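// Feed inputs through all layers; intermediate output arrays are freed,
// while the caller keeps ownership of the inputs array.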
tensor mlp_forward(MLP* mlp, tensor inputs) {
  tensor output = inputs;
  for (size_t i = 0; i < mlp->n_layers; ++i) {
    tensor temp = layer_forward(&mlp->layers[i], output);
    if (i > 0) free(output);
    output = temp;
  }
  return output;
}

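// Release every weight and bias node, then the neuron and layer arrays.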
void mlp_free(MLP* mlp) {
  for (size_t i = 0; i < mlp->n_layers; ++i) {
    for (size_t j = 0; j < mlp->layers[i].n_neurons; ++j) {
      Neuron* n = &mlp->layers[i].neurons[j];
      for (size_t k = 0; k < n->n_in; ++k) {
        free(n->wei[k]);  // free each weight node before the array holding them
      }
      free(n->wei);
      free(n->bias);
    }
    free(mlp->layers[i].neurons);
  }
  free(mlp->layers);
  free(mlp);
}

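// Reset the gradient of every weight and bias before a new backward pass.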
void zero_grad(MLP* mlp) {
  for (size_t i = 0; i < mlp->n_layers; ++i) {
    for (size_t j = 0; j < mlp->layers[i].n_neurons; ++j) {
      for (size_t k = 0; k < mlp->layers[i].neurons[j].n_in; ++k) {
        mlp->layers[i].neurons[j].wei[k]->grad = 0.0;
      }
      mlp->layers[i].neurons[j].bias->grad = 0.0;
    }
  }
}

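// Collect pointers to all trainable scalars into one flat array (caller frees the array).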
tensor mlp_parameters(MLP* mlp, size_t* param_count) {
  *param_count = 0;
  for (size_t i = 0; i < mlp->n_layers; ++i) {
    for (size_t j = 0; j < mlp->layers[i].n_neurons; ++j) {
      *param_count += mlp->layers[i].neurons[j].n_in + 1; // weights + bias
    }
  }

  tensor params = (tensor)malloc(*param_count * sizeof(scalar));
  size_t index = 0;
  for (size_t i = 0; i < mlp->n_layers; ++i) {
    for (size_t j = 0; j < mlp->layers[i].n_neurons; ++j) {
      for (size_t k = 0; k < mlp->layers[i].neurons[j].n_in; ++k) {
        params[index++] = mlp->layers[i].neurons[j].wei[k];
      }
      params[index++] = mlp->layers[i].neurons[j].bias;
    }
  }
  return params;
}
36 changes: 36 additions & 0 deletions csrc/nn.h
@@ -0,0 +1,36 @@
#ifndef NN_H
#define NN_H

#include "scalar.h"

typedef struct Neuron {
  tensor wei;   // weights
  scalar bias;
  size_t n_in;
  int nonlin;   // indicates if non-linearity is applied
} Neuron;

typedef struct Layer {
  Neuron* neurons;
  size_t n_neurons;
} Layer;

typedef struct MLP {
  Layer* layers;
  size_t n_layers;
} MLP;

Neuron* init_neuron(const size_t n_in, int nonlin);
scalar neuron_forward(Neuron* neuron, tensor inputs);

Layer* init_layer(const size_t n_in, const size_t n_out, int nonlin);
tensor layer_forward(Layer* layer, tensor inputs);

MLP* init_mlp(const size_t n_in, const size_t* n_out, size_t n_layers);
tensor mlp_forward(MLP* mlp, tensor inputs);
void mlp_free(MLP* mlp);

void zero_grad(MLP* mlp);
tensor mlp_parameters(MLP* mlp, size_t* param_count);

#endif
