// conjugate_gradient_descent.cpp
#include <bits/stdc++.h>
#include "matrix.h"
using namespace std;
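/*
 Note on "matrix.h": it is a local helper header in this repo. From the calls
 below it is assumed to provide (signatures inferred from usage, not verified):
   - vector_magnitude_1xn_squared(m) : squared Euclidean norm of a 1xn row vector
   - mx_constant_multiplication(k, m): scalar times matrix
   - mx_addition(a, b)               : element-wise matrix sum
   - mx_multiplication(a, b)         : matrix product
   - mx_transpose(m)                 : matrix transpose
   - mx_to_constant(m)               : the scalar stored in a 1x1 matrix
   - print_matrix(m)                 : print a matrix to stdout
 All matrices are represented as vector<vector<double>>.
*/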
/*
Conjugate Gradient Descent (Fletcher-Reeves):
Moves along mutually conjugate directions and, for an n-variable quadratic,
terminates exactly in at most n iterations.
  1. Compute the gradient ∇f(x) and the Hessian H(x); set d_0 = 0 and choose a
     starting point x_0.
  2. Find the search direction d_k, conjugate to the previous one
     (d_k * H(x) * d_{k-1}^T = 0):
       d_k = -∇f(x_k) + B_k * d_{k-1},  where B_k = |∇f(x_k)|^2 / |∇f(x_{k-1})|^2
  3. Determine the optimal step length:
       a_k = (∇f(x_k) * ∇f(x_k)^T) / (d_k * H(x) * d_k^T)
  4. Update: x_{k+1} = x_k + a_k * d_k
  5. Test optimality at the new point x_{k+1}:
       ∇f(x_{k+1}) = 0
     If it is not satisfied, repeat steps 2 to 5.
*/
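/*
 Worked check for the example below, f(x) = 2*x1^2 + x2^2 + 2*x1*x2 + x1 - x2:
 setting ∇f = (4*x1 + 2*x2 + 1, 2*x1 + 2*x2 - 1) = (0, 0) gives x* = (-1, 1.5)
 with f(x*) = -1.25. Since n = 2, the method should reach x* in at most
 2 iterations from the starting point x_0 = (0, 0).
*/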
int main() {
    // A general 2-variable quadratic function: f(x) = a*x1*x1 + b*x2*x2 + c*x1*x2 + d*x1 + e*x2 + f
    // Function to be minimized: f(x) = 2*x1*x1 + x2*x2 + 2*x1*x2 + x1 - x2
    double a, b, c, d, e, f;
    a = 2;
    b = 1;
    c = 2;
    d = 1;
    e = -1;
    f = 0;
    double x1, x2;
    // Starting point
    vector<vector<double>> x = {{0, 0}};
    x1 = x[0][0];
    x2 = x[0][1];
    // Objective function value at the starting point
    double f_x = a*x1*x1 + b*x2*x2 + c*x1*x2 + d*x1 + e*x2 + f;
    // Partial derivative w.r.t. x1: df/dx1 = 2*a*x1 + c*x2 + d
    // Partial derivative w.r.t. x2: df/dx2 = 2*b*x2 + c*x1 + e
    double df_x1 = 2*a*x1 + c*x2 + d;
    double df_x2 = 2*b*x2 + c*x1 + e;
    // Hessian matrix: H = [[2*a, c], [c, 2*b]] (constant for a quadratic)
    vector<vector<double>> h_x = {{2*a, c}, {c, 2*b}};
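    // For the chosen coefficients, H = [[4, 2], [2, 2]]: the leading minors are
    // 4 > 0 and det(H) = 8 - 4 = 4 > 0, so H is positive definite, f is strictly
    // convex, and d_k * H * d_k^T > 0, so every step length below is well defined.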
    // Gradient, stored as a 1x2 row vector
    vector<vector<double>> df_x = {{df_x1, df_x2}};
    // Maximum number of iterations (the loop breaks early once optimality is reached)
    int itr = 10;
    cout << "Conjugate Gradient Descent: \n";
    // Initialization
    vector<vector<double>> d_k = {{0, 0}};
    vector<vector<double>> df_x_prev = {{0, 0}}; // stores the previous gradient
    double B_k = 0;
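    // With d_0 = 0 and B_1 = 0, the first direction is d_1 = -∇f(x_0), i.e. the
    // first iteration is a plain steepest-descent step.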
    for(int i = 1; i <= itr; i++) {
        cout << "\nK: " << i << "\n";
        cout << "------------------------------\n";
        // Fletcher-Reeves coefficient: B_k = |∇f(x_k)|^2 / |∇f(x_{k-1})|^2
        if(i > 1) B_k = vector_magnitude_1xn_squared(df_x) / vector_magnitude_1xn_squared(df_x_prev);
        // Search direction: d_k = -∇f(x_k) + B_k * d_{k-1}
        vector<vector<double>> b_d_k = mx_constant_multiplication(B_k, d_k);
        d_k = mx_addition(mx_constant_multiplication(-1, df_x), b_d_k);
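        // Exact line search for a quadratic: minimizing f(x_k + a*d_k) over a gives
        // a_k = -(∇f(x_k) * d_k^T) / (d_k * H * d_k^T). With exact previous line
        // searches, ∇f(x_k) * d_{k-1}^T = 0, so the numerator reduces to
        // ∇f(x_k) * ∇f(x_k)^T, which is the form computed below.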
        double alpha = mx_to_constant(mx_multiplication(df_x, mx_transpose(df_x))) /
                       mx_to_constant(mx_multiplication(d_k, mx_multiplication(h_x, mx_transpose(d_k))));
        // Update: x_{k+1} = x_k + a_k * d_k
        x = mx_addition(x, mx_constant_multiplication(alpha, d_k));
        // Display the result of this iteration
        cout << "x" << i << ": ";
        print_matrix(x);
        cout << "alpha" << i << ": " << alpha << "\n";
        cout << "d" << i << ": ";
        print_matrix(d_k);
        cout << "B_k" << i << ": " << B_k << "\n";
        cout << "\n";
        // Recompute the gradient at the new point x_{k+1}
        x1 = x[0][0];
        x2 = x[0][1];
        df_x_prev = df_x; // keep ∇f(x_k) for the next Fletcher-Reeves coefficient
        df_x1 = 2*a*x1 + c*x2 + d;
        df_x2 = 2*b*x2 + c*x1 + e;
        df_x = {{df_x1, df_x2}};
        cout << "∇f(x" << i << "): ";
        print_matrix(df_x);
        if(i > 1) {
            cout << "∇f(x" << i-1 << "): ";
            print_matrix(df_x_prev);
        }
        cout << "------------------------------\n";
        // Optimality test: stop once the gradient is within tolerance in the L1 norm
        // (this also covers the exact-zero case)
        if(abs(df_x1) + abs(df_x2) <= 0.01) {
            f_x = a*x1*x1 + b*x2*x2 + c*x1*x2 + d*x1 + e*x2 + f;
            cout << "\nOptimality Reached\n";
            cout << "Min value is: " << f_x << "\n";
            break;
        }
    }
    return 0;
}