-
Notifications
You must be signed in to change notification settings - Fork 0
/
diff.mli
236 lines (180 loc) · 9.16 KB
/
diff.mli
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
(** The [Diff] module calculates partial derivatives between different variables
represented by the var type through the use of backpropagation.
The value of a variable is stored in the value parameter.
The children variables that are used to create this variable through the
given operation are stored as an array in the children parameter. If the
variable is given (i.e. not the result of an operation), the children
parameter stores an empty array.
The function to be used when differentiating during backpropagation based on
the operation used to produce this variable is stored in the op parameter.
The incremental gradient used while calculating the final gradient during
backpropagation is stored in the cur_grad parameter of each variable in the
output variable's children list.
The final gradient calculated during backpropagation is stored in the grad
parameter.
To give a basic example, consider the derivative of x*x. First we would
create the variable for x by calling init with any float value,
let x = init 42.0;;
Next, we create the equation using StdOps,
let output = StdOps.mul x x;;
Lastly, run the backwards propagation on the equation by calling backward,
backward output;;
Now if we check output we get,
{value = 1764.; children = [|{value = 42.; children = [||]; op = <fun>;
cur_grad = 1.; grad = 84.}; {value = 42.; children = [||]; op = <fun>;
cur_grad = 1.; grad = 84.}|]; op = <fun>; cur_grad = 1.; grad = 0.}
If we look at the gradient of x, which can be either child as they are both x,
the gradient is 84.0 which is 2 times the initial value of x.
*)
(** The abstract type representing a variable: a value together with the
 * children, differentiation function and gradients described at the top of
 * this file. Its contents are read through accessors such as [get_value],
 * [get_children], [get_op] and [get_grad]. *)
type var
(** The abstract type of a model.
 * NOTE(review): no value declared in the visible part of this interface
 * produces or consumes a [model] - confirm whether it is still needed. *)
type model
(** The type representing a matrix of floats. Unlike [var] and [model] it is
 * not abstract: it is exposed as an array of float arrays, so an entry is
 * read as mat.(i).(j). *)
type mat = float array array
(** [get_value x] is the matrix stored in the value parameter of x. *)
val get_value : var -> mat
(** [get_children x] is the array of variables stored in the children
 * parameter of x. Per the description at the top of this file, it is empty
 * when x is not the result of an operation. *)
val get_children : var -> var array
(** [get_op x] is the function stored in the op parameter of x, i.e. the
 * function used to differentiate during backpropagation based on the
 * operation that produced x. It maps an array of variables and a matrix to
 * an array of matrices.
 * NOTE(review): presumably the variable array is the children of x, the
 * matrix is the incoming gradient and the result holds one gradient per
 * child - confirm against the implementation. *)
val get_op : var -> var array -> mat -> mat array
(** [get_grad x] is the matrix stored in the grad parameter of x, i.e. the
 * final gradient calculated during backpropagation. *)
val get_grad : var -> mat
(** [init x] is a var record with a value of x, no children, an op function
 * that yields an empty array, a cur_grad of 1.0, and a grad of 0.0.
 * NOTE(review): since x is a matrix, cur_grad and grad are presumably
 * matrices of the same size as x filled with 1.0 and 0.0 respectively -
 * confirm against the implementation. *)
val init : mat -> var
(** [backward arg0] runs backwards propagation, with arg0 as a starting point.
 * Nothing is returned: the final gradients are stored in the grad parameter
 * of the variables involved and can be read back with [get_grad]. *)
val backward : var -> unit
(** [create_eltwise_op val_eval_f grad_eval_f] is a unary operation on
 * variables that evaluates each element of the argument matrix using the
 * function [val_eval_f], and uses [grad_eval_f] to calculate the gradient
 * of that element.
 * NOTE(review): the meaning of the two float arguments of [grad_eval_f] is
 * not visible from this interface - confirm against the implementation. *)
val create_eltwise_op : (float -> float) -> (float -> float -> float) ->
(var -> var)
(** [StdOps] is the module containing operations which can be used on variables,
 * in order to create new variables which can be used in backwards
 * differentiation. *)
module StdOps : sig
(** [add arg0 arg1] is the var record with a value of the sum of the values
 * of arg0 and arg1, children of the array containing arg0 and arg1, op of a
 * function that yields an array [|1.0;1.0|], a cur_grad of an array of the
 * same size as value filled with 1.0, and a grad of the same size filled
 * with 0.0.
 *)
val add : var -> var -> var
(** [sub arg0 arg1] is the var record with a value of the subtraction of the
 * value of arg1 from the value of arg0, children of the array containing
 * arg0 and arg1, op of a function that yields an array [|1.0;-1.0|], a
 * cur_grad of an array of the same size as value filled with 1.0, and a
 * grad of the same size filled with 0.0. *)
val sub : var -> var -> var
(** [mul arg0 arg1] is the var record with a value of the product of the
 * values of arg0 and arg1, children of the array containing arg0 and arg1,
 * op of a function that yields an array [|arg1.value;arg0.value|], a
 * cur_grad of an array of the same size as value filled with 1.0,
 * and a grad of the same size filled with 0.0.
 * NOTE(review): the op array is written here as the partial derivatives
 * with respect to arg0 and arg1 in that order - confirm the ordering
 * against the implementation. *)
val mul : var -> var -> var
(** [pow arg0 fl] is the var record with a value of arg0 raised to fl,
 * children of the array containing arg0, op of a function that yields an
 * array [|fl*.arg0.value**(fl -. 1.0)|], a cur_grad of an array of the same
 * size as value filled with 1.0, and a grad of the same size filled with
 * 0.0. *)
val pow : var -> float -> var
(** [sigmoid arg0] is the var record with the value of arg0 passed through
 * the sigmoid function element wise, children of the array containing arg0,
 * op of a function that applies the derivative of the sigmoid function
 * element wise, a cur_grad of an array of the same size as value filled
 * with 1.0, and a grad of the same size filled with 0.0. *)
val sigmoid : var -> var
(** [relu arg0] is the var record with the value of arg0 passed through the
 * relu (rectified linear units) function element wise, children of the array
 * containing arg0, op of a function that applies the derivative of the relu
 * function element wise, a cur_grad of an array of the same size as value
 * filled with 1.0, and a grad of the same size filled with 0.0. *)
val relu : var -> var
end
(** [Math] is the module containing functions which can be used on the mat type
 * in order to either produce new matrices, or modify existing ones. *)
module Math : sig
(** [InvalidDims] is an exception that occurs when the dimensions of the
 * matrices for a particular function are not the dimensions that are
 * expected. For example in addition of matrices the two input matrices have
 * to have the same dimensions, otherwise InvalidDims is raised. *)
exception InvalidDims
(** [mat_mul mat0 mat1] takes a matrix of size m x n and a matrix of size
 * n x z and returns the matrix product of the two, with size m x z.
 * raises: InvalidDims if the second dimension of mat0 does not agree with
 * the first dimension of mat1. *)
val mat_mul : mat -> mat -> mat
(** [mat_add mat0 mat1] takes two matrices of the same size and returns the
 * sum.
 * raises: InvalidDims if mat0 and mat1 do not have the same size. *)
val mat_add : mat -> mat -> mat
(** [add_in_place mat0 mat1] takes two matrices of the same size and computes
 * the sum. The difference between this function and [mat_add] is that we
 * alter one of the given matrices rather than create a new one and thus we
 * output a unit.
 * NOTE(review): which argument is overwritten is not visible from this
 * interface (presumably mat0) - confirm against the implementation.
 * raises: InvalidDims if mat0 and mat1 do not have the same size. *)
val add_in_place : mat -> mat -> unit
(** [mat_sub mat0 mat1] takes two matrices of the same size and returns the
 * subtraction.
 * NOTE(review): presumably mat0 minus mat1 - confirm the operand order
 * against the implementation.
 * raises: InvalidDims if mat0 and mat1 do not have the same size. *)
val mat_sub : mat -> mat -> mat
(** [mat_negate mat0] takes one matrix and returns a new matrix with
 * the negated values of mat0. *)
val mat_negate : mat -> mat
(** [mat_random n m] generates a matrix of size n x m with random entries.
 * The distribution of the entries is not specified by this interface. *)
val mat_random : int -> int -> mat
(** [scale c mat0] returns the matrix mat0 scaled by the constant c. *)
val scale : float -> mat -> mat
(** [map f mat0] returns a new matrix which is the result of applying function
 * f to every value contained in the matrix mat0. *)
val map : (float -> float) -> mat -> mat
(** [map2 f mat0 mat1]
 * f: takes two floats and outputs a float
 * mat0: float matrix of size n x m
 * mat1: float matrix of size n x m
 * returns: a new matrix of size n x m that is the result of applying
 * f on each pair of entries of mat0 and mat1 (i.e. f mat0.(i).(j)
 * mat1.(i).(j)) for all i < n, j < m
 * raises: InvalidDims if mat0 and mat1 do not have the same size. *)
val map2 : (float -> float -> float) -> mat -> mat -> mat
(** [transpose mat0] returns the transpose of mat0. *)
val transpose : mat -> mat
end
(** The Layers module generates layers that can be used as part of a model
 * (NOTE(review): the earlier wording said "module"; "model" is presumably
 * what was meant - confirm).
 * Currently it implements linear layers.
 * Potential layers to add in the future:
 * - convolution layer
 * - max_pool layer
 * - RNN layer *)
module Layers : sig
(** Layer abstract type. *)
type layer
(** [linear n m] creates a linear layer with a weights matrix W of size
 * n x m. The forward of the layer (with argument x) is simply Wx. *)
val linear : int -> int -> layer
(** [forward l x] applies the layer l to the variable x and
 * generates an output var. *)
val forward : layer -> var -> var
(** [params l] returns the list of the parameters used in layer l, as
 * variables. *)
val params : layer -> var list
end
(** The Optim module contains optimizers to update the parameters of a model.
 * Currently it implements basic gradient descent.
 * Potential optimizers to add in the future:
 * - Adam
 * - SGD *)
module Optim : sig
(** Optimizer abstract type. *)
type optim
(** [step optm] performs a single step with optimizer optm.
 * NOTE(review): the result is unit, so the step presumably updates the
 * optimized variables in place - confirm against the implementation. *)
val step : optim -> unit
(** [zero_grad optm] zeros out the gradients for all parameters
 * optm is optimizing over. *)
val zero_grad : optim -> unit
(** [gd params lr] generates a classic gradient descent optimizer
 * that optimizes over the variables in params with learning rate lr. *)
val gd : var list -> float -> optim
end