diff --git a/Project.toml b/Project.toml
index fb33371799..1a781daec3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -26,6 +26,7 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 TerminalLoggers = "5d786b92-1e48-4d6f-9151-6b4477ca9bed"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 ZygoteRules = "700de1a5-db45-46bc-99cf-38207098b444"
+GenericLinearAlgebra = "14197337-ba66-59df-a3e3-ca00e7dcff7a"
 
 [compat]
 Adapt = "3"
@@ -48,6 +49,7 @@ TerminalLoggers = "0.1"
 Zygote = "0.5, 0.6"
 ZygoteRules = "0.2"
 julia = "1.5"
+GenericLinearAlgebra = "0.2.5"
 
 [extras]
 DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0"
diff --git a/docs/src/examples/lagrangian_nn.jl b/docs/src/examples/lagrangian_nn.jl
new file mode 100644
index 0000000000..2a5e7423b4
--- /dev/null
+++ b/docs/src/examples/lagrangian_nn.jl
@@ -0,0 +1,24 @@
+# Fit a LagrangianNN to a single data point of a spring, q̈ = -k*q/m
+using Flux, ReverseDiff, DiffEqFlux
+
+m, k, b = 1, 1, 1
+
+X = rand(2, 1)       # state [q; q̇]
+Y = -k .* X[1] ./ m  # target acceleration
+
+g = Chain(Dense(2, 10, σ), Dense(10, 1))
+model = LagrangianNN(g)
+ps = model.p
+re = model.re
+
+loss(x, y, p) = sum(abs2, y .- model(x, p))
+
+opt = ADAM(0.01)
+epochs = 100
+
+for epoch in 1:epochs
+    x, y = X, Y
+    gs = ReverseDiff.gradient(p -> loss(x, y, p), ps)
+    Flux.Optimise.update!(opt, ps, gs)
+    @show loss(x, y, ps)
+end
diff --git a/src/DiffEqFlux.jl b/src/DiffEqFlux.jl
index e9c09e7637..b26ef3e4b9 100644
--- a/src/DiffEqFlux.jl
+++ b/src/DiffEqFlux.jl
@@ -2,7 +2,7 @@ module DiffEqFlux
 
 using GalacticOptim, DataInterpolations, DiffEqBase, DiffResults, DiffEqSensitivity,
     Distributions, ForwardDiff, Flux, Requires, Adapt, LinearAlgebra, RecursiveArrayTools,
-    StaticArrays, Base.Iterators, Printf, Zygote
+    StaticArrays, Base.Iterators, Printf, Zygote, GenericLinearAlgebra
 
 using DistributionsAD
 import ProgressLogging, ZygoteRules
@@ -82,11 +82,13 @@ include("tensor_product_basis.jl")
 include("tensor_product_layer.jl")
 include("collocation.jl")
 include("hnn.jl")
+include("lnn.jl")
 include("multiple_shooting.jl")
 
 export diffeq_fd, diffeq_rd, diffeq_adjoint
 export DeterministicCNF, FFJORD, NeuralODE, NeuralDSDE, NeuralSDE, NeuralCDDE, NeuralDAE, NeuralODEMM, TensorLayer, AugmentedNDELayer, SplineLayer, NeuralHamiltonianDE
 export HamiltonianNN
+export LagrangianNN
 export ChebyshevBasis, SinBasis, CosBasis, FourierBasis, LegendreBasis, PolynomialBasis
 export neural_ode, neural_ode_rd
 export neural_dmsde
diff --git a/src/lnn.jl b/src/lnn.jl
new file mode 100644
index 0000000000..97b9bed615
--- /dev/null
+++ b/src/lnn.jl
@@ -0,0 +1,42 @@
+"""
+Constructs a Lagrangian Neural Network [1].
+
+References:
+[1] Miles Cranmer, Sam Greydanus, Stephan Hoyer, Peter Battaglia, David Spergel, and Shirley Ho. Lagrangian Neural Networks.
+    In ICLR 2020 Workshop on Integration of Deep Neural Models and Differential Equations, 2020.
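+
+The wrapped model maps the state x = [q; q̇] (coordinates stacked on top of the
+velocities) to a scalar Lagrangian L(q, q̇). Calling the layer returns the acceleration
+obtained from the Euler-Lagrange equations,
+    q̈ = (∂²L/∂q̇∂q̇ᵀ)⁻¹ (∇_q L - (∂²L/∂q̇∂qᵀ) q̇),
+with the gradients and Hessian of L taken with Zygote.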
+"""
+
+struct LagrangianNN{M, R, P}
+    model::M
+    re::R
+    p::P
+
+    # Inner constructor: destructure the model so its parameters can be passed in explicitly
+    function LagrangianNN(model; p = nothing)
+        _p, re = Flux.destructure(model)
+        if p === nothing
+            p = _p
+        end
+        return new{typeof(model), typeof(re), typeof(p)}(model, re, p)
+    end
+end
+
+function (lnn::LagrangianNN)(x, p = lnn.p)
+    @assert iseven(size(x, 1)) # x must stack the coordinates q on top of the velocities q̇
+    M = div(size(x, 1), 2)     # number of degrees of freedom
+    re = lnn.re
+    L = x -> sum(re(p)(x))     # scalar Lagrangian L(q, q̇)
+
+    # Full Hessian of L with respect to [q; q̇]; only two of its blocks are needed
+    hess = Zygote.hessian_reverse(L, x)
+    hess_vv = hess[M+1:end, M+1:end]  # ∂²L/∂q̇∂q̇ᵀ (velocity-velocity block)
+    hess_vq = hess[M+1:end, 1:M]      # ∂²L/∂q̇∂qᵀ (mixed block)
+    inv_hess = GenericLinearAlgebra.pinv(hess_vv)
+
+    # ∇_q L, the gradient with respect to the coordinates only
+    grad_q = Zygote.gradient(L, x)[1][1:M, :]
+
+    # Euler-Lagrange equations solved for the acceleration:
+    # q̈ = (∂²L/∂q̇∂q̇ᵀ)⁻¹ (∇_q L - (∂²L/∂q̇∂qᵀ) q̇)
+    return inv_hess * (grad_q .- hess_vq * x[M+1:end])
+end
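
Note (illustrative sketch, not part of the diff): the Euler-Lagrange step implemented in
src/lnn.jl can be sanity-checked against an analytic Lagrangian. The snippet below is only
a sketch under the stated assumptions; it uses ForwardDiff, which DiffEqFlux already
imports, and reproduces the spring target Y = -k*X[1]/m from
docs/src/examples/lagrangian_nn.jl. All names in it are local to the sketch.

using ForwardDiff

m, k = 1.0, 1.0
L(x) = 0.5 * m * x[2]^2 - 0.5 * k * x[1]^2  # L(q, q̇) = m*q̇²/2 - k*q²/2

x = [0.7, 0.3]  # state [q; q̇]
M = 1           # one degree of freedom

H = ForwardDiff.hessian(L, x)   # full Hessian of L w.r.t. [q; q̇]
gradL = ForwardDiff.gradient(L, x)

H_vv = H[M+1:end, M+1:end]      # ∂²L/∂q̇∂q̇ᵀ
H_vq = H[M+1:end, 1:M]          # ∂²L/∂q̇∂qᵀ
qdd = H_vv \ (gradL[1:M] - H_vq * x[M+1:end])  # Euler-Lagrange acceleration

@assert qdd[1] ≈ -k * x[1] / m  # matches the target used in the docs example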