diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml new file mode 100644 index 0000000..7631024 --- /dev/null +++ b/.github/workflows/draft-pdf.yml @@ -0,0 +1,23 @@ +on: [push] + +jobs: + paper: + runs-on: ubuntu-latest + name: Paper Draft + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Build draft PDF + uses: openjournals/openjournals-draft-action@master + with: + journal: joss + # This should be the path to the paper within your repo. + paper-path: paper/paper.md + - name: Upload + uses: actions/upload-artifact@v1 + with: + name: paper + # This is the output path where Pandoc will write the compiled + # PDF. Note, this should be the same directory as the input + # paper.md + path: paper/paper.pdf diff --git a/.gitignore b/.gitignore index 071858e..afb5c07 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ Manifest.toml docs/build docs/Manifest.toml -test/current/ \ No newline at end of file +test/current/ +paper/paper.pdf diff --git a/Project.toml b/Project.toml index 28d5f64..ccf69ce 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "NoisySignalIntegration" uuid = "bdbab1c0-76f6-415c-8ec7-0d0463b9bd16" authors = ["Nils Luettschwager "] -version = "0.2.3" +version = "1.0.0rc1" [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" @@ -16,13 +16,13 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] -julia = "1.5" Distributions = "0.23, 0.24, 0.25" -LsqFit = "0.11, 0.12" -MonteCarloMeasurements = "0.9, 0.10" -Polynomials = "1.0, 2.0" +LsqFit = ">=0.11, <0.16" +MonteCarloMeasurements = "0.9, 1" +Polynomials = "1.0, <5.0" RecipesBase = "1.0" -StatsBase = "0.33" +StatsBase = "0.33, <0.35" +julia = ">=1.5" [extras] NumericalIntegration = "e7bfaba1-d571-5449-8927-abc22e82249b" diff --git a/paper/paper.bib b/paper/paper.bib new file mode 100644 index 0000000..3208053 --- /dev/null +++ b/paper/paper.bib @@ -0,0 +1,117 @@ +@Article{karir2019, +author ="Karir, Ginny and Lüttschwager, Nils O. B. and Suhm, Martin A.", +title ="Phenylacetylene as a gas phase sliding balance for solvating alcohols", +journal ="Physical Chemistry Chemical Physics", +year ="2019", +volume ="21", +issue ="15", +pages ="7831-7840", +publisher ="The Royal Society of Chemistry", +doi ="10.1039/C9CP00435A", +url ="http://dx.doi.org/10.1039/C9CP00435A" +} + +@article{goebench, +author = {Gottschalk,Hannes C. and Poblotzki,Anja and Suhm,Martin A. and Al-Mogren,Muneerah M. and Antony,Jens and Auer,Alexander A. and Baptista,Leonardo and Benoit,David M. and Bistoni,Giovanni and Bohle,Fabian and Dahmani,Rahma and Firaha,Dzmitry and Grimme,Stefan and Hansen,Andreas and Harding,Michael E. and Hochlaf,Majdi and Holzer,Christof and Jansen,Georg and Klopper,Wim and Kopp,Wassja A. and Kröger,Leif C. and Leonhard,Kai and Mouhib,Halima and Neese,Frank and Pereira,Max N. and Ulusoy,Inga S. and Wuttke,Axel and Mata,Ricardo A. }, +title = {The furan microsolvation blind challenge for quantum chemical methods: First steps}, +journal = {The Journal of Chemical Physics}, +volume = {148}, +number = {1}, +pages = {014301}, +year = {2018}, +doi = {10.1063/1.5009011}, +URL = {https://doi.org/10.1063/1.5009011}, +eprint = {https://doi.org/10.1063/1.5009011} +} + +@article{distributions1, + author = {Mathieu Besançon and Theodore Papamarkou and David Anthoff and Alex Arslan and Simon Byrne and Dahua Lin and John Pearson}, + title = {Distributions.jl: Definition and Modeling of Probability Distributions in the JuliaStats Ecosystem}, + journal = {Journal of Statistical Software}, + volume = {98}, + number = {16}, + year = {2021}, + keywords = {Julia; distributions; modeling; interface; mixture; KDE; sampling; probabilistic programming; inference}, + issn = {1548-7660}, + pages = {1--30}, + doi = {10.18637/jss.v098.i16}, + url = {https://www.jstatsoft.org/v098/i16} +} +% reference for the software itself +@misc{distributions2, + author = {Dahua Lin and + John Myles White and + Simon Byrne and + Douglas Bates and + Andreas Noack and + John Pearson and + Alex Arslan and + Kevin Squire and + David Anthoff and + Theodore Papamarkou and + Mathieu Besançon and + Jan Drugowitsch and + Moritz Schauer and + {other contributors}}, + title = {{JuliaStats/Distributions.jl: a Julia package for probability distributions and associated functions}}, + month = july, + year = 2019, + doi = {10.5281/zenodo.2647458}, + url = {https://doi.org/10.5281/zenodo.2647458} +} + +@article{mcm, + author = {Fredrik {Bagge Carlson}}, + title = {MonteCarloMeasurements.jl: Nonlinear Propagation of Arbitrary Multivariate + Distributions by means of Method Overloading}, + year = 2020, + eid = {arXiv:2001.07625}, + pages = {arXiv:2001.07625}, + eprint = {2001.07625}, + primaryClass = {cs.MS}, + url = {https://arxiv.org/abs/2001.07625}, +} + +@misc{uncertainties, + author = "Eric O. Lebigot", + title = "Uncertainties: a Python package for calculations with uncertainties", + url = "https://uncertainties-python-package.readthedocs.io", + year = "" +} + +@article{metrolopy, + author = "Harold V. Parks", + title = "MetroloPy", + journal = "GitHub Repository", + url = "https://github.com/nrc-cnrc/MetroloPy/", + year = "2021" +} + +@article{measurements, + author = {{Giordano}, Mos{\`e}}, + title = "{Uncertainty propagation with functionally correlated quantities}", + pages = {arxiv:1610.08716}, + primaryClass = "physics.data-an", + year = 2016, + url = {https://arxiv.org/abs/1610.08716}, +} + +@article{gawrilow, + author = "Maxim Gawrilow and Martin A. Suhm", + title = "Quantifying Conformational Isomerism in Chain Molecules by Linear {R}aman Spectroscopy: The Case of Methyl Esters", + journal = "Molecules", + year = 2021, + volume = 26, + pages = 4523, + doi = "10.3390/molecules26154523", +} + +@article{zimmermann, + author = "Charlotte Zimmermann and Manuel Lange and Martin A. Suhm", + title = "Halogens in Acetophenones Direct the Hydrogen Bond Docking Preference of Phenol via Stacking Interactions", + journal = "Molecules", + year = 2021, + volume = 26, + pages = 4883, + doi = "10.3390/molecules26164883", +} diff --git a/paper/paper.md b/paper/paper.md new file mode 100644 index 0000000..de6f80e --- /dev/null +++ b/paper/paper.md @@ -0,0 +1,74 @@ +--- +title: 'NoisySignalIntegration.jl: A Julia package for uncertainty evaluation of numeric integrals' +tags: + - Julia + - chemistry + - physics + - measurement uncertainty + - noisy data + - numeric integration +authors: + - name: Nils O. B. Lüttschwager + orcid: 0000-0001-8459-1714 + affiliation: 1 +affiliations: + - name: Georg-August-University Göttingen, Institute of Physical Chemistry, Tammannstraße 6, DE-37077 Göttingen + index: 1 +date: 8 July 2021 +bibliography: paper.bib +--- + +# Summary + +The evaluation of peak or band areas is a recurring task in scientific data +evaluation. For example, in molecular spectroscopy, absorption line or band +areas are often used to determine substance abundance. +NoisySignalIntegration.jl provides functionality to evaluate such signal +areas and associated uncertainties using a Monte-Carlo approach. Uncertainties +may include contributions from (potentially correlated) Gaussian noise, +baseline subtraction, and uncertainty in placing integration bounds. Uncertain +integration bounds can be defined in several ways to constrain the integration +based on the physical system under investigation (asymmetric signals, symmetric +signals, signals with identical width). The package thus offers a more +objective uncertainty evaluation than a statement based on experience or +laborious manual analysis [@goebench]. + +NoisySignalIntegration.jl includes a [detailed +documentation](https://nluetts.github.io/NoisySignalIntegration.jl/stable/) that +covers the typical workflow with several examples. The API uses custom +datatypes and convenience functions to aid the data +analysis and permits flexible customizations: Any probability distribution from +Distributions.jl [@distributions1; @distributions2] is a valid input to express uncertainty +in integration bounds, thus allowing to adapt the uncertainty analysis as +needed to ones state of knowledge. The core integration function can be swapped +if the included trapezoidal integration is deemed unsatisfactory in terms of +accuracy. The package uses MonteCarloMeasurements.jl [@mcm] to express +uncertain numbers which enables immediate uncertainty propagation. + +# Statement of need + +Several open source options for uncertainty propagation are available, e.g. +the Python packages uncertainties [@uncertainties] or MetroloPy [@metrolopy] or +Julia packages Measurements.jl [@measurements] and the aforementioned +MonteCarloMeasurements.jl [@mcm], but uncertainty evaluation when integrating +experimental x-y data is not fully addressed and requires significant effort +from the user. A straightforward solution to this problem is to fit a peak +function of appropriate shape and derive the uncertainty from the fit. However, +the data may not be described by a peak function and/or the noise may not be +normally distributed, preventing a simple least squares regression. +NoisySignalIntegration.jl was developed as a solution to this problem. While +the package was developed specifically for the determination of band area +uncertainties in the context of molecular spectroscopy [@karir2019; @gawrilow; @zimmermann], +it is applicable in any research area where signals (peaks, lines, bands, +etc.)^[The name usually depends on the specific area and context.] in x-y data +need to be integrated and a thorough uncertainty analysis is desired. + +# Acknowledgements + +The author thanks Charlotte Zimmermann, Maxim Gawrilow, and Robert Medel for +testing and discussions during the development of NoisySignalIntegration.jl. +The development has furthermore profited from the benchmarking spirit and +efforts in the DFG research training group GRK 2455/389479699 to analyse +experiments as rigorously as possible. + +# References diff --git a/src/bounds.jl b/src/bounds.jl index 4439b1a..534ac35 100644 --- a/src/bounds.jl +++ b/src/bounds.jl @@ -87,15 +87,15 @@ julia> ubs = UncertainBound([3., 7.], scale_shift_beta(2, 2, 1.3, 1.5), uc) UncertainBound{Float64, 10000}(start = 6.3 ± 0.022, end = 7.7 ± 0.022) ``` """ -struct UncertainBound{T, N} - left::Particles{T, N} - right::Particles{T, N} +struct UncertainBound{T,N} + left::Particles{T,N} + right::Particles{T,N} # quantiles for local baseline subtraction and plotting _left_quantiles _right_quantiles end -function Base.show(io::IO, c::UncertainBound{T, N}) where {T, N} +function Base.show(io::IO, c::UncertainBound{T,N}) where {T,N} print(io, "UncertainBound{$T, $N}(start = $(c.left), end = $(c.right))") end @@ -103,16 +103,16 @@ end # Constructors # Base constructor that injects quantiles -function UncertainBound(left::Particles{T, N}, right::Particles{T, N}) where {T, N} +function UncertainBound(left::Particles{T,N}, right::Particles{T,N}) where {T,N} qs = BOUND_QUANTILES - lqs, rqs = [[quantile(lr, q) for q in qs] for lr in (left, right)] - return UncertainBound{T, N}(left, right, lqs, rqs) + lqs, rqs = [[pquantile(lr, q) for q in qs] for lr in (left, right)] + return UncertainBound{T,N}(left, right, lqs, rqs) end # Create a left/right bound -function UncertainBound(left::S, right::T, N::Int=10_000) where {S <: ContinuousUnivariateDistribution, T <: ContinuousUnivariateDistribution} - left = Particles(N, left) +function UncertainBound(left::S, right::T, N::Int=10_000) where {S<:ContinuousUnivariateDistribution,T<:ContinuousUnivariateDistribution} + left = Particles(N, left) right = Particles(N, right) return UncertainBound(left, right) end @@ -122,8 +122,8 @@ end function UncertainBound( pos::Vector{T}, width::ContinuousUnivariateDistribution, - uc::UncertainCurve{T, N} -) where {T, N} + uc::UncertainCurve{T,N} +) where {T,N} M = length(pos) left = Array{T}(undef, M, N) @@ -141,7 +141,7 @@ function UncertainBound( left[i, j], right[i, j] = left_right_from_peak(uc.x, cⱼ.y, pᵢ, wⱼ) end end - + return [UncertainBound(Particles(left[i, :]), Particles(right[i, :])) for i in 1:M] end @@ -150,8 +150,8 @@ end function UncertainBound( pos::T, width::ContinuousUnivariateDistribution, - uc::UncertainCurve{T, N} -) where {T, N} + uc::UncertainCurve{T,N} +) where {T,N} bnd = UncertainBound([pos], width, uc) return bnd[1] end @@ -177,5 +177,5 @@ Create a scaled and shifted `Beta(α, β)` distribution. Samples fall in the interval [`a`, `b`]. """ function scale_shift_beta(α, β, a, b) - return LocationScale(a, b - a, Beta(α, β)) + return Beta(α, β) * (b - a) + a end diff --git a/src/common.jl b/src/common.jl index a87aaaa..fded864 100644 --- a/src/common.jl +++ b/src/common.jl @@ -24,13 +24,13 @@ Find peak in interval `p ± w/2` and return (speak position - `w`/2, peak positi """ function left_right_from_peak(x, y, p, w) # find indices of interval [left, right] - left = p - w/2 - right = p + w/2 + left = p - w / 2 + right = p + w / 2 l = searchsortedfirst(x, left) # left index: x[l] <= p - w r = searchsortedlast(x, right) # right index: x[r] >= p + w - + m = l + argmax(view(y, l:r)) - 1 # index of local maximum at x[m] - return [x[m] - w/2, x[m] + w/2] + return [x[m] - w / 2, x[m] + w / 2] end @@ -41,118 +41,17 @@ end """Linearly interpolate y-value at position x between two points (x₀, y₀) and (x₁, y₁).""" @inline function lininterp(x, x₀, x₁, y₀, y₁) δx = x₁ - x₀ - y = (y₁*(x - x₀) + y₀*(x₁ - x)) / δx + y = (y₁ * (x - x₀) + y₀ * (x₁ - x)) / δx return y end """Linearly interpolate y-value at position `x` for x,y data; `xs` has to be sorted.""" -@inline function lininterp(x::T, xs::Vector{T}, ys::Vector{S}) where {T <: Number, S <: Number} +@inline function lininterp(x::T, xs::Vector{T}, ys::Vector{S}) where {T<:Number,S<:Number} i = searchsortedfirst(xs, x) (i < 2 || i > length(xs)) && throw(error("x is outside the support: $x ∉ ($(minimum(xs)), $(maximum(xs))].")) return lininterp(x, xs[i-1], xs[i], ys[i-1], ys[i]) end -""" - trapz(x::AbstractArray{T}, y::AbstractArray{T}, left, right) where {T<:AbstractFloat} - -Integrate vector `y` in interval [`left`, `right`] using trapezoidal integration. - -# Notes - -`left` and `right` must support conversion to the datatype `T`. - -If `left` and `right` do not fall on the `x`-grid, additional data points will be interpolated linearly. -(i.e. the width of the first and last trapezoid will be somewhat smaller). - -If `left` and/or `right` falls outside the `x`-range, the integration window will be cropped -to the available range. - -# Examples - -```jldoctest -julia> x = collect(Float64, 0:10); - -julia> y = ones(Float64, 11); - - -julia> trapz(x, y, 1, 3) -2.0 - - -julia> trapz(x, y, -1, 11) # at most, we can integrate the available x-range, 0 to 10 -10.0 - - -julia> trapz(x, y, -10, 20) -10.0 - - -julia> trapz(x, y, 1.1, 1.3) ≈ 0.2 # if we integrate "between" the grid, data points are interpolated -true -``` -""" -function trapz(x::AbstractArray{T}, y::AbstractArray{T}, left::T, right::T) where {T<:AbstractFloat} - - left, right = left < right ? (left, right) : (right, left) - - A = zero(eltype(y)) # Area to be returned - - N = length(x) - N != length(y) && throw(ArgumentError("Data arrays `x` and `y` must have the same length.")) - N < 2 && return A - - yl = yr = nothing # the y-values of the left and right integration bound, to be interpolated - lastiter = false - j = 2 - - while j <= N - - x₀ = x[j-1] - x₁ = x[j] - y₀ = y[j-1] - y₁ = y[j] - - if x₁ <= left - j += 1 - continue - elseif yl === nothing - # this will only run once, when we enter the integration window - # test whether x₀ should be replaced by `left` - if x₀ < left - y₀ = lininterp(left, x₀, x₁, y₀, y₁) - x₀ = left - else - # this case means that `left` <= x[1] - left = x₀ - end - yl = y₀ - end - - # test whether x₁ should be replaced by `right` - if x₁ >= right - # we move out of the integration window - yr = x₁ == right ? y₁ : lininterp(right, x₀, x₁, y₀, y₁) - x₁ = right - y₁ = yr - lastiter = true # we shall break the loop after this iteration - end - - A += singletrapz(x₀, x₁, y₀, y₁) - - lastiter && break - - j += 1 - end - - return A -end - -function trapz(x::AbstractArray{T}, y::AbstractArray{T}, left, right) where {T<:AbstractFloat} - left = T(left) - right = T(right) - return trapz(x, y, left, right) -end - """ @samples n::Integer e::Expression @@ -177,9 +76,9 @@ MonteCarloMeasurements.Particles{Float64, 9999} macro samples(n::Integer, e::Expr) err = ArgumentError("Expression not understood.") if length(e.args) != 3 - return :( throw($err) ) + return :(throw($err)) end - + op, x, y = e.args if op == :± @@ -195,6 +94,6 @@ macro samples(n::Integer, e::Expr) Particles($n, Uniform(a, b)) end else - return :( throw($err) ) + return :(throw($err)) end -end \ No newline at end of file +end diff --git a/src/curves.jl b/src/curves.jl index 211427d..37f97e9 100644 --- a/src/curves.jl +++ b/src/curves.jl @@ -39,7 +39,7 @@ Useful when cutting out signals from `Curves` for baseline correction purposes. """ stitch(curves...) = reduce(vcat, curves) -function Base.sort(c::T) where {T <: AbstractCurve} +function Base.sort(c::T) where {T<:AbstractCurve} x, y = c.x, c.y ind = sortperm(x) return T(x[ind], y[ind]) @@ -112,7 +112,7 @@ Curve{Float64}, 3 datapoints struct Curve{T} <: AbstractCurve x::Vector{T} y::Vector{T} - function Curve{T}(x, y) where {T <: Number} + function Curve{T}(x, y) where {T<:Number} verify_same_length(x, y) return new{T}(x, y) end @@ -123,7 +123,7 @@ function Curve(y) return Curve{T}(collect(T, 1:length(y)), y) end -Curve(x::Vector{T}, y::Vector{T}) where T = Curve{T}(x, y) +Curve(x::Vector{T}, y::Vector{T}) where {T} = Curve{T}(x, y) function Curve(x, y) verify_same_length(x, y) @@ -173,16 +173,16 @@ UncertainCurve{Float64, 2000}, 3 datapoints (3.0, 5.0 ± 1.0) ``` """ -struct UncertainCurve{T, N} <: AbstractCurve +struct UncertainCurve{T,N} <: AbstractCurve x::Vector{T} - y::Vector{Particles{T, N}} - function UncertainCurve{T, N}(x::Vector{T}, y::Vector{Particles{T, N}}) where {T, N} + y::Vector{Particles{T,N}} + function UncertainCurve{T,N}(x::Vector{T}, y::Vector{Particles{T,N}}) where {T,N} verify_same_length(x, y) - return new{T, N}(x, y) + return new{T,N}(x, y) end end -UncertainCurve(x::Vector{T}, y::Vector{Particles{T, N}}) where {T, N} = UncertainCurve{T, N}(x, y) -UncertainCurve(y::Vector{Particles{T, N}}) where {T, N} = UncertainCurve(collect(T, 1:length(y)), y) +UncertainCurve(x::Vector{T}, y::Vector{Particles{T,N}}) where {T,N} = UncertainCurve{T,N}(x, y) +UncertainCurve(y::Vector{Particles{T,N}}) where {T,N} = UncertainCurve(collect(T, 1:length(y)), y) """ @@ -266,7 +266,114 @@ Curve{Float64}, 4 datapoints function crop(s::AbstractCurve, left, right) T = eltype(s.x) S = typeof(s) - i = searchsorted(s.x, T(left)) |> first + i = searchsorted(s.x, T(left)) |> first j = searchsorted(s.x, T(right)) |> last return S(s.x[i:j], s.y[i:j]) -end \ No newline at end of file +end + +""" +An iterator over the curve yielding datapoints as a tuple (xi, yi). + +If start end endpoint do not fall on grid of the Curve object, +y-values will be interpolated linearly. +""" +function Base.iterate( + iter::Tuple{Curve{Float64},Float64,Float64}, + state::Tuple{Int64,Bool} +)::Union{Nothing,Tuple{Tuple{Float64,Float64},Tuple{Int64,Bool}}} + mv, left, right = iter + index, return_right = state + + while index < length(mv.x) + xi = mv.x[index] + xj = mv.x[index+1] + yi = mv.y[index] + yj = mv.y[index+1] + + state = let + in_bounds_xi = left <= xi <= right + in_bounds_xj = left <= xj <= right + left_between_ij = xi < left < xj + right_between_ij = xi < right < xj + in_bounds_xi, in_bounds_xj, left_between_ij, right_between_ij + end + + if state == (false, false, false, false) + # common case where we are outside the bounds + # we have two sub-cases to deal with: + # xj < left -> continue + # xi > right -> depleted + if xi > right + return nothing + end + # not yet in bounds, continue loop + index += 1 + continue + elseif state == (true, true, false, false) + # common case where we are inside the bounds + # but did not just enter them or are not just about to exit them + return ((xi, yi), (index + 1, false)) + elseif state == (false, true, true, false) + # moving inside the bounds + # xi left xj right -> return (left, y at left) + return ((left, lininterp(left, xi, xj, yi, yj)), (index + 1, false)) + elseif state == (true, false, false, true) + # exiting the bounds + # left xi right xj -> return (xi, yj), + # then return (right, y at right) in next iteration + if return_right + # if we already returned xi for this index, + # interpolate and return data point at `right` + yright = lininterp(right, xi, xj, yi, yj) + return ((right, yright), (index + 1, false)) + else + # we have to return (xi, yi) before returning + # interpolated data point at `right`; + # note that index is _not_ incremented and + # `return_right` flag is set + return ((xi, yi), (index, true)) + end + elseif state == (false, true, false, false) + # rare case where xj falls on left + # xi xj = left right -> continue + index += 1 + continue + elseif state == (true, false, false, false) + # rare case where xi falls on right + # left xi = right xj -> return (xi, yi) + return ((xi, yi), (index + 1, false)) + elseif state == (false, false, true, true) + # very rare case where xi left right xj + # -> return (left, y at left) and (right, y at right) in next iteration + # (note that left == right is not possible, + # because this case is explicitly handled + # when the iterator is constructed) + if return_right + yright = lininterp(right, xi, xj, yi, yj) + return ((right, yright), (index + 1, false)) + else + yleft = lininterp(left, xi, xj, yi, yj) + return ((left, yleft), (index, true)) + end + else + throw("Met an iterator state that should not have been possible") + end + end + # check if last element is still within bounds + if index == length(mv.x) && left <= mv.x[index] <= right + return ((mv.x[index], mv.y[index]), (index + 1, false)) + else + return nothing + end +end + +function Base.iterate( + iter::Tuple{Curve{Float64},Float64,Float64}, +)::Union{Nothing,Tuple{Tuple{Float64,Float64},Tuple{Int64,Bool}}} + mv, left, right = iter + left, right = min(left, right), max(left, right) + if left == right || mv.x[end] <= left || mv.x[1] >= right + return nothing + end + Base.iterate(iter, (1, false)) +end diff --git a/src/integration.jl b/src/integration.jl index a5c8b3a..4efb5a5 100644 --- a/src/integration.jl +++ b/src/integration.jl @@ -11,6 +11,64 @@ end _endpoint_to_endpoint_baseline(xs, ys, xₗ, xᵣ) = (xₗ, xᵣ, lininterp(xₗ, xs, ys), lininterp(xᵣ, xs, ys)) +function trapz(crv::Curve{T}, left::T, right::T) where {T<:AbstractFloat} + left, right = min(left, right), max(left, right) + area = zero(T) + for ((xi, yi), (xj, yj)) in zip((crv, left, right), Iterators.drop((crv, left, right), 1)) + area += singletrapz(xi, xj, yi, yj) + end + return area +end + +""" + trapz(x::AbstractArray{T}, y::AbstractArray{T}, left, right) where {T<:AbstractFloat} + +Integrate vector `y` in interval [`left`, `right`] using trapezoidal integration. + +# Notes + +`left` and `right` must support conversion to the datatype `T`. + +If `left` and `right` do not fall on the `x`-grid, additional data points will be interpolated linearly. +(i.e. the width of the first and last trapezoid will be somewhat smaller). + +If `left` and/or `right` falls outside the `x`-range, the integration window will be cropped +to the available range. + +# Examples + +```jldoctest +julia> x = collect(Float64, 0:10); + +julia> y = ones(Float64, 11); + + +julia> trapz(x, y, 1, 3) +2.0 + + +julia> trapz(x, y, -1, 11) # at most, we can integrate the available x-range, 0 to 10 +10.0 + + +julia> trapz(x, y, -10, 20) +10.0 + + +julia> trapz(x, y, 1.1, 1.3) ≈ 0.2 # if we integrate "between" the grid, data points are interpolated +true +``` +""" +function trapz(xs::AbstractArray{T}, ys::AbstractArray{T}, left::T, right::T) where {T<:AbstractFloat} + trapz(Curve(xs, ys), left, right) +end + +function trapz(x::AbstractArray{T}, y::AbstractArray{T}, left, right) where {T<:AbstractFloat} + left = T(left) + right = T(right) + return trapz(x, y, left, right) +end + """ mc_integrate(uc::UncertainCurve{T, N}, bnds::Vector{UncertainBound{T, M}}; intfun=trapz) @@ -36,7 +94,7 @@ If true, for each draw a local linear baseline defined by the integration window The y-values of the start and end point are derived from a weighted average over the start and end point distributions, see [the documentation](https://nluetts.github.io/NoisySignalIntegration.jl/dev/baseline/#Build-in) for further information. """ -function mc_integrate(uc::UncertainCurve{T, N}, bnds::Vector{UncertainBound{T, M}}; intfun=trapz, subtract_baseline=false, local_baseline=false) where {T, M, N} +function mc_integrate(uc::UncertainCurve{T,N}, bnds::Vector{UncertainBound{T,M}}; intfun=trapz, subtract_baseline=false, local_baseline=false) where {T,M,N} M != N && error("Samples sizes incompatible") subtract_baseline && @warn("subtract_baseline keyword argument is deprecated, use local_baseline instead.") @@ -58,9 +116,9 @@ function mc_integrate(uc::UncertainCurve{T, N}, bnds::Vector{UncertainBound{T, M end end end - return [Particles(areas[:,i]) for i in 1:size(areas)[2]] + return [Particles(areas[:, i]) for i in 1:size(areas)[2]] end -function mc_integrate(uc::S, bnd::T; intfun=trapz, subtract_baseline=false, local_baseline=false) where {S <: UncertainCurve, T <: UncertainBound} +function mc_integrate(uc::S, bnd::T; intfun=trapz, subtract_baseline=false, local_baseline=false) where {S<:UncertainCurve,T<:UncertainBound} return mc_integrate(uc, [bnd]; intfun=intfun, subtract_baseline=subtract_baseline, local_baseline=local_baseline)[1] -end \ No newline at end of file +end diff --git a/src/noise.jl b/src/noise.jl index 10a5cd0..46e7258 100644 --- a/src/noise.jl +++ b/src/noise.jl @@ -35,14 +35,14 @@ Construct `NoiseSample` objects with a high enough order struct NoiseSample{T} <: AbstractCurve x::Vector{T} y::Vector{T} - function NoiseSample{T}(x, y, n::Int=0) where T + function NoiseSample{T}(x, y, n::Int=0) where {T} verify_same_length(x, y) return new{T}(x, detrend(x, y, n)) end end -NoiseSample(x::Vector{T}, y::Vector{T}, n::Int=0) where T = NoiseSample{T}(x, y, n) -NoiseSample(y::Vector{T}, n::Int=0) where T = NoiseSample(collect(T, 1:length(y)), y, n) +NoiseSample(x::Vector{T}, y::Vector{T}, n::Int=0) where {T} = NoiseSample{T}(x, y, n) +NoiseSample(y::Vector{T}, n::Int=0) where {T} = NoiseSample(collect(T, 1:length(y)), y, n) NoiseSample(s::Curve, n::Int=0) = NoiseSample(s.x, s.y, n) """ @@ -73,11 +73,11 @@ Model to describe noise following a univariate Gaussian distribution - `σ::T` : standard deviation """ -struct GaussianNoiseModel{T <: Real} <: AbstractNoiseModel +struct GaussianNoiseModel{T<:Real} <: AbstractNoiseModel σ::T end -Base.eltype(::GaussianNoiseModel{T}) where T = T +Base.eltype(::GaussianNoiseModel{T}) where {T} = T function Base.show(io::IO, nm::GaussianNoiseModel) println(io, "GaussianNoiseModel(σ = $(nm.σ))") @@ -94,13 +94,13 @@ Model to describe noise following a multivariate Gaussian distribution - `α::T` : autocovariance amplitude - `λ::T` : autocovaraiance lag """ -struct MvGaussianNoiseModel{T <: Real} <: AbstractNoiseModel +struct MvGaussianNoiseModel{T<:Real} <: AbstractNoiseModel δx::T α::T λ::T end -Base.eltype(::MvGaussianNoiseModel{T}) where T = T +Base.eltype(::MvGaussianNoiseModel{T}) where {T} = T function Base.show(io::IO, nm::MvGaussianNoiseModel) println(io, "MvGaussianNoiseModel(α = $(nm.α), λ = $(nm.λ))") @@ -116,7 +116,7 @@ Fit function for noise autocorrelation. """ function gauss_kernel(Δx, β) α, λ = β - return @. α^2 * exp(-0.5 * (Δx/λ)^2) + return @. α^2 * exp(-0.5 * (Δx / λ)^2) end """ @@ -125,19 +125,19 @@ end Return covariance matrix `Σ` with size `n` × `n` for the exponentiated quadratic kernel with amplitude `α` and lag `λ` calculated on a grid of spacing `δx`. """ -function get_cov(δx::T, n::Int, α::T, λ::T) where {T <: Real} +function get_cov(δx::T, n::Int, α::T, λ::T) where {T<:Real} β = [α, λ] Σ = Array{T}(undef, n, n) for i = (1:n), j = (1:n) Δx = δx * (i - j) - Σ[i,j] = gauss_kernel(Δx, β) + Σ[i, j] = gauss_kernel(Δx, β) if i == j - Σ[i,j] *= 1.0000001 # for numerical stability (?) + Σ[i, j] *= 1.0000001 # for numerical stability (?) end end return Σ end -get_cov(nm::MvGaussianNoiseModel{T}, n::Int) where {T <: Real} = get_cov(nm.δx, n, nm.α, nm.λ) +get_cov(nm::MvGaussianNoiseModel{T}, n::Int) where {T<:Real} = get_cov(nm.δx, n, nm.α, nm.λ) function estimate_autocov(n::NoiseSample) @@ -145,9 +145,9 @@ function estimate_autocov(n::NoiseSample) if !allapproxequal(diff(n.x)) throw(ArgumentError("Noise analysis only works on an evenly spaced spectral grid.")) end - δx = abs(n.x[1]-n.x[2]) # x-values must be evenly spaced + δx = abs(n.x[1] - n.x[2]) # x-values must be evenly spaced lag_units = begin - N = convert(Integer, round(length(n)/2)) + N = convert(Integer, round(length(n) / 2)) collect(1:N) end lags = δx .* lag_units @@ -155,9 +155,9 @@ function estimate_autocov(n::NoiseSample) return lags, acov end -function _fit_noise(lags::Vector{T}, autocov::Vector{T}; α_guess=1.0, λ_guess=1.0) where {T <: Real} +function _fit_noise(lags::Vector{T}, autocov::Vector{T}; α_guess=1.0, λ_guess=1.0) where {T<:Real} ma = maximum(autocov) # rescale autocov to make fit more robust - fit = curve_fit(gauss_kernel, lags, autocov./ma, [α_guess, λ_guess]) + fit = curve_fit(gauss_kernel, lags, autocov ./ ma, [α_guess, λ_guess]) α, λ = fit.param return MvGaussianNoiseModel(lags[1], α * √ma, λ) end @@ -183,7 +183,7 @@ end """ Generate correlated noise from a noise sample or noise model. """ -function generate_noise(nm::MvGaussianNoiseModel{T}, len::Int, samples::Int) where {T <: Real} +function generate_noise(nm::MvGaussianNoiseModel{T}, len::Int, samples::Int) where {T<:Real} Σ = get_cov(nm, len) return Particles(samples, MvNormal(Σ)) end @@ -196,7 +196,7 @@ end """ Generate uncorrelated noise from a GaussianNoiseModel. """ -function generate_noise(nm::GaussianNoiseModel{T}, len::Int, samples::Int) where {T <: Real} +function generate_noise(nm::GaussianNoiseModel{T}, len::Int, samples::Int) where {T<:Real} return Particles(samples, MvNormal(zeros(T, len), nm.σ)) end diff --git a/test/iter_alloc.jl b/test/iter_alloc.jl new file mode 100644 index 0000000..2b67ccf --- /dev/null +++ b/test/iter_alloc.jl @@ -0,0 +1,63 @@ +module IterAllocTest +using NoisySignalIntegration +using NoisySignalIntegration: get_draw, _local_baseline, _endpoint_to_endpoint_baseline, singletrapz +using MonteCarloMeasurements +using Random: seed! +using Test + +function integrate_draw(crv::Curve{T}, left::T, right::T) where {T} + area = zero(T) + for ((xi, yi), (xj, yj)) in zip((crv, left, right), Iterators.drop((crv, left, right), 1)) + area += singletrapz(xi, xj, yi, yj) + end + area +end + +function mc_integrate_2(uc::UncertainCurve{T,N}, bnds::Vector{UncertainBound{T,M}}; intfun=trapz, subtract_baseline=false, local_baseline=false) where {T,M,N} + + M != N && error("Samples sizes incompatible") + subtract_baseline && @warn("subtract_baseline keyword argument is deprecated, use local_baseline instead.") + (subtract_baseline && local_baseline) && error("local_baseline and subtract_baseline cannot both be true.") |> throw + + areas = Array{T}(undef, N, length(bnds)) + for i ∈ 1:N + i % 1000 == 0 && print("Integrating draw $i/$N \r") + cᵢ = get_draw(i, uc) + for (j, b) in enumerate(bnds) + xₗ, xᵣ = get_draw(i, b) + areas[i, j] = integrate_draw(cᵢ, xₗ, xᵣ) + if local_baseline + areas[i, j] -= singletrapz(_local_baseline(cᵢ.x, cᵢ.y, xₗ, xᵣ, b)...) + end + if subtract_baseline + areas[i, j] -= singletrapz(_endpoint_to_endpoint_baseline(cᵢ.x, cᵢ.y, xₗ, xᵣ)...) + end + end + end + return [Particles(areas[:, i]) for i in 1:size(areas)[2]] +end + +function mc_integrate_2(uc::S, bnd::T; intfun=trapz, subtract_baseline=false, local_baseline=false) where {S<:UncertainCurve,T<:UncertainBound} + return mc_integrate_2(uc, [bnd]; intfun=intfun, subtract_baseline=subtract_baseline, local_baseline=local_baseline)[1] +end + +function get_test_spectrum(seed) + seed!(seed) + x = collect(0:0.1:200) + y = @. exp(-(x - 15)^2) + 2 * exp(-(x - 30)^2) + @. y += 1.0 + x * 1.5e-2 - (x - 50)^3 * 3e-7 # add polynomial baseline + n = length(x) + δx = x[2] - x[1] + return Curve(x, y .+ (get_cov(δx, n, 0.1, 0.5) |> MvNormal |> rand)) +end + +function main() + c = crop(get_test_spectrum(1), 10, 40) + uc = add_noise(c, MvGaussianNoiseModel(0.1, 0.1, 0.5)) + ub = UncertainBound(15.0, scale_shift_beta(2.0, 2.0, 3.0, 4.0), uc) + @time area = mc_integrate_2(uc, ub, local_baseline=true) + @time area = mc_integrate(uc, ub, local_baseline=true) +end + + +end diff --git a/test/runtests.jl b/test/runtests.jl index 41781e4..6b6eb57 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,8 +14,9 @@ if false else include("test_common.jl") include("test_curves.jl") + include("test_iteration.jl") include("test_bounds.jl") include("test_noise.jl") include("test_integration.jl") include("test_regression.jl") -end \ No newline at end of file +end diff --git a/test/test_bounds.jl b/test/test_bounds.jl index 305d9ae..dcf2f3a 100644 --- a/test/test_bounds.jl +++ b/test/test_bounds.jl @@ -1,3 +1,6 @@ +using MonteCarloMeasurements: pstd + + num_part(::UncertainBound{T, N}) where {T, N} = N ub_eltype(::UncertainBound{T, N}) where {T, N} = T @@ -25,8 +28,8 @@ end ub = UncertainBound(dleft, dright) @test num_part(ub) == 10_000 # default no. of samples from constructor @test ub_eltype(ub) == Float64 # Distributions.jl should yield Float64 ny default - @test ub.left |> std ≈ 1 atol = 1e-4 - @test ub.right |> std ≈ 1/√(12) atol = 1e-4 + @test ub.left |> pstd ≈ 1 atol = 1e-4 + @test ub.right |> pstd ≈ 1/√(12) atol = 1e-4 ub = UncertainBound(dleft, dright, 1000) @test num_part(ub) == 1000 end @@ -91,4 +94,4 @@ end @test minimum(xs) ≈ 1 atol = 1e-3 @test maximum(xs) ≈ 2 atol = 1e-3 @test mean(xs) ≈ 1.5 atol = 1e-3 -end \ No newline at end of file +end diff --git a/test/test_common.jl b/test/test_common.jl index 11f0f3e..f1756d8 100644 --- a/test/test_common.jl +++ b/test/test_common.jl @@ -1,3 +1,5 @@ +using MonteCarloMeasurements: pmean, pstd + @testset "allapproxequal()" begin @test nsi.allapproxequal([]) == true @test nsi.allapproxequal([1.0]) == true @@ -92,38 +94,38 @@ end @testset "Normal" begin ref = Particles(10_000, Normal(1, 1)) out = @samples 10_000 1 ± 1 - @test mean(ref) ≈ mean(out) - @test std(ref) ≈ std(out) + @test pmean(ref) ≈ pmean(out) + @test pstd(ref) ≈ pstd(out) # pass expression to macro out = @samples 10_000 (2 / 2) ± (3 - 2) - @test mean(ref) ≈ mean(out) - @test std(ref) ≈ std(out) + @test pmean(ref) ≈ pmean(out) + @test pstd(ref) ≈ pstd(out) # pass variables to macro out = let a = 1 b = 1 @samples 10_000 a ± b end - @test mean(ref) ≈ mean(out) - @test std(ref) ≈ std(out) + @test pmean(ref) ≈ pmean(out) + @test pstd(ref) ≈ pstd(out) end @testset "Uniform" begin ref = Particles(10_000, Uniform(1, 2)) out = @samples 10_000 1 .. 2 - @test mean(ref) ≈ mean(out) - @test std(ref) ≈ std(out) + @test pmean(ref) ≈ pmean(out) + @test pstd(ref) ≈ pstd(out) # pass expression to macro out = @samples 10_000 (2 / 2) .. (3 - 1) - @test mean(ref) ≈ mean(out) - @test std(ref) ≈ std(out) + @test pmean(ref) ≈ pmean(out) + @test pstd(ref) ≈ pstd(out) # pass variables to macro out = let a = 1 b = 2 @samples 10_000 a .. b end - @test mean(ref) ≈ mean(out) - @test std(ref) ≈ std(out) + @test pmean(ref) ≈ pmean(out) + @test pstd(ref) ≈ pstd(out) end @test_throws ArgumentError (@samples 1 1 + 1) @test_throws ArgumentError (@samples 1 1 + 1 + 1) diff --git a/test/test_integration.jl b/test/test_integration.jl index 36726ac..5fc440b 100644 --- a/test/test_integration.jl +++ b/test/test_integration.jl @@ -1,3 +1,5 @@ +using MonteCarloMeasurements: pmean, pstd + function get_test_spectrum(seed) seed!(seed) x = collect(0:0.1:200) @@ -14,18 +16,22 @@ end uc = add_noise(c, MvGaussianNoiseModel(0.1, 0.1, 0.5)) ub = UncertainBound(15.0, scale_shift_beta(2.0, 2.0, 3.0, 4.0), uc) area = mc_integrate(uc, ub, subtract_baseline=true) - @test mean(area) ≈ 1.808961690 atol = 1e-8 - @test std(area) ≈ 0.2488841198 atol = 1e-8 + # note that random numbers in Julia might not be reproducible + # when switching minor versions, https://docs.julialang.org/en/v1/stdlib/Random/#Reproducibility + @test pmean(area) ≈ 1.71985533 atol = 1e-8 + @test pstd(area) ≈ 0.249306770 atol = 1e-8 end @testset "integration of two peaks" begin c = crop(get_test_spectrum(1), 10, 40) uc = add_noise(c, MvGaussianNoiseModel(0.1, 0.1, 0.5)) ubs = UncertainBound([15., 30.], scale_shift_beta(2.0, 2.0, 3.0, 4.0), uc) area1, area2 = mc_integrate(uc, ubs, subtract_baseline=true) - @test mean(area1) ≈ 1.808961690 atol = 1e-8 - @test std(area1) ≈ 0.2488841198 atol = 1e-8 - @test mean(area2) ≈ 3.442573414 atol = 1e-8 - @test std(area2) ≈ 0.3122904192 atol = 1e-8 + # note that random numbers in Julia might not be reproducible + # when switching minor versions, https://docs.julialang.org/en/v1/stdlib/Random/#Reproducibility + @test pmean(area1) ≈ 1.71985533 atol = 1e-8 + @test pstd(area1) ≈ 0.249306770 atol = 1e-8 + @test pmean(area2) ≈ 3.116804434 atol = 1e-8 + @test pstd(area2) ≈ 0.2673814109 atol = 1e-8 end end @@ -35,8 +41,8 @@ end uc = add_noise(c, MvGaussianNoiseModel(0.1, 0.1, 0.5)) ub = UncertainBound(15.0, scale_shift_beta(2.0, 2.0, 3.0, 4.0), uc) area = mc_integrate(uc, ub; intfun=f) - @test mean(area) == 0.0 - @test std(area) == 0.0 + @test pmean(area) == 0.0 + @test pstd(area) == 0.0 end @testset "integrate using Simpsons rule from external package" begin @@ -51,8 +57,8 @@ end uc = add_noise(c, MvGaussianNoiseModel(0.1, 0.1, 0.5)) ub = UncertainBound(15.0, scale_shift_beta(2.0, 2.0, 3.0, 4.0), uc) area_s = mc_integrate(uc, ub; intfun=simps) - @test mean(area_s) ≈ 5.6868358 atol = 1e-7 - @test std(area_s) ≈ 0.32897085 atol = 1e-7 + @test pmean(area_s) ≈ 5.8287047 atol = 1e-7 + @test pstd(area_s) ≈ 0.34293816 atol = 1e-7 end @testset "error if local_baseline and subtract_baseline true" begin @@ -128,4 +134,4 @@ end @test yₗ ≈ 0.77916842211701 # manually derived in LibreCalc @test yᵣ ≈ -0.7210156 end -end \ No newline at end of file +end diff --git a/test/test_iteration.jl b/test/test_iteration.jl new file mode 100644 index 0000000..4f8d89a --- /dev/null +++ b/test/test_iteration.jl @@ -0,0 +1,76 @@ +@testset "test left-right iterator" begin + ds = let + x = -1:10 |> collect + Curve{Float64}(x, x * 2) + end + # iterate window inside data range + # both left and right fall on data points + iter = Iterators.Stateful((ds, 1.0, 3.0)) + @test iterate(iter) |> first == (1.0, 2.0) + @test iterate(iter) |> first == (2.0, 4.0) + @test iterate(iter) |> first == (3.0, 6.0) + @test iterate(iter) |> isnothing + # right falls on data point, left does not + iter = Iterators.Stateful((ds, 1.5, 3.0)) + @test iterate(iter) |> first == (1.5, 3.0) + @test iterate(iter) |> first == (2.0, 4.0) + @test iterate(iter) |> first == (3.0, 6.0) + @test iterate(iter) |> isnothing + # left falls on data point, right does not + iter = Iterators.Stateful((ds, 1.0, 2.5)) + @test iterate(iter) |> first == (1.0, 2.0) + @test iterate(iter) |> first == (2.0, 4.0) + @test iterate(iter) |> first == (2.5, 5.0) + @test iterate(iter) |> isnothing + # left and right do not fall on data point + iter = Iterators.Stateful((ds, 1.5, 3.4)) + @test iterate(iter) |> first == (1.5, 3.0) + @test iterate(iter) |> first == (2.0, 4.0) + @test iterate(iter) |> first == (3.0, 6.0) + @test iterate(iter) |> first == (3.4, 6.8) + @test iterate(iter) |> isnothing + # left to right interval is smaller than x-spacing, + # left falls on x-grid + iter = Iterators.Stateful((ds, 1.0, 1.5)) + @test iterate(iter) |> first == (1.0, 2.0) + @test iterate(iter) |> first == (1.5, 3.0) + @test iterate(iter) |> isnothing + # left to right interval is smaller than x-spacing, + # right falls on x-grid + iter = Iterators.Stateful((ds, 1.5, 2.0)) + @test iterate(iter) |> first == (1.5, 3.0) + @test iterate(iter) |> first == (2.0, 4.0) + @test iterate(iter) |> isnothing + # left to right interval is exactly as wide as x-spacing + iter = Iterators.Stateful((ds, 1.0, 2.0)) + @test iterate(iter) |> first == (1.0, 2.0) + @test iterate(iter) |> first == (2.0, 4.0) + @test iterate(iter) |> isnothing + # iterate starting outside data range + iter = Iterators.Stateful((ds, -2.0, 2.0)) + @test iterate(iter) |> first == (-1.0, -2.0) + @test iterate(iter) |> first == (0.0, 0.0) + @test iterate(iter) |> first == (1.0, 2.0) + @test iterate(iter) |> first == (2.0, 4.0) + @test iterate(iter) |> isnothing + # starting further outside bound should not matter + iter = Iterators.Stateful((ds, -20.0, 2.0)) + @test iterate(iter) |> first == (-1.0, -2.0) + # iterate ending outside data range + iter = Iterators.Stateful((ds, 8.0, 11.0)) + @test iterate(iter) |> first == (8.0, 16.0) + @test iterate(iter) |> first == (9.0, 18.0) + @test iterate(iter) |> first == (10.0, 20.0) + @test iterate(iter) |> isnothing + # iterate starting and ending outside data range + iter = Iterators.Stateful((ds, -8.0, 18.0)) + @test iterate(iter) |> first == (-1.0, -2.0) + [iterate(iter) for _ in 1:10] # skip central elements + @test iterate(iter) |> first == (10.0, 20.0) + @test iterate(iter) |> isnothing + # left and right fall between data points + iter = Iterators.Stateful((ds, 2.25, 2.75)) + @test iterate(iter) |> first == (2.25, 4.5) + @test iterate(iter) |> first == (2.75, 5.5) + @test iterate(iter) |> isnothing +end diff --git a/test/test_regression.jl b/test/test_regression.jl index 698d343..1a98cac 100644 --- a/test/test_regression.jl +++ b/test/test_regression.jl @@ -1,3 +1,5 @@ +using MonteCarloMeasurements: pquantile + """ Regression tests @@ -28,7 +30,7 @@ end dat = joinpath(@__DIR__, "t1.dat") |> read_test_data x, y = dat[:, 1], dat[:, 2] # remove numeric noise from x and make grid uniform - x = x[1]:(diff(x) |> mean):x[end] |> collect + x = x[1]:(diff(x)|>mean):x[end] |> collect # integration workflow spec = crop(Curve(x, y), 3600.0, 3680.0) noise = NoiseSample(crop(Curve(x, y), 3750.0, 3850.0)) @@ -36,12 +38,12 @@ end uspec = add_noise(spec, nm) bnds = UncertainBound([3620.0, 3639.0], scale_shift_beta(2, 2, 4.75, 5.25), uspec) areas = mc_integrate(uspec, bnds, subtract_baseline=true) - result = areas[2]/areas[1] + result = areas[2] / areas[1] # compare percentiles to previous results prev_result = [3.79e-01, 5.92e-01, 8.60e-01] # 2.5, 50 and 97.5 percentiles from https://doi.org/10.1039/C9CP00435A for (p, pr) in zip([2.5, 50, 97.5], prev_result) - r = percentile(result, p) - @test abs(r - pr)/r < 0.025 # here, the new implementation reproduces the previous results within 2.5% + r = pquantile(result, p / 100.0) + @test abs(r - pr) / r < 0.025 # here, the new implementation reproduces the previous results within 2.5% end end @@ -52,7 +54,7 @@ end dat = joinpath(@__DIR__, "t2.dat") |> read_test_data x, y = dat[:, 1], dat[:, 2] # remove numeric noise from x and make grid uniform - x = x[1]:(diff(x) |> mean):x[end] |> collect + x = x[1]:(diff(x)|>mean):x[end] |> collect # integration workflow spec = crop(Curve(x, y), 2650.0, 2710.0) noise = NoiseSample(crop(Curve(x, y), 2500.0, 2600.0), 1) @@ -60,17 +62,17 @@ end uspec = add_noise(spec, nm) bnds = UncertainBound([2671.0, 2685.0], scale_shift_beta(2, 2, 4.75, 5.25), uspec) areas = mc_integrate(uspec, bnds, subtract_baseline=true) - result = areas[2]/areas[1] + result = areas[2] / areas[1] # compare percentiles to previous results p25_pr = 2.24e-01 # previous 2.5 percentile ... p50_pr = 5.27e-01 # previous 50 percentile ... p975_pr = 9.18e-01 # previous 97.5 percentiles from https://doi.org/10.1039/C9CP00435A # in case of methanol-d-3-methylphenylacetylene, the percentiles are less well reproduced # but the agreement is still better than 7% - r = percentile(result, 2.5) - @test abs(r - p25_pr)/r ≈ 0.0739333525 rtol = 1e-7 # worst disagreement (but for smallest number, 0.24 vs. 0.22 is not that bad ...) - r = percentile(result, 50) - @test abs(r - p50_pr)/r ≈ 0.000140994154 rtol = 1e-6 # the median is very well reproduced - r = percentile(result, 97.5) - @test abs(r - p975_pr)/r ≈ 0.04008646654 rtol = 1e-7 -end \ No newline at end of file + r = pquantile(result, 2.5 / 100.0) + @test abs(r - p25_pr) / r ≈ 0.070584472 rtol = 1e-7 # worst disagreement (but for smallest number, 0.24 vs. 0.22 is not that bad ...) + r = pquantile(result, 50 / 100.0) + @test abs(r - p50_pr) / r ≈ 0.0046179427 rtol = 1e-7 # the median is very well reproduced + r = pquantile(result, 97.5 / 100.0) + @test abs(r - p975_pr) / r ≈ 0.028591554 rtol = 1e-7 +end diff --git a/test/tmp.jl b/test/tmp.jl new file mode 100644 index 0000000..2fe7520 --- /dev/null +++ b/test/tmp.jl @@ -0,0 +1,143 @@ +module New + +import Base.iterate + +using Distributions +using NoisySignalIntegration +using NoisySignalIntegration: AbstractNoiseModel, lininterp +using Plots + +mutable struct LeftRightIterator{T} + curve::Curve{T} + left::T + right::T + index::Integer + next::Union{Nothing,Tuple{T,T}} + depleted::Bool +end + +function iterate_left_right(crv::Curve{T}, left::T, right::T) where {T} + left, right = min(left, right), max(left, right) + depleted = false + if crv.x[end] <= left || crv.x[1] >= right || left == right + depleted = true + end + LeftRightIterator{T}(crv, left, right, 1, nothing, depleted) +end + +function Base.iterate(lri::LeftRightIterator{T})::Union{Nothing,Tuple{Tuple{T,T},LeftRightIterator}} where {T} + lri.depleted && return nothing + if !isnothing(lri.next) + lri.depleted = true + return (lri.next, lri) + end + + while true + x1 = get(lri.curve.x, lri.index, nothing) + x2 = get(lri.curve.x, lri.index + 1, nothing) + y1 = get(lri.curve.y, lri.index, nothing) + y2 = get(lri.curve.y, lri.index + 1, nothing) + + lri.index += 1 + + # get state + state = let + left_between_x1x2 = (isnothing(x1) || isnothing(x2)) ? false : x1 < lri.left < x2 + right_between_x1x2 = (isnothing(x1) || isnothing(x2)) ? false : x1 < lri.right < x2 + in_bounds_x1 = isnothing(x1) ? nothing : lri.left <= x1 <= lri.right + in_bounds_x2 = isnothing(x2) ? nothing : lri.left <= x2 <= lri.right + (in_bounds_x1, in_bounds_x2, left_between_x1x2, right_between_x1x2) + end + + # common case where we are outside the bounds + # we have two sub-cases to deal with: + # x2 < a -> continue (is executed implicitly by doing nothing) + # x1 > b -> depleted + if state == (false, false, false, false) + if x1 > lri.right + @show lri.depleted = true + return nothing + end + # common case where we are inside the bounds + # but did not just enter them or are about to exit them + elseif state == (true, true, false, false) + return ((x1, y1), lri) + # moving inside the bounds + # x1 a x2 b -> return (a, y at a) + elseif state == (false, true, true, false) + return ((lri.left, lininterp(lri.left, x1, x2, y1, y2)), lri) + # exiting the bounds + # a x1 b x2 -> return (x1, y1) + # -> set next_b = Some((b, y at b)) + elseif state == (true, false, false, true) + lri.next = (lri.right, lininterp(lri.right, x1, x2, y1, y2)) + return ((x1, y1), lri) + # no elements left -> depleted + elseif state == (nothing, nothing, false, false) + lri.depleted = true + return nothing + # only one element left, but not inside bounds + # -> depleted (4 states) + elseif !state[1] && isnothing(state[2]) + lri.depleted = true + return nothing + # only one element left, which is inside bounds + # -> return (x1, y1) -> depleted (4 states) + elseif state[1] && isnothing(state[2]) + lri.depleted = true + return ((x1, y1), lri) + # rare case where x2 falls on a + # x1 x2 = a b -> continue + elseif state == (false, true, false, false) + continue + # rare case where x1 falls on b + # a x1 = b x2 -> return (x1, y1) + elseif state == (true, false, false, false) + return ((x1, y1), lri) + # very rare case where x1 a b x2 + # -> return (a, y at a) + # -> set next_b = Some((b, y at b)) + # (note that a == b is not possible, + # because this case is explicitly handled + # when the iterator is constructed) + elseif state == (false, false, true, true) + self.next = (lri.right, lininterp(lri.right, x1, x2, y1, y2)) + return ((lri.left, lininterp(lri.left, x1, x2, y1, y2)), lri) + # unreachable states + else + throw("Reached an iterator state that should be impossible, consider writing a bug report!") + end + end +end + +Base.iterate(lri::LeftRightIterator, _::LeftRightIterator) = Base.iterate(lri::LeftRightIterator) + +function update!(lri::LeftRightIterator{T}, crv::Curve{T}) where {T} + length(crv) != length(lri.curve) && throw("Can only update iterator with curve of same length!") + for i in 1:length(lri.curve) + lri.curve.x[i] = crv.x[i] + lri.curve.y[i] = crv.y[i] + end + lri.next = nothing + lri.index = 1 + lri.depleted = false +end + +function main() + spectrum = NoisySignalIntegration.testdata_1() + slice_bands = crop(spectrum, 5.0, 40.0) + slice_noise = crop(spectrum, 40.0, 100.0) + noise_model = fit_noise(NoiseSample(slice_noise, 3)) + ucrv = add_noise(slice_bands, noise_model) + cov = get_cov(noise_model, length(slice_bands)) + iter = iterate_left_right(slice_bands, 10.37, 20.52) + for i in 1:10000 + crv = NoisySignalIntegration.get_draw(i, ucrv) + update!(iter, crv) + for (x1, y1) in iter + # print(x1, " ", y1, "\r") + end + end +end + +end