diff --git a/Project.toml b/Project.toml index d877ed66..5e9c0e12 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.3.2" [deps] Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b" DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" diff --git a/README.md b/README.md index cdfb3b45..cac86db8 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ data, events = simulate( PinkNoise(), ); ``` -All components (design, components, onsets, noise) can be easily modified and you can simply plugin your own! +All simulation ingredients (design, components, onsets, noise) can be easily modified and you can simply plugin your own! ## Contributions Contributions of any kind are very welcome. Please have a look at [CONTRIBUTING.md](https://github.com/unfoldtoolbox/UnfoldSim.jl/blob/main/CONTRIBUTING.md) for guidance on contributing to UnfoldSim.jl. diff --git a/docs/literate/HowTo/multichannel.jl b/docs/literate/HowTo/multichannel.jl index 8aa5111f..50d4b5fa 100644 --- a/docs/literate/HowTo/multichannel.jl +++ b/docs/literate/HowTo/multichannel.jl @@ -1,10 +1,22 @@ +# # Generate multi channel data +# Here you will learn how to simulate EEG data for multiple channels/electrodes. +# The idea is to specify a signal on source level and then use a head model or a manual projection matrix to project the source signal to a number of electrodes. + +# ### Setup +# ```@raw html +#
+# Click to expand +# ``` +## Load required packages using UnfoldSim using UnfoldMakie using CairoMakie using DataFrames using Random - +# ```@raw html +#
+# ``` # ## Specifying a design @@ -17,7 +29,7 @@ c2 = LinearModelComponent(; basis = p300(), formula = @formula(0 ~ 1), β = [1]) # ## The multichannel component -# next similar to the nested design above, we can nest the component in a `MultichannelComponent`. We could either provide the projection marix manually, e.g.: +# Next, similar to the nested design above, we can nest the component in a `MultichannelComponent`. We could either provide the projection matrix manually, e.g.: mc = UnfoldSim.MultichannelComponent(c, [1, 2, -1, 3, 5, 2.3, 1]) # or maybe more convenient: use the pair-syntax: Headmodel=>Label which makes use of a headmodel (HaRTmuT is currently easily available in UnfoldSim) @@ -26,7 +38,7 @@ mc = UnfoldSim.MultichannelComponent(c, hart => "Left Postcentral Gyrus") mc2 = UnfoldSim.MultichannelComponent(c2, hart => "Right Occipital Pole") # !!! hint -# You could also specify a noise-specific component which is applied prior to projection & summing with other components +# You could also specify a noise-specific component which is applied prior to projection & summing with other components. # # finally we need to define the onsets of the signal onset = UniformOnset(; width = 20, offset = 4); diff --git a/docs/literate/HowTo/newComponent.jl b/docs/literate/HowTo/newComponent.jl index bb8bee72..91da1245 100644 --- a/docs/literate/HowTo/newComponent.jl +++ b/docs/literate/HowTo/newComponent.jl @@ -1,6 +1,12 @@ -# # New component: Duration + Shift +# # Define a new component (with variable duration and shift) -# We want a new component that changes its duration and shift depending on a column in the event-design. This is somewhat already implemented in the HRF + Pupil bases +# We want a new component that changes its duration and shift depending on a column in the event design. This is somewhat already implemented in the HRF + Pupil bases. + +# ### Setup +# ```@raw html +#
+# Click to expand +# ``` using UnfoldSim using Unfold using Random @@ -8,6 +14,9 @@ using DSP using CairoMakie, UnfoldMakie sfreq = 100; +# ```@raw html +#
+# ``` # ## Design # Let's generate a design with two columns, shift + duration @@ -19,7 +28,8 @@ design = UnfoldSim.SingleSubjectDesign(; ) -# We also need a new AbstractComponent +# ## Implement a new AbstractComponent +# We also need a new `AbstractComponent` struct TimeVaryingComponent <: AbstractComponent basisfunction::Any maxlength::Any @@ -51,7 +61,7 @@ function basis_shiftduration(evts, maxlength) end end - +# ## Simulate data with the new component type erp = UnfoldSim.simulate( MersenneTwister(1), TimeVaryingComponent(basis_shiftduration, 50), diff --git a/docs/literate/HowTo/newDesign.jl b/docs/literate/HowTo/newDesign.jl index 955044d2..ec0fc256 100644 --- a/docs/literate/HowTo/newDesign.jl +++ b/docs/literate/HowTo/newDesign.jl @@ -1,13 +1,24 @@ +# # Define a new (imbalanced) design + +# A design specifies how much data is generated, and how the event-table(s) +# should be generated. Already implemented examples are `MultiSubjectDesign` and `SingleSubjectDesign`. + +# We need 3 things for a new design: a `struct<:AbstractDesign`, a `size` and a `generate` function. + +# ### Setup +# ```@raw html +#
+# Click to expand +# ``` using UnfoldSim using StableRNGs using DataFrames using Parameters -# ## Define a new Design -# A design specifies how much data is generated, and how the event-table(s) -# should be generated. Already implemented examples are `MultiSubjectDesign` and `SingleSubjectDesign` -# -# We need 3 things for a new design: a `struct<:AbstractDesign`, a `size` and a `generate` function -# +# ```@raw html +#
+#
+# ``` + # #### 1) `type` # We need a `ImbalanceSubjectDesign` struct. You are free to implement it as you wish, as long as the other two functions are implemented # diff --git a/docs/literate/HowTo/predefinedData.jl b/docs/literate/HowTo/predefinedData.jl index 9a394e9b..5ed27f74 100644 --- a/docs/literate/HowTo/predefinedData.jl +++ b/docs/literate/HowTo/predefinedData.jl @@ -2,10 +2,20 @@ # Let's say you want to use the events data frame (containing the levels of the experimental variables and the event onsets (latencies)) from a previous study in your simulation. +# ### Setup +# ```@raw html +#
+# Click to expand +# ``` +## Load required packages using UnfoldSim using DataFrames using Random using CairoMakie # for plotting +# ```@raw html +#
+#
+# ``` # From a previous study, we (somehow, e.g. by using [pyMNE.jl](https://unfoldtoolbox.github.io/Unfold.jl/dev/HowTo/pymne/)) imported an event data frame like this: my_events = DataFrame(:condition => [:A, :B, :B, :A, :A], :latency => [7, 13, 22, 35, 41]) diff --git a/docs/literate/HowTo/repeatTrials.jl b/docs/literate/HowTo/repeatTrials.jl index 046bfb75..b85db6b0 100644 --- a/docs/literate/HowTo/repeatTrials.jl +++ b/docs/literate/HowTo/repeatTrials.jl @@ -1,12 +1,10 @@ -using UnfoldSim - - -# ## Repeating Design entries +# # [Get multiple trials with identical subject/item combinations](@id howto_repeat_design) # Sometimes we want to repeat a design, that is, have multiple trials with identical values, but it is not always straight forward to implement. # For instance, there is no way to easily modify `MultiSubjectDesign` to have multiple identical subject/item combinations, # without doing awkward repetitions of condition-levels or something. # If you struggle with this problem `RepeatDesign` is an easy tool for you: +using UnfoldSim designOnce = MultiSubjectDesign(; n_items = 2, diff --git a/docs/literate/HowTo/sequence.jl b/docs/literate/HowTo/sequence.jl new file mode 100644 index 00000000..986446a6 --- /dev/null +++ b/docs/literate/HowTo/sequence.jl @@ -0,0 +1,77 @@ +using Base: add_sum +using UnfoldSim +using CairoMakie +using StableRNGs + +# ## Stimulus - Response design + +# Let's say we want to simulate a stimulus response, followed by a button press response. +# +# First we generate the minimal design of the experiment by specifying our conditins (a one-condition-two-levels design in our case) +design = SingleSubjectDesign(conditions = Dict(:condition => ["one", "two"])) +generate_events(design) +# Next we use the `SequenceDesign` and nest our initial design in it. "`SR_`" is code for an "`S`" event and an "`R`" event - only single letter events are supported! The "`_`" is a signal for the Onset generator to generate a bigger pause - no overlap between adjacend "`SR`" pairs +design = SequenceDesign(design, "SR_", StableRNG(1)) +generate_events(design) +# The main thing that happened is that the design was repeated for every event (each 'letter') of the sequence, and an `eventtype` column was added. +# !!! hint +# more advaned sequences are possible as well, like "SR{1,3}", or "A[BC]". Infinite sequences are **not** possible like "AB*" + +# Finally, let's repeat the current design 4 times +design = RepeatDesign(design, 4) +generate_events(design) + +#design = UnfoldSim.AddSaccadeAmplitudeDesign4(design,:rt,Normal(0,1),MersenneTwister(1)) +#generate_events(design) + + +# This results in 16 trials that nicely follow our sequence + +# Next we have to specify for both events `S` and `R` what the responses should look like. +p1 = LinearModelComponent(; + basis = p100(), + formula = @formula(0 ~ 1 + condition), + β = [1, 0.5], +) + +n1 = LinearModelComponent(; + basis = n170(), + formula = @formula(0 ~ 1 + condition), + β = [1, 0.5], +) + +p3 = LinearModelComponent(; + basis = UnfoldSim.hanning(Int(0.5 * 100)), # sfreq = 100 for the other bases + formula = @formula(0 ~ 1 + condition), + β = [1, 0], +) + +resp = LinearModelComponent(; + basis = UnfoldSim.hanning(Int(0.5 * 100)), # sfreq = 100 for the other bases + formula = @formula(0 ~ 1 + condition), + β = [1, 2], + offset = -10, +) +nothing ## hide + + +# We combine them into a dictionary with a sequence-`Char` as key and simulate +components = Dict('S' => [p1, n1, p3], 'R' => [resp]) + +data, evts = simulate( + StableRNG(1), + design, + components, + UniformOnset(offset = 40, width = 10), + NoNoise(), +) +nothing ## hide + +# Finally we can plot the results +lines(data) +vlines!(evts.latency[evts.event.=='S'], color = (:darkblue, 0.5)) +vlines!(evts.latency[evts.event.=='R'], color = (:darkred, 0.5)) +xlims!(0, 500) +current_figure() + +# As visible, the `R` response always follows the `S` response. Due to the "`_`" we have large breaks between the individual sequences. \ No newline at end of file diff --git a/docs/literate/reference/basistypes.jl b/docs/literate/reference/basistypes.jl index c04d73cc..188951bd 100644 --- a/docs/literate/reference/basistypes.jl +++ b/docs/literate/reference/basistypes.jl @@ -1,16 +1,25 @@ -using UnfoldSim -using CairoMakie -using DSP -using StableRNGs - -# ## Basistypes +# # Overview: Basis function (component) types # There are several basis types directly implemented. They can be easily used for the `components`. # # !!! note # You can use any arbitrary shape defined by yourself! We often make use of `hanning(50)` from the DSP.jl package. +# ### Setup +# ```@raw html +#
+# Click to expand +# ``` +## Load required packages +using UnfoldSim +using CairoMakie +using DSP +using StableRNGs +# ```@raw html +#
+# ``` + # ## EEG -# By default, the EEG bases assume a sampling rate of 100, which can easily be changed by e.g. p100(;sfreq=300) +# By default, the EEG bases assume a sampling rate of 100, which can easily be changed by e.g. p100(; sfreq=300) f = Figure() ax = f[1, 1] = Axis(f) for b in [p100, n170, p300, n400] diff --git a/docs/literate/reference/designtypes.jl b/docs/literate/reference/designtypes.jl new file mode 100644 index 00000000..eca80e26 --- /dev/null +++ b/docs/literate/reference/designtypes.jl @@ -0,0 +1,107 @@ +# # Overview: Experimental design types + +# The experimental design specifies the experimental conditions and other variables that are supposed to have an influence on the simulated data. +# Currently, there are three types of designs implemented: `SingleSubjectDesign`, `MultiSubjectDesign` and `RepeatDesign`. + +# ### Setup +# ```@raw html +#
+# Click to expand +# ``` +## Load required packages +using UnfoldSim +using Random +# ```@raw html +#
+# ``` + +# ## Single-subject designs +# As the name suggests, the `SingleSubjectDesign` type can be used to specify the experimental design for a single subject. Using the `conditions` arguments, +# the user can specify all relevant conditions or predictors and their levels or value range. + +# The current implementation assumes a full factorial design (also called fully crossed design) +# in which each level of a factor occurs with each level of the other factors. Moreover, in the current implementation, there is exactly one instance of each of these factor combinations. + +# Example: +design_single = SingleSubjectDesign(; + conditions = Dict( + :stimulus_type => ["natural", "artificial"], + :contrast_level => range(0, 1, length = 3), + ), +); + +# In order to inspect the design, we can use the `generate_events` function to create an event table based on the design we specified. +generate_events(design_single) + +# To change the order of the trials e.g. to sort or shuffle them, one can use the `event_order_function` argument. +# Example: Randomize the order of trials +design_single_shuffled = SingleSubjectDesign(; + conditions = Dict( + :stimulus_type => ["natural", "artificial"], + :contrast_level => range(0, 1, length = 3), + ), + event_order_function = x -> shuffle(MersenneTwister(42), x), +); +# ```@raw html +#
+# Click to expand event table +# ``` +generate_events(design_single_shuffled) +# ```@raw html +#
+# ``` + +# ## Multi-subject designs +# The `MultiSubjectDesign` type can be used to simulate data for an experiment with multiple subjects. Internally, it uses the [MixedModelsSim.jl package](https://github.com/RePsychLing/MixedModelsSim.jl). +# One needs to specify the number of subjects `n_subjects` and the number of items `n_items` i.e. stimuli. +# In addition, one needs to decide for every experimental factor whether it should be between- or within-subject (and item). + +# !!! note +# For factors that are not listed in `items_between` it is assumed that they vary within-item (accordingly for `subjects_between`). + +design_multi = MultiSubjectDesign( + n_subjects = 6, + n_items = 4, + items_between = Dict(:colour => ["red", "blue"]), + subjects_between = Dict(:age_group => ["young", "old"]), + both_within = Dict(:luminance => range(0, 1, length = 3)), +); + +# ```@raw html +#
+# Click to expand event table +# ``` +generate_events(design_multi) +# ```@raw html +#
+#
+# ``` + +# As with the `SingleSubjectDesign` one can use the `event_order_function` argument to determine the order of events/trials. + +# !!! important +# The number of subjects/items has to be a divisor of the number of factor level combinations, i.e. it is assumed that the design is balanced +# which means that there is an equal number of observations for all possible factor level combinations. + +# ## Repeat designs +# The `RepeatDesign` type is a functionality to encapsulate single- or multi-subject designs. It allows to repeat a generated event table multiple times. +# In other words, the `RepeatDesign` type allows to have multiple instances of the same item/subject/factor level combination. + +# Example: +# Assume, we have the following single-subject design from above: +# ```@raw html +#
+# Click to expand event table +# ``` +generate_events(design_single) +# ```@raw html +#
+#
+# ``` + + +# But instead of having only one instance of the factor combinations e.g. `stimulus_type`: `natural` and `contrast_level`: `0`, we will repeat the design three times such that there are three occurrences of each combination. +design_repeated = RepeatDesign(design_single, 3); +generate_events(design_repeated) + +# [Here](@ref howto_repeat_design) one can find another example of how to repeat design entries for multi-subject designs. diff --git a/docs/literate/reference/noisetypes.jl b/docs/literate/reference/noisetypes.jl index fe3efc4a..03d35b4b 100644 --- a/docs/literate/reference/noisetypes.jl +++ b/docs/literate/reference/noisetypes.jl @@ -1,10 +1,16 @@ +# # Overview: Noise types + +# There are different types of noise signals which differ in their power spectra. +# If you are not familiar with different types/colors of noise yet, have a look at the[colors of noise Wikipedia page](https://en.wikipedia.org/wiki/Colors_of_noise). + +# There are several noise types directly implemented in UnfoldSim.jl. Here is a comparison: + + using UnfoldSim using CairoMakie using DSP using StableRNGs import StatsBase.autocor -# ## What's the noise? -# There are several noise-types directly implemented. Here is a comparison: f = Figure() ax_sig = diff --git a/docs/literate/reference/onsettypes.jl b/docs/literate/reference/onsettypes.jl index 279a2226..305f946d 100644 --- a/docs/literate/reference/onsettypes.jl +++ b/docs/literate/reference/onsettypes.jl @@ -1,13 +1,13 @@ -# # Onset types +# # Overview: Onset types # The onset types determine the distances between event onsets in the continuous EEG signal. The distances are sampled from a certain probability distribution. # Currently, there are two types of onset distributions implemented: `UniformOnset` and `LogNormalOnset`. -# ## Setup +# ### Setup # ```@raw html #
# Click to expand # ``` - +## Load required packages using UnfoldSim using CairoMakie using Random @@ -58,11 +58,11 @@ let # hide design, # hide ) # hide - hist!( - ax, - distances, - bins = range(0, 100, step = 1), - label = "($width, $offset)", + hist!( # hide + ax, # hide + distances, # hide + bins = range(0, 100, step = 1), # hide + label = "($width, $offset)", # hide ) # hide if label == "offset" && offset != 0 # hide @@ -282,3 +282,25 @@ end # - if `offset` < `length(signal.basis)` -> there might be overlap, depending on the other parameters of the onset distribution # [^1]: Wikipedia contributors. (2023, December 5). Log-normal distribution. In Wikipedia, The Free Encyclopedia. Retrieved 12:27, December 7, 2023, from https://en.wikipedia.org/w/index.php?title=Log-normal_distribution&oldid=1188400077# + + + + +# ## Design-dependent `FormulaXOnset` + +# For additional control we provide `FormulaUniformOnset` and `FormulaLogNormalOnset` types, that allow to control all parameters by specifying formulas +o = UnfoldSim.FormulaUniformOnset( + width_formula = @formula(0 ~ 1 + cond), + width_β = [50, 20], +) +events = generate_events(design) +onsets = UnfoldSim.simulate_interonset_distances(MersenneTwister(42), o, design) + +f = Figure() +ax = f[1, 1] = Axis(f) +hist!(ax, onsets[events.cond.=="A"], bins = range(0, 100, step = 1), label = "cond: A") +hist!(ax, onsets[events.cond.=="B"], bins = range(0, 100, step = 1), label = "cond: B") +axislegend(ax) +f + +# Voila - the inter-onset intervals are `20` samples longer for condition `B`, exactly as specified.` \ No newline at end of file diff --git a/docs/literate/reference/overview.jl b/docs/literate/reference/overview.jl index 80145bcf..74cf3bc1 100644 --- a/docs/literate/reference/overview.jl +++ b/docs/literate/reference/overview.jl @@ -1,7 +1,17 @@ # # Overview of functionality -# UnfoldSim has many modules, here we try to collect them to provide you with an overview. +# A UnfoldSim simulation has four ingredients: Design, Component, Onset and Noise. Here we provide a short overview of the implemented types. + +# ### Setup +# ```@raw html +#
+# Click to expand +# ``` +## Load required packages using UnfoldSim using InteractiveUtils +# ```@raw html +#
+# ``` # ## Design # Designs define the experimental design. They can be nested, e.g. `RepeatDesign(SingleSubjectDesign,10)` would repeat the generated design-dataframe 10x. diff --git a/docs/literate/tutorials/multisubject.jl b/docs/literate/tutorials/multisubject.jl index b63e58d4..61570240 100644 --- a/docs/literate/tutorials/multisubject.jl +++ b/docs/literate/tutorials/multisubject.jl @@ -1,10 +1,23 @@ +# # Multi-subject simulation + +# In this tutorial, you will learn how to simulate data for multiple subjects. In particular, you will learn how to specify fixed and random effects and what their influence on the simulated data looks like. + +# ### Setup +# ```@raw html +#
+# Click to expand +# ``` +## Load required packages using UnfoldSim using Unfold using CairoMakie using UnfoldMakie using DataFrames +# ```@raw html +#
+#
+# ``` -# # Multi-subject simulation # Similar to the single subject case, multi-subject simulation depends on: # - `Design` (typically a `MultiSubjectDesign`) # - `Components` (typically a `MixedModelComponent`) diff --git a/docs/literate/tutorials/poweranalysis.jl b/docs/literate/tutorials/poweranalysis.jl index bbe7f1a5..fbc79b85 100644 --- a/docs/literate/tutorials/poweranalysis.jl +++ b/docs/literate/tutorials/poweranalysis.jl @@ -1,14 +1,24 @@ +# # Power analysis +# For a power analysis, we will repeatedly simulate data, and check whether we can find a significant effect. +# We perform the power analysis on epoched data. + +# ### Setup +# ```@raw html +#
+# Click to expand +# ``` +## Load required packages using UnfoldSim using Statistics using HypothesisTests using DataFrames using Random +# ```@raw html +#
+#
+# ``` - -# ## Simple Poweranalysis Script -# For a power analysis, we will repeatedly simulate data, and check whether we can find a significant effect. -# -# We perform the power analysis on epoched data. +# ## Simulation loop pvals = fill(NaN, 100) @time for seed in eachindex(pvals) ## Simulate data of 30 subjects @@ -33,5 +43,5 @@ pvals = fill(NaN, 100) pvals[seed] = pvalue(OneSampleTTest(y_big, y_small)) end -# let's calculate the power +# Let's calculate the power power = mean(pvals .< 0.05) * 100 diff --git a/docs/literate/tutorials/quickstart.jl b/docs/literate/tutorials/quickstart.jl index 86896185..0270a9a8 100644 --- a/docs/literate/tutorials/quickstart.jl +++ b/docs/literate/tutorials/quickstart.jl @@ -1,43 +1,67 @@ -using UnfoldSim -using Random -using CairoMakie +# # Quickstart +# To get started with data simulation, the user needs to provide four ingredients: +# 1) an experimental design, defining which conditions and how many events/"trials" exist +# 2) an event basis function, defining the simulated event-related response for every event (e.g. the ERP shape in EEG) +# 3) an inter-onset event distribution, defining the distances in time of the event sequence +# 4) a noise specification, defining the type of noise signal that is added to the simulated signal (e.g. pink noise) # !!! tip # Use `subtypes(AbstractNoise)` (or `subtypes(AbstractComponent)` etc.) to find already implemented building blocks. -# ## "Experimental" Design -# Define a 1 x 2 design with 20 trials. That is, one condition (`condaA`) with two levels. +# ## Specify the simulation ingredients + +# ### Setup +# ```@raw html +#
+# Click to expand +# ``` +## Load required packages +using UnfoldSim +using Random # to get an RNG +using CairoMakie # for plotting + +# ```@raw html +#
+# ``` + +# ### Experimental Design +# Define a 1 x 2 design with 20 trials. That is, one condition (`cond_A`) with two levels. design = - SingleSubjectDesign(; conditions = Dict(:condA => ["levelA", "levelB"])) |> + SingleSubjectDesign(; conditions = Dict(:cond_A => ["level_A", "level_B"])) |> x -> RepeatDesign(x, 10); -# #### Component / Signal +# ### Event basis function (Component) # Define a simple component and ground truth simulation formula. Akin to ERP components, we call one simulation signal a component. # # !!! note # You could easily specify multiple components by providing a vector of components, which are automatically added at the same onsets. This procedure simplifies to generate some response that is independent of simulated condition, whereas other depends on it. signal = LinearModelComponent(; basis = [0, 0, 0, 0.5, 1, 1, 0.5, 0, 0], - formula = @formula(0 ~ 1 + condA), + formula = @formula(0 ~ 1 + cond_A), β = [1, 0.5], ); -# #### Onsets and Noise -# We will start with a uniform (but overlapping, `offset` < `length(signal.basis)`) onset-distribution +# ### Onsets and Noise +# We will start with a uniform (but overlapping, `offset` < `length(signal.basis)`) inter-onset distribution. onset = UniformOnset(; width = 20, offset = 4); # And we will use some noise noise = PinkNoise(; noiselevel = 0.2); # ## Combine & Generate -# Finally, we will simulate some data +# Finally, we will combine all ingredients and simulate some data. data, events = simulate(MersenneTwister(1), design, signal, onset, noise); -# `Data` is a `n-sample` Vector (but could be a Matrix for e.g. `MultiSubjectDesign`). +# `data` is a `n-sample` Vector (but could be a Matrix for e.g. `MultiSubjectDesign` or epoched data). -# `Events` is a DataFrame that contains a column `latency` with the onsets of events. +# `events` is a DataFrame that contains a column `latency` with the onsets of events (in samples). # ## Plot them! lines(data; color = "black") -vlines!(events.latency; color = ["orange", "teal"][1 .+ (events.condA.=="levelB")]) +vlines!(events.latency; color = ["orange", "teal"][1 .+ (events.cond_A.=="level_B")]) + +current_axis().title = "Simulated data" +current_axis().xlabel = "Time [samples]" +current_axis().ylabel = "Amplitude [μV]" + current_figure() diff --git a/docs/literate/tutorials/simulateERP.jl b/docs/literate/tutorials/simulateERP.jl index b3798096..c71f613d 100644 --- a/docs/literate/tutorials/simulateERP.jl +++ b/docs/literate/tutorials/simulateERP.jl @@ -1,12 +1,33 @@ +# # Simulate event-related potentials (ERPs) + +# One subfield of EEG research focuses on so-called event-related potentials (ERPs) which are defined as brain responses time-locked to a certain event e.g. stimulus onset. +# The waveform of an ERP usually consists of multiple ERP components which denote the peaks and troughs of the waveform. + +# ERP components are characterized (and named) by their timing relative to the event, their polarity (positive or negative) and their scalp topography. +# For example, the N170 describes a negative deflection which occurrs roughly 170 ms after the onset of (certain) visual stimuli. +# Often, researchers are interested how a component (e.g. its amplitude or timing) changes depending on certain experimental factors. +# For example, N170 has been shown to be related to face processing and its amplitude is modulated by whether the stimulus is a face or an object e.g. a car. +# ([Source](https://neuraldatascience.io/7-eeg/components.html)) + +# Here we will learn how to simulate a typical ERP complex with P100, N170, P300. + +# ### Setup +# ```@raw html +#
+# Click to expand +# ``` +## Load required packages using UnfoldSim using CairoMakie using Random using Unfold using UnfoldMakie -# ## ERP Complex -# Here we will learn how to simulate a typical ERP complex with P100, N170, P300. +# ```@raw html +#
+# ``` -# Let's grab a SingleSubjectDesign and add a continuous predictor +# ## Simulation +# Let's grab a `SingleSubjectDesign` and add a continuous predictor design = SingleSubjectDesign(; conditions = Dict( diff --git a/docs/make.jl b/docs/make.jl index ffba630f..a6655831 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -19,7 +19,7 @@ DocMeta.setdocmeta!(UnfoldSim, :DocTestSetup, :(using UnfoldSim); recursive = tr makedocs(; modules = [UnfoldSim], - authors = "Luis Lips, Benedikt Ehinger, Judith Schepers", + authors = "Judith Schepers, Luis Lips, Maanik Marathe, Benedikt Ehinger", #repo="https://github.com/unfoldtoolbox/UnfoldSim.jl/blob/{commit}{path}#{line}", repo = Documenter.Remotes.GitHub("unfoldtoolbox", "UnfoldSim.jl"), sitename = "UnfoldSim.jl", @@ -27,30 +27,33 @@ makedocs(; prettyurls = get(ENV, "CI", "false") == "true", canonical = "https://unfoldtoolbox.github.io/UnfoldSim.jl", edit_link = "main", + sidebar_sitename = false, assets = String[], ), pages = [ "Home" => "index.md", "Tutorials" => [ "Quickstart" => "generated/tutorials/quickstart.md", - "Simulate ERPs" => "generated/tutorials/simulateERP.md", - "Poweranalysis" => "generated/tutorials/poweranalysis.md", + "Simulate event-related potentials (ERPs)" => "generated/tutorials/simulateERP.md", + "Power analysis" => "generated/tutorials/poweranalysis.md", "Multi-subject simulation" => "generated/tutorials/multisubject.md", ], "Reference" => [ - "Overview: Toolbox Functions" => "./generated/reference/overview.md", - "Overview: NoiseTypes" => "./generated/reference/noisetypes.md", - "Overview: OnsetTypes" => "./generated/reference/onsettypes.md", - "Overview: Components (EEG, fMRI, Pupil)" => "./generated/reference/basistypes.md", + "Overview of functionality" => "./generated/reference/overview.md", + "Overview: Experimental design types" => "./generated/reference/designtypes.md", + "Overview: Basis function (component) types" => "./generated/reference/basistypes.md", + "Overview: Onset types" => "./generated/reference/onsettypes.md", + "Overview: Noise types" => "./generated/reference/noisetypes.md", ], "HowTo" => [ - "Define a new, (imbalanced) design" => "./generated/HowTo/newDesign.md", - "Repeating a design" => "./generated/HowTo/repeatTrials.md", - "Define a new duration & jitter component" => "./generated/HowTo/newComponent.md", + "Define a new (imbalanced) design" => "./generated/HowTo/newDesign.md", + "Get multiple trials with identical subject/item combinations" => "./generated/HowTo/repeatTrials.md", + "Define a new component (with variable duration and shift)" => "./generated/HowTo/newComponent.md", "Generate multi channel data" => "./generated/HowTo/multichannel.md", - "Use predefined design / onsets data" => "./generated/HowTo/predefinedData.md", + "Use existing experimental designs & onsets in the simulation" => "./generated/HowTo/predefinedData.md", + "Produce specific sequences of events" => "./generated/HowTo/sequence.md", ], - "API / DocStrings" => "api.md", + "API / Docstrings" => "api.md", ], ) diff --git a/docs/src/assets/logo.png b/docs/src/assets/logo.png new file mode 100644 index 00000000..6793fe6c Binary files /dev/null and b/docs/src/assets/logo.png differ diff --git a/docs/src/index.md b/docs/src/index.md index 415199d4..e18f03e6 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -2,14 +2,14 @@ CurrentModule = UnfoldSim ``` -# UnfoldSim +# UnfoldSim.jl -Documentation for [UnfoldSim](https://github.com/behinger/UnfoldSim.jl). +Documentation for [UnfoldSim.jl](https://github.com/unfoldtoolbox/UnfoldSim.jl): a Julia package for simulating multivariate timeseries data with a special focus on EEG data. -## Start simulating timeseries -We offer some predefined signals, check them out! +## Start simulating time series data +We offer some predefined (EEG) signals, check them out! -For instance an P1/N170/P300 complex. +For instance a P1/N170/P300 complex (containing three typical ERP components). ```@example main using UnfoldSim using CairoMakie # plotting diff --git a/src/UnfoldSim.jl b/src/UnfoldSim.jl index cd1b0287..126264b8 100644 --- a/src/UnfoldSim.jl +++ b/src/UnfoldSim.jl @@ -14,6 +14,7 @@ using LinearAlgebra using ToeplitzMatrices # for AR Expo. Noise "Circulant" using StatsModels using HDF5, Artifacts, FileIO +using Automa # for sequence using LinearAlgebra # headmodel @@ -30,6 +31,7 @@ include("onset.jl") include("predefinedSimulations.jl") include("headmodel.jl") include("helper.jl") +include("sequence.jl") include("bases.jl") export size, length @@ -46,7 +48,7 @@ export Simulation export MixedModelComponent, LinearModelComponent # export designs -export MultiSubjectDesign, SingleSubjectDesign, RepeatDesign +export MultiSubjectDesign, SingleSubjectDesign, RepeatDesign, SequenceDesign # noise functions export PinkNoise, RedNoise, WhiteNoise, NoNoise, ExponentialNoise #,RealNoise (not implemented yet) diff --git a/src/component.jl b/src/component.jl index 6c94e8b1..65ef5e38 100644 --- a/src/component.jl +++ b/src/component.jl @@ -21,14 +21,17 @@ MixedModelComponent(; ``` """ +# backwards compatability after introducing the `offset` field` @with_kw struct MixedModelComponent <: AbstractComponent basis::Any formula::Any # e.g. 0~1+cond β::Vector σs::Dict # Dict(:subject=>[0.5,0.4]) or to specify correlationmatrix Dict(:subject=>[0.5,0.4,I(2,2)],...) contrasts::Dict = Dict() + offset::Int = 0 end - +MixedModelComponent(basis, formula, β, σs, contrasts) = + MixedModelComponent(basis, formula, β, σs, contrasts, 0) """ A multiple regression component for one subject @@ -50,13 +53,32 @@ LinearModelComponent(; ``` """ +# backwards compatability after introducing the `offset` field @with_kw struct LinearModelComponent <: AbstractComponent basis::Any formula::Any # e.g. 0~1+cond - left side must be "0" β::Vector contrasts::Dict = Dict() + offset::Int = 0 end +LinearModelComponent(basis, formula, β, contrasts) = + LinearModelComponent(basis, formula, β, contrasts, 0) +""" + offset(AbstractComponent) + +Should the `basis` be shifted? Returns c.offset for most components, if not implemented for a type, returns 0. Can be positive or negative, but has to be Integer +""" +offset(c::AbstractComponent)::Int = 0 +offset(c::LinearModelComponent)::Int = c.offset +offset(c::MixedModelComponent)::Int = c.offset + +maxoffset(c::Vector{<:AbstractComponent}) = maximum(offset.(c)) +maxoffset(d::Dict{<:Char,<:Vector{<:AbstractComponent}}) = maximum(maxoffset.(values(d))) +minoffset(c::Vector{<:AbstractComponent}) = minimum(offset.(c)) +minoffset(d::Dict{<:Char,<:Vector{<:AbstractComponent}}) = minimum(minoffset.(values(d))) + + """ Wrapper for an `AbstractComponent` to project it to multiple target-channels via `projection`. optional adds `noise` to the source prior to projection. @@ -94,15 +116,22 @@ For `MultichannelComponent` return the length of the projection vector. """ n_channels(c::MultichannelComponent) = length(c.projection) + """ For a vector of `MultichannelComponent`s, return the first but asserts all are of equal length. """ function n_channels(c::Vector{<:AbstractComponent}) all_channels = n_channels.(c) - @assert length(unique(all_channels)) == 1 "Error - projections of different channels cannot be different from eachother" + @assert length(unique(all_channels)) == 1 "Error - projections of different components have to be of the same output (=> channel) dimension" return all_channels[1] end +function n_channels(components::Dict) + all_channels = [n_channels(c) for c in values(components)] + @assert length(unique(all_channels)) == 1 "Error - projections of different components have to be of the same output (=> channel) dimension" + return all_channels[1] + +end """ simulate_component(rng,c::MultichannelComponent,design::AbstractDesign) Return the projection of a component from source to "sensor" space. @@ -124,10 +153,13 @@ Base.length(c::AbstractComponent) = length(c.basis) """ maxlength(c::Vector{<:AbstractComponent}) = maximum(length.(c)) + maxlength(components::Dict) maximum of individual component lengths """ maxlength(c::Vector{<:AbstractComponent}) = maximum(length.(c)) + +maxlength(components::Dict) = maximum([maximum(length.(c)) for c in values(components)]) """ simulate_component(rng, c::AbstractComponent, simulation::Simulation) By default call `simulate_component` with `(::Abstractcomponent,::AbstractDesign)` instead of the whole simulation. This allows users to provide a hook to do something completely different :) @@ -136,7 +168,7 @@ simulate_component(rng, c::AbstractComponent, simulation::Simulation) = simulate_component(rng, c, simulation.design) """ - simulate_component(rng, c::AbstractComponent, simulation::Simulation) + simulate_component(rng, c::LinearModelComponent, design::AbstractDesign) Generate a linear model design matrix, weight it by c.β and multiply the result with the given basis vector. julia> c = UnfoldSim.LinearModelComponent([0,1,1,0],@formula(0~1+cond),[1,2],Dict()) @@ -145,22 +177,32 @@ julia> simulate_component(StableRNG(1),c,design) """ function simulate_component(rng, c::LinearModelComponent, design::AbstractDesign) events = generate_events(design) + X = generate_designmatrix(c.formula, events, c.contrasts) + y = X * c.β + + return y' .* c.basis +end + +""" +Helper function to generate a designmatrix from formula, events and contrasts. +""" +function generate_designmatrix(formula, events, contrasts) # special case, intercept only # https://github.com/JuliaStats/StatsModels.jl/issues/269 - if c.formula.rhs == ConstantTerm(1) + if formula.rhs == ConstantTerm(1) X = ones(nrow(events), 1) else - if isempty(c.contrasts) - m = StatsModels.ModelFrame(c.formula, events) + if isempty(contrasts) + m = StatsModels.ModelFrame(formula, events) else - m = StatsModels.ModelFrame(c.formula, events; contrasts = c.contrasts) + m = StatsModels.ModelFrame(formula, events; contrasts = contrasts) end X = StatsModels.modelmatrix(m) end - y = X * c.β - return y' .* c.basis + return X end + """ simulate_component(rng, c::MixedModelComponent, design::AbstractDesign) Generates a MixedModel and simulates data according to c.β and c.σs. @@ -173,13 +215,13 @@ Currently, it is not possible to use a different basis for fixed and random effe julia> design = MultiSubjectDesign(;n_subjects=2,n_items=50,items_between=(;:cond=>["A","B"])) julia> c = UnfoldSim.MixedModelComponent([0.,1,1,0],@formula(0~1+cond+(1|subject)),[1,2],Dict(:subject=>[2],),Dict()) -julia> simulate(StableRNG(1),c,design) +julia> simulate_component(StableRNG(1),c,design) """ function simulate_component( rng, c::MixedModelComponent, - design::AbstractDesign; + design::AbstractDesign, return_parameters = false, ) events = generate_events(design) @@ -299,30 +341,104 @@ function simulate_responses( components::Vector{<:AbstractComponent}, simulation::Simulation, ) - if n_channels(components) > 1 - epoch_data = - zeros(n_channels(components), maxlength(components), length(simulation.design)) - else - epoch_data = zeros(maxlength(components), length(simulation.design)) - end + epoch_data = init_epoch_data(components, simulation.design) + simulate_responses!(rng, epoch_data, components, simulation) + return epoch_data +end +function simulate_responses!( + rng, + epoch_data::AbstractArray, + components::Vector, + simulation::Simulation, +) for c in components simulate_and_add!(epoch_data, c, simulation, rng) end return epoch_data end +function init_epoch_data(components, design) + max_offset = maxoffset(components) + min_offset = minoffset(components) + range_offset = (max_offset - min_offset) + if n_channels(components) > 1 + epoch_data = zeros( + n_channels(components), + maxlength(components) + range_offset, + length(design), + ) + else + epoch_data = zeros(maxlength(components) + range_offset, length(design)) + end + return epoch_data +end +function simulate_responses(rng, event_component_dict::Dict, s::Simulation) + #@debug rng.state + epoch_data = init_epoch_data(event_component_dict, s.design) + #@debug rng.state + evts = generate_events(s.design) + #@debug rng.state + @debug size(epoch_data), size(evts) + multichannel = n_channels(event_component_dict) > 1 + for key in keys(event_component_dict) + if key == '_' + continue + end + s_key = Simulation( + s.design |> x -> SubselectDesign(x, key), + event_component_dict, + s.onset, + s.noisetype, + ) + ix = evts.event .== key + if multichannel + simulate_responses!( + rng, + @view(epoch_data[:, :, ix]), + event_component_dict[key], + s_key, + ) + else + #@debug sum(ix), size(simulate_responses(rng, event_component_dict[key], s_key)), key + simulate_responses!( + rng, + @view(epoch_data[:, ix]), + event_component_dict[key], + s_key, + ) + end + end + return epoch_data +end """ simulate_and_add!(epoch_data::AbstractMatrix, c, simulation, rng) simulate_and_add!(epoch_data::AbstractArray, c, simulation, rng) Helper function to call `simulate_component` and add it to a provided Array. """ -function simulate_and_add!(epoch_data::AbstractMatrix, c, simulation, rng) +function simulate_and_add!( + epoch_data::AbstractMatrix, + component::AbstractComponent, + simulation, + rng, +) @debug "matrix" - @views epoch_data[1:length(c), :] .+= simulate_component(rng, c, simulation) + + off = offset(component) - minoffset(simulation.components) + + + @views epoch_data[1+off:length(component)+off, :] .+= + simulate_component(rng, component, simulation) end -function simulate_and_add!(epoch_data::AbstractArray, c, simulation, rng) +function simulate_and_add!( + epoch_data::AbstractArray, + component::AbstractComponent, + simulation, + rng, +) @debug "3D Array" - @views epoch_data[:, 1:length(c), :] .+= simulate_component(rng, c, simulation) + off = offset(component) - minoffset(simulation.components) + @views epoch_data[:, 1+off:length(component)+off, :] .+= + simulate_component(rng, component, simulation) end diff --git a/src/design.jl b/src/design.jl index 42e02328..740217bb 100644 --- a/src/design.jl +++ b/src/design.jl @@ -1,6 +1,9 @@ """ - MultiSubjectDesign + MultiSubjectDesign <: AbstractDesign +A type for specifying the experimental design for multiple subjects (based on the given random-effects structure). + +### Fields - `n_subjects`::Int -> number of subjects - `n_items`::Int -> number of items (sometimes ≈trials) - `subjects_between` = Dict{Symbol,Vector} -> effects between subjects, e.g. young vs old @@ -8,10 +11,10 @@ - `both_within` = Dict{Symbol,Vector} -> effects completly crossed - `event_order_function` = `x->x`; # can be used to sort, or e.g. `x->shuffle(MersenneTwister(42),x)` - be sure to fix/update the rng accordingly!! -tipp: check the resulting dataframe using `generate_events(design)` - +Tip: Check the resulting dataframe using `generate_events(design)` +### Example ```julia # declaring same condition both sub-between and item-between results in a full between subject/item design design = MultiSubjectDesign(; @@ -21,6 +24,7 @@ design = MultiSubjectDesign(; items_between = Dict(:cond => ["levelA", "levelB"]), ); ``` +See also [`SingleSubjectDesign`](@ref), [`RepeatDesign`](@ref) """ @with_kw struct MultiSubjectDesign <: AbstractDesign n_subjects::Int @@ -33,6 +37,11 @@ end """ + SingleSubjectDesign <: AbstractDesign + +A type for specifying the experimental for a single subject (based on the given conditions). + +### Fields - conditions = Dict{Symbol,Vector} of conditions, e.g. `Dict(:A=>["a_small","a_big"],:B=>["b_tiny","b_large"])` - `event_order_function` = x->x; # can be used to sort, or x->shuffle(MersenneTwister(42),x) - be sure to fix/update the rng accordingly!! @@ -42,7 +51,17 @@ To increase the number of repetitions simply use `RepeatDesign(SingleSubjectDesi If conditions are omitted (or set to `nothing`), a single trial is simulated with a column `:dummy` and content `:dummy` - this is for convenience. -tipp: check the resulting dataframe using `generate_events(design)` +Tip: Check the resulting dataframe using `generate_events(design)` + +### Example +```julia +design = SingleSubjectDesign(; + conditions = Dict( + :stimulus_type => ["natural", "artificial"], + :contrast_level => range(0, 1, length = 5), +); +``` +See also [`MultiSubjectDesign`](@ref), [`RepeatDesign`](@ref) """ @with_kw struct SingleSubjectDesign <: AbstractDesign conditions::Dict{Symbol,Vector} = Dict() @@ -133,7 +152,7 @@ length(design::AbstractDesign) = *(size(design)...) # ---- """ - RepeatDesign{T} + RepeatDesign{T} <: AbstractDesign Repeat a design DataFrame multiple times to mimick repeatedly recorded trials. ```julia @@ -146,25 +165,133 @@ designOnce = MultiSubjectDesign(; design = RepeatDesign(designOnce,4); ``` +See also [`SingleSubjectDesign`](@ref), [`MultiSubjectDesign`](@ref) """ @with_kw struct RepeatDesign{T} <: AbstractDesign design::T repeat::Int = 1 end + +function check_sequence(s::String) + blankfind = findall('_', s) + @assert length(blankfind) <= 1 && (length(blankfind) == 0 || length(s) == blankfind[1]) "the blank-indicator '_' has to be the last sequence element" + return s +end + + +""" + SequenceDesign{T} <: AbstractDesign +Enforce a sequence of events for each entry of a provided `AbstractDesign`. +The sequence string can contain any number of `char`, but the `_` character is used to indicate a break between events without any overlap. + + +```julia +design = SingleSubjectDesign(conditions = Dict(:condition => ["one", "two"])) +design = SequenceDesign(design, "SCR_", StableRNG(1)) +``` +Would result in a `generate_events(design)` +```repl +6×2 DataFrame + Row │ condition event + │ String Char +─────┼────────────────── + 1 │ one S + 2 │ one C + 3 │ one R + 4 │ two S + 5 │ two C + 6 │ two R +``` + +See also [`SingleSubjectDesign`](@ref), [`MultiSubjectDesign`](@ref), [`RepeatDesign`](@ref) +""" +@with_kw struct SequenceDesign{T} <: AbstractDesign + design::T + sequence::String = "" + sequencelength::Int = 0 + rng = nothing + + SequenceDesign{T}(d, s, sl, r) where {T<:AbstractDesign} = + new(d, check_sequence(s), sl, r) +end +SequenceDesign(design, sequence, rng::AbstractRNG) = + SequenceDesign(design = design, sequence = sequence, rng = rng) +SequenceDesign(design, sequence) = SequenceDesign(design = design, sequence = sequence) + +generate_events(design::SequenceDesign{MultiSubjectDesign}) = error("not yet implemented") + + +generate_events(rng, design::AbstractDesign) = generate_events(design) +generate_events(design::SequenceDesign) = generate_events(deepcopy(design.rng), design) + +function generate_events(rng, design::SequenceDesign) + df = generate_events(design.design) + nrows_df = size(df, 1) + + rng = if isnothing(rng) + @warn "Could not (yet) find an rng for `SequenceDesign` - ignore this message if you called `generate_events` yourself, be worried if you called `simulate` and still see this message. Surpress this message by defining the `rng` when creating the `SequenceDesign`" + MersenneTwister(1) + else + rng + end + # @debug design.sequence + currentsequence = sequencestring(rng, design.sequence) + # @debug currentsequence + currentsequence = replace(currentsequence, "_" => "") + df = repeat(df, inner = length(currentsequence)) + + df.event .= repeat(collect(currentsequence), nrows_df) + + return df + +end + + + +get_rng(design::AbstractDesign) = nothing +get_rng(design::SequenceDesign) = design.rng + """ - UnfoldSim.generate_events(design::RepeatDesign{T}) + generate_events(rng,design::RepeatDesign{T}) In a repeated design, iteratively calls the underlying {T} Design and concatenates. In case of MultiSubjectDesign, sorts by subject. """ -function UnfoldSim.generate_events(design::RepeatDesign) - df = map(x -> generate_events(design.design), 1:design.repeat) |> x -> vcat(x...) +function generate_events(design::RepeatDesign) + design = deepcopy(design) + df = + map(x -> generate_events(get_rng(design.design), design.design), 1:design.repeat) |> + x -> vcat(x...) if isa(design.design, MultiSubjectDesign) sort!(df, [:subject]) end return df end + + +""" +Internal helper design to subset a sequence design in its individual components +""" +struct SubselectDesign{T} <: AbstractDesign + design::T + key::Char +end + +function generate_events(design::SubselectDesign) + return subset(generate_events(design.design), :event => x -> x .== design.key) +end + + Base.size(design::RepeatDesign{MultiSubjectDesign}) = size(design.design) .* (design.repeat, 1) Base.size(design::RepeatDesign{SingleSubjectDesign}) = size(design.design) .* design.repeat +#Base.size(design::SequenceDesign) = +#size(design.design) .* length(replace(design.sequence, "_" => "",r"\{.*\}"=>"")) + +#Base.size(design::) = size(design.design) .* design.repeat + +# No way to find out what size it is without actually generating first... +Base.size( + design::Union{<:SequenceDesign,<:SubselectDesign,<:RepeatDesign{<:SequenceDesign}}, +) = size(generate_events(design), 1) \ No newline at end of file diff --git a/src/noise.jl b/src/noise.jl index 6b7b8592..fcd376ec 100644 --- a/src/noise.jl +++ b/src/noise.jl @@ -43,7 +43,7 @@ end """ RealisticNoise <: AbstractNoise -Not implemented - planned to use Artefacts.jl to provide real EEG data to add. +Not implemented - planned to use Artifacts.jl to provide real EEG data to add. """ @with_kw struct RealisticNoise <: AbstractNoise noiselevel = 1 @@ -72,7 +72,7 @@ Noise with exponential decay in AR spectrum. `noiselevel` is used to scale the noise !!! warning - Current implementation: Cholesky of NxN matrix needs to be calculated, which might need lots of RAM. + With the current implementation we try to get exponential decay over the whole autoregressive (AR) spectrum, which is N samples (the total number of samples in the signal) long. This involves the inversion of a Cholesky matrix of size NxN matrix, which will need lots of RAM for non-trivial problems. """ @with_kw struct ExponentialNoise <: AbstractNoise noiselevel = 1 diff --git a/src/onset.jl b/src/onset.jl index 4ad97aa7..a6989c26 100644 --- a/src/onset.jl +++ b/src/onset.jl @@ -7,6 +7,8 @@ Provide a Uniform Distribution of the inter-event-distances. `width` is the width of the uniform distribution (=> the jitter). Since the lower bound is 0, `width` is also the upper bound. `offset` is the minimal distance. The maximal distance is `offset + width`. + +For a more advanced parameter specification, see `FormulaUniformOnset``, which allows to specify the onset-parameters depending on the `Design` employed via a linear regression model """ @with_kw struct UniformOnset <: AbstractOnset width = 50 # how many samples jitter? @@ -17,6 +19,8 @@ end Log-normal inter-event distances using the `Distributions.jl` truncated LogNormal distribution. Be careful with large `μ` and `σ` values, as they are on logscale. σ>8 can quickly give you out-of-memory sized signals! + +For a more advanced parameter specification, see `FormulaLogNormalOnset, which allows to specify the onset-parameters depending on the `Design` employed via linear regression model """ @with_kw struct LogNormalOnset <: AbstractOnset μ::Any # mean @@ -34,6 +38,8 @@ struct NoOnset <: AbstractOnset end """ simulate_interonset_distances(rng, onset::UniformOnset, design::AbstractDesign) simulate_interonset_distances(rng, onset::LogNormalOnset, design::AbstractDesign) + simulate_interonset_distances(rng, onset::FormulaUniformOnset, design::AbstractDesign) + simulate_interonset_distances(rng, onset::FormulaLogNormalOnset, design::AbstractDesign) Generate the inter-event-onset vector in samples (returns Int). """ @@ -53,6 +59,13 @@ function simulate_interonset_distances(rng, onset::LogNormalOnset, design::Abstr end +#function simulate_interonset_distances(rng, onset::AbstractOnset,design::) + + +contains_design(d::AbstractDesign, target::Type) = false +contains_design(d::Union{RepeatDesign,SequenceDesign,SubselectDesign}, target::Type) = + d.design isa target ? true : contains_design(d.design, target) + """ simulate_onsets(rng, onset::AbstractOnset, simulation::Simulation) @@ -64,6 +77,23 @@ function simulate_onsets(rng, onset::AbstractOnset, simulation::Simulation) # sample different onsets onsets = simulate_interonset_distances(rng, onset, simulation.design) + + if contains_design(simulation.design, SequenceDesign) + currentsequence = sequencestring(deepcopy(rng), simulation.design) + if !isnothing(findfirst("_", currentsequence)) + + @assert currentsequence[end] == '_' "the blank-indicator '_' has to be the last sequence element" + df = generate_events(simulation.design) + nrows_df = size(df, 1) + stepsize = length(currentsequence) - 1 + # add to every stepsize onset the maxlength of the response + #@debug onsets[stepsize:stepsize:end] + @debug stepsize + onsets[stepsize+1:stepsize:end] .+= 2 .* maxlength(simulation.components) + #@debug onsets[stepsize:stepsize:end] + end + end + if maximum(onsets) > 10000 @warn "Maximum of inter-event-distances was $(maximum(onsets)) - are you sure this is what you want?" end @@ -72,3 +102,81 @@ function simulate_onsets(rng, onset::AbstractOnset, simulation::Simulation) return onsets_accum end + +""" + FormulaUniformOnset <: AbstractOnset +provide a Uniform Distribution of the inter-event-distances, but with regression formulas. +This is helpful if your overlap/event-distribution should be dependend on some condition, e.g. more overlap in cond='A' than cond='B'. + + - `width`: is the width of the uniform distribution (=> the jitter). Since the lower bound is 0, `width` is also the upper bound. + -`width_formula`: choose a formula depending on your `Design` + -`width_β`: Choose a vector of betas, number needs to fit the formula chosen + -`width_contrasts` (optional): Choose a contrasts-dictionary according to the StatsModels specifications + `offset` is the minimal distance. The maximal distance is `offset + width`. + -`offset_formula`: choose a formula depending on your `design` + -`offset_β`: Choose a vector of betas, number needs to fit the formula chosen + -`offset_contrasts` (optional): Choose a contrasts-dictionary according to the StatsModels specifications + +See `UniformOnset` for a simplified version without linear regression specifications +""" +@with_kw struct FormulaUniformOnset <: AbstractOnset + width_formula = @formula(0 ~ 1) + width_β::Vector = [50] + width_contrasts::Dict = Dict() + offset_formula = @formula(0 ~ 1) + offset_β::Vector = [0] + offset_contrasts::Dict = Dict() +end + + +function simulate_interonset_distances(rng, o::FormulaUniformOnset, design::AbstractDesign) + events = generate_events(design) + widths = + UnfoldSim.generate_designmatrix(o.width_formula, events, o.width_contrasts) * + o.width_β + offsets = + UnfoldSim.generate_designmatrix(o.offset_formula, events, o.offset_contrasts) * + o.offset_β + + return Int.( + round.(reduce(vcat, rand.(deepcopy(rng), range.(offsets, offsets .+ widths), 1))) + ) +end + + +@with_kw struct FormulaLogNormalOnset <: AbstractOnset + μ_formula = @formula(0 ~ 1) + μ_β::Vector = [0] + μ_contrasts::Dict = Dict() + σ_formula = @formula(0 ~ 1) + σ_β::Vector = [0] + σ_contrasts::Dict = Dict() + offset_formula = @formula(0 ~ 1) + offset_β::Vector = [0] + offset_contrasts::Dict = Dict() + truncate_upper = nothing # truncate at some sample? +end + +function simulate_interonset_distances( + rng, + o::FormulaLogNormalOnset, + design::AbstractDesign, +) + events = generate_events(design) + + + μs = UnfoldSim.generate_designmatrix(o.μ_formula, events, o.μ_contrasts) * o.μ_β + σs = UnfoldSim.generate_designmatrix(o.σ_formula, events, o.σ_contrasts) * o.σ_β + offsets = + UnfoldSim.generate_designmatrix(o.offset_formula, events, o.offset_contrasts) * + o.offset_β + + + funs = LogNormal.(μs, σs) + if !isnothing(o.truncate_upper) + fun = truncated.(fun; upper = o.truncate_upper) + end + return Int.(round.(offsets .+ rand.(deepcopy(rng), funs, 1))) +end + + diff --git a/src/predefinedSimulations.jl b/src/predefinedSimulations.jl index 874bb870..8022c7ee 100644 --- a/src/predefinedSimulations.jl +++ b/src/predefinedSimulations.jl @@ -7,36 +7,36 @@ predef_eeg(; kwargs...) = predef_eeg(MersenneTwister(1); kwargs...) # without rn predef_eeg(nsubjects::Int; kwargs...) = predef_eeg(MersenneTwister(1), nsubjects; kwargs...) # without rng always call same one """ -predef_eeg(;kwargs...) -predef_eeg(rng;kwargs...) -predef_eeg(rng,n_subjects;kwargs...) + predef_eeg(; kwargs...) + predef_eeg(rng; kwargs...) + predef_eeg(rng, n_subjects; kwargs...) Generate a P1/N1/P3 complex. In case `n_subjects` is defined - `MixedModelComponents` are generated, else `LinearModelComponents`. -The most used `kwargs` is: `return_epoched=true` which returns already epoched data. If you want epoched data without overlap, specify `onset=NoOnset()` and `return_epoched=true` +The most used `kwargs` is: `return_epoched=true` which returns already epoched data. If you want epoched data without overlap, specify `onset = NoOnset()` and `return_epoched = true` +## Default parameters: -## Default params: +#### Design +- `n_repeats = 100`, +- `event_order_function = x -> shuffle(deepcopy(rng), x)`, # random trial order +- `conditions = Dict(...)`, -- n_repeats = 100 -- event_order_function = x->shuffle(deepcopy(rng),x # random trial order -- conditions = Dict(...), +#### Component / Signal +- `sfreq = 100`, +- `p1 = (p100(; sfreq = sfreq), @formula(0 ~ 1), [5], Dict())`, # P1 amp 5, no effects +- `n1 = (n170(; sfreq = sfreq), @formula(0 ~ 1 + condition), [5,-3], Dict())`, # N1 amp 5, dummy-coded condition effect (levels "car", "face") of -3 +- `p3 = (p300(; sfreq = sfreq), @formula(0 ~ 1 + continuous), [5,1], Dict())`, # P3 amp 5, continuous effect range [-5,5] with slope 1 -#### component / signal -- sfreq = 100, -- p1 = (p100(;sfreq=sfreq), @formula(0~1),[5],Dict()), # P1 amp 5, no effects -- n1 = (n170(;sfreq=sfreq), @formula(0~1+condition),[5,-3],Dict()), # N1 amp 5, dummycoded condition effect (levels "car", "face") of -3 -- p3 = (p300(;sfreq=sfreq), @formula(0~1+continuous),[5,1],Dict()), # P3 amp 5, continuous effect range [-5,5] with slope 1 +#### Onset +- `overlap = (0.5,0.2)`, # offset + width/length of Uniform noise. put offset to 1 for no overlap. put width to 0 for no jitter +- `onset = UniformOnset(; offset = sfreq * 0.5 * overlap[1], width = sfreq * 0.5 * overlap[2])`, -#### noise -- noiselevel = 0.2, -- noise = PinkNoise(;noiselevel=noiselevel), - -#### onset -- overlap = (0.5,0.2), # offset + width/length of Uniform noise. put offset to 1 for no overlap. put width to 0 for no jitter -- onset=UniformOnset(;offset=sfreq*0.5*overlap[1],width=sfreq*0.5*overlap[2]), +#### Noise +- `noiselevel = 0.2`, +- `noise = PinkNoise(; noiselevel = noiselevel)`, """ function predef_eeg( rng; @@ -79,7 +79,7 @@ function predef_eeg( kwargs..., ) - components = [] + components = AbstractComponent[] for c in comps append!(components, [T(c...)]) end @@ -139,34 +139,33 @@ function predef_eeg( end """ - predef_2x2(rng::AbstractRNG;kwargs...) + predef_2x2(rng::AbstractRNG; kwargs...) -The most used `kwargs` is: `return_epoched=true` which returns already epoched data. If you want epoched data without overlap, specify `onset=NoOnset()` and `return_epoched=true` +The most used `kwargs` is: `return_epoched = true` which returns already epoched data. If you want epoched data without overlap, specify `onset = NoOnset()` and `return_epoched = true` -#### design -- `n_items`=100, -- `n_subjects`=1, -- `conditions` = Dict(:A=>["a_small","a_big"],:B=>["b_tiny","b_large"]), -- `event_order_function` = x->shuffle(deepcopy(rng),x), +#### Design +- `n_items = 100`, +- `n_subjects = 1`, +- `conditions = Dict(:A => ["a_small","a_big"], :B => ["b_tiny","b_large"])`, +- `event_order_function = x -> shuffle(deepcopy(rng), x)`, -#### component / signal -- `signalsize` = 100, length of simulated hanning window -- `basis`` = hanning(signalsize), the actual "function", `signalsize` is only used here -- `β` = [1,-0.5,.5,+1], the parameters -- `σs` = Dict(:subject=>[1,0.5,0.5,0.5],:item=>[1]), - only in n_subjects>=2 case, specifies the random effects -- `contrasts` = Dict(:A=>EffectsCoding(),:B=>EffectsCoding()) - effect coding by default -- `formula` = n_subjects==1 ? @formula(0~1+A*B) : @formula(dv~1+A*B+(A*B|subject)+(1|item)), +#### Component / Signal +- `signalsize = 100`, # Length of simulated hanning window +- `basis = hanning(signalsize)`, # The actual "function", `signalsize` is only used here +- `β = [1, -0.5, .5, +1]`, # The parameters +- `σs = Dict(:subject => [1, 0.5, 0.5, 0.5],:item => [1])`, # Only in n_subjects >= 2 case, specifies the random effects +- `contrasts = Dict(:A => EffectsCoding(), :B => EffectsCoding())` # Effect coding by default +- `formula = n_subjects == 1 ? @formula(0 ~ 1 + A*B) : @formula(dv ~ 1 + A*B + (A*B|subject) + (1|item))`, -#### noise -- `noiselevel` = 0.2, -- `noise` = PinkNoise(;noiselevel=noiselevel), +#### Onset +- `overlap = (0.5,0.2)`, +- `onset = UniformOnset(; offset = signalsize * overlap[1], width = signalsize * overlap[2])`, # Put offset to 1 for no overlap. put width to 0 for no jitter -#### onset -- `overlap` = (0.5,0.2), -- `onset`=UniformOnset(;offset=signalsize*overlap[1],width=signalsize*overlap[2]), #put offset to 1 for no overlap. put width to 0 for no jitter +#### Noise +- `noiselevel = 0.2`, +- `noise = PinkNoise(; noiselevel = noiselevel)`, - -Careful if you modify n_items with n_subjects = 1, n_items has to be a multiple of 4 (or your equivalent conditions factorial, e.g. all combinations length) +Be careful if you modify n_items with n_subjects = 1, n_items has to be a multiple of 4 (or your equivalent conditions factorial, e.g. all combinations length) """ function predef_2x2( rng::AbstractRNG; diff --git a/src/sequence.jl b/src/sequence.jl new file mode 100644 index 00000000..8322455f --- /dev/null +++ b/src/sequence.jl @@ -0,0 +1,51 @@ +function rand_re(rng::AbstractRNG, machine::Automa.Machine) + out = IOBuffer() + node = machine.start + + while true + if node.state ∈ machine.final_states + (rand() ≤ 1 / (length(node.edges) + 1)) && break + end + + edge, node = rand(rng, node.edges) + label = rand(rng, collect(edge.labels)) + print(out, Char(label)) + end + + return String(take!(out)) +end + +sequencestring(rng, d::SequenceDesign) = sequencestring(rng, d.sequence) + +function sequencestring(rng, str::String) + #match curly brackets and replace them + @assert isnothing(findfirst("*", str)) && isnothing(findfirst("+", str)) "'infinite' sequences currently not supported" + crly = collect(eachmatch(r"(\{[\d]+,[\d]+\})", str)) + for c in reverse(crly) + m = replace(c.match, "{" => "", "}" => "") + rep_minimum, rep_maximum = parse.(Int, split(m, ",")) + #@info str[c.offset-1] + if str[c.offset-1] == ']' + #@info "brackets found" + bracket_end_idx = c.offset - 1 + bracket_start_idx = findlast("[", str[1:bracket_end_idx])[1] + #@info bracket_end_idx,bracket_start_idx + repeat_string = "[" * str[bracket_start_idx+1:bracket_end_idx-1] * "]" + else + bracket_start_idx = c.offset - 1 + bracket_end_idx = c.offset - 1 + repeat_string = string(str[c.offset-1]) + end + + replacement_string = repeat(repeat_string, rand(rng, rep_minimum:rep_maximum)) + #@info "rep" replacement_string + str = + str[1:bracket_start_idx-1] * + replacement_string * + str[bracket_end_idx+length(c.match)+1:end] + # @debug str + end + return rand_re(rng, Automa.compile(RE(str))) +end + +sequencestring(rng, d::RepeatDesign) = sequencestring(rng, d.design) diff --git a/src/simulation.jl b/src/simulation.jl index 2376e8ff..e5e85531 100755 --- a/src/simulation.jl +++ b/src/simulation.jl @@ -6,9 +6,15 @@ Simulation( noisetype::AbstractNoise, ) = Simulation(design, [component], onset, noisetype) + +function simulate(design::AbstractDesign, signal, onset::AbstractOnset, args...; kwargs...) + @warn "No random generator defined, used the default (`Random.MersenneTwister(1)`) with a fixed seed. This will always return the same results and the user is strongly encouraged to provide their own random generator!" + simulate(MersenneTwister(1), design, signal, onset, args...; kwargs...) +end + """ simulate( - rng, + [rng::AbstractRNG,] design::AbstractDesign, signal, onset::AbstractOnset, @@ -29,6 +35,8 @@ Some remarks to how the noise is added: - If `return_epoched = true` and `onset =NoOnset()` the noise is added to the epoched data matrix - If `onset` is not `NoOnset`, a continuous signal is created and the noise is added to this i.e. this means that the noise won't be the same as in the `onset = NoOnset()` case even if `return_epoched = true`. - The case `return_epoched = false` and `onset = NoOnset()` is not possible and therefore covered by an assert statement + - `simulate(rng,design::SequenceDesign,...)` + If no `design.rng` was defined for `SequenceDesign`, we replace it with the `simulation`-function call `rng` object """ @@ -38,14 +46,33 @@ function simulate(design::AbstractDesign, signal, onset::AbstractOnset, args...; simulate(MersenneTwister(1), design, signal, onset, args...; kwargs...) end -simulate( +function simulate( rng::AbstractRNG, design::AbstractDesign, signal, onset::AbstractOnset, noise::AbstractNoise = NoNoise(); kwargs..., -) = simulate(rng, Simulation(design, signal, onset, noise); kwargs...) +) + + if is_SequenceDesign(design) + design = sequencedesign_add_rng(rng, design) + end + simulate(rng, Simulation(design, signal, onset, noise); kwargs...) +end + + +sequencedesign_add_rng(rng, design::AbstractDesign) = design +sequencedesign_add_rng(rng, design::RepeatDesign) = + RepeatDesign(sequencedesign_add_rng(rng, design.design), design.repeat) +sequencedesign_add_rng(rng, design::SequenceDesign) = + isnothing(design.rng) ? + SequenceDesign(design.design, design.sequence, design.sequencelength, rng) : design + + +is_SequenceDesign(d::AbstractDesign) = false +is_SequenceDesign(d::RepeatDesign) = is_SequenceDesign(d.design) +is_SequenceDesign(d::SequenceDesign) = true function simulate(rng::AbstractRNG, simulation::Simulation; return_epoched::Bool = false) @@ -127,11 +154,13 @@ function create_continuous_signal(rng, responses, simulation) # combine responses with onsets max_length_component = maxlength(components) - max_length_continuoustime = Int(ceil(maximum(onsets))) .+ max_length_component + offset_range = maxoffset(simulation.components) - minoffset(simulation.components) + max_length_continuoustime = + Int(ceil(maximum(onsets))) .+ max_length_component .+ offset_range signal = zeros(n_chan, max_length_continuoustime, n_subjects) - + @debug size(signal), offset_range for e = 1:n_chan for s = 1:n_subjects for i = 1:n_trials @@ -141,7 +170,9 @@ function create_continuous_signal(rng, responses, simulation) responses, e, s, - one_onset:one_onset+max_length_component-1, + one_onset+minoffset(simulation.components):one_onset+max_length_component-1+maxoffset( + simulation.components, + ), (s - 1) * n_trials + i, ) end @@ -172,6 +203,8 @@ function add_responses!(signal, responses::Vector, e, s, tvec, erpvec) @views signal[e, tvec, s] .+= responses[:, erpvec] end function add_responses!(signal, responses::Matrix, e, s, tvec, erpvec)# + # @debug size(signal), size(responses), e, s, size(tvec), size(erpvec) + #@debug tvec, erpvec @views signal[e, tvec, s] .+= responses[:, erpvec] end function add_responses!(signal, responses::AbstractArray, e, s, tvec, erpvec) diff --git a/src/types.jl b/src/types.jl index fb29ef53..7154340c 100644 --- a/src/types.jl +++ b/src/types.jl @@ -13,7 +13,10 @@ abstract type AbstractHeadmodel end struct Simulation design::AbstractDesign - components::Vector{AbstractComponent} + components::Union{ + <:Dict{<:Char,<:Vector{<:AbstractComponent}}, + <:Vector{<:AbstractComponent}, + } onset::AbstractOnset noisetype::AbstractNoise end diff --git a/test/runtests.jl b/test/runtests.jl index 723fc891..2cbc1de5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,4 +8,5 @@ include("setup.jl") include("onset.jl") include("simulation.jl") include("helper.jl") + include("sequence.jl") end diff --git a/test/sequence.jl b/test/sequence.jl new file mode 100644 index 00000000..e4a9ace0 --- /dev/null +++ b/test/sequence.jl @@ -0,0 +1,11 @@ +@testset "Check Sequences" begin + @test isa(UnfoldSim.check_sequence("bla_"), String) + @test isa(UnfoldSim.check_sequence("bla"), String) + @test_throws AssertionError UnfoldSim.check_sequence("b_la_") + @test_throws AssertionError UnfoldSim.check_sequence("b_la") + @test_throws AssertionError UnfoldSim.check_sequence("_bla") + +end +@test length(UnfoldSim.sequencestring(StableRNG(1), "A{10,10}")) == 10 +@test length(UnfoldSim.sequencestring(StableRNG(1), "A{10,10}B")) == 11 +@test length(UnfoldSim.sequencestring(StableRNG(1), "A{10,20}")) >= 10 \ No newline at end of file