diff --git a/paper/figures/code/example_objects.py b/paper/figures/code/example_objects.py index 21fb0ff..c985192 100644 --- a/paper/figures/code/example_objects.py +++ b/paper/figures/code/example_objects.py @@ -1,5 +1,4 @@ from deepbench.image import SkyImage, ShapeImage -from deepbench.physics_object import HamiltonianPendulum, Pendulum import matplotlib.pyplot as plt import numpy as np diff --git a/paper/figures/code/example_pendulums.py b/paper/figures/code/example_pendulums.py index a58d0f3..95525ba 100644 --- a/paper/figures/code/example_pendulums.py +++ b/paper/figures/code/example_pendulums.py @@ -26,7 +26,9 @@ # Plot that against the time and with scatter and line options pendulum_noiseless = pendulum.create_object(time, noiseless=True) subplots[0].plot(time, pendulum_noiseless, color="black") -subplots[0].scatter(time, pendulum_noiseless, color="black", label="Noiseless") +subplots[0].scatter( + time, pendulum_noiseless, color="black", label="Noiseless", marker=">" +) # Use the noiseless=False to do the same with a noiseless pendulum pendulum_noisy = pendulum.create_object(time, noiseless=False) @@ -46,22 +48,22 @@ }, ) -# Cacluate the pendulum positions and engeries +# Calculate the pendulum positions and energies pendulum_data = pendulum.create_object(time) # Plot the line and scatterplot versions of the position wrt time subplots[1].plot(pendulum_data[4], pendulum_data[0], color="black") subplots[1].scatter( - pendulum_data[4], pendulum_data[0], color="black", label="Noiseless" + pendulum_data[4], pendulum_data[0], color="black", label="Noiseless", marker=">" ) -# Repeat the process with the noisely pendulum +# Repeat the process with the noisy pendulum pendulum = HamiltonianPendulum( pendulum_arm_length=10.0, starting_angle_radians=np.pi / 4, acceleration_due_to_gravity=9.8, noise_std_percent={ - "pendulum_arm_length": 0.2, + "pendulum_arm_length": 0.1, "starting_angle_radians": 0.0, "acceleration_due_to_gravity": 0.0, }, @@ -81,9 +83,9 @@ # 
plot.set(xticks=[], yticks=[]) plot.set_xlabel("Time (s)") - plot.set_ylabel("X Position") + plot.set_ylabel("X Position (m)") # Assign legend location subplots[1].legend(loc="center left", bbox_to_anchor=(1.02, 1)) -plt.savefig("../pendulums.png") +plt.savefig("./pendulums.png") diff --git a/paper/figures/pendulums.png b/paper/figures/pendulums.png old mode 100644 new mode 100755 index c8cfabf..25d45e8 Binary files a/paper/figures/pendulums.png and b/paper/figures/pendulums.png differ diff --git a/paper/paper.md b/paper/paper.md index 63c695e..d69961b 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -10,11 +10,11 @@ authors: - name: M. Voetberg orcid: 0009-0005-2715-4709 equal-contrib: true - affiliation: "1" - - name: Ashia Livaudais + affiliation: "1" + - name: Ashia Livaudais orcid: 0000-0003-3734-335X equal-contrib: true - affiliation: "1" + affiliation: "1" - name: Becky Nevin orcid: 0000-0003-1056-8401 equal-contrib: false @@ -53,7 +53,7 @@ On the other hand, complex physics simulations (e.g., cosmological _N_-body simu The physical sciences community lacks sufficient datasets and software packages as benchmarks for the development of statistical and machine learning models. In particular, there currently does not exist simulation software packages that generates data underpinned by physical principles and that satisfies the following criteria: -* multi-domain +* multi-domain * multi-purpose * fast * reproducible @@ -70,7 +70,7 @@ First, benchmark datasets of natural images include MNIST [@dengMnistDatabaseHan -# DeepBench +# DeepBench Software The **DeepBench** software package simulates data for analysis tasks that require precise numerical calculations. First, the simulation models are fundamentally mechanistic: they are based on relatively simple analytic mathematical expressions, which are physically meaningful. This means that for each model, the number of input parameters that determine a simulation output is small (<10 for most models). 
These elements make the software package fast and the outputs interpretable: they are conceptually and mathematically relatable to the inputs. Second, **DeepBench** also includes methods to precisely prescribe noise for inputs, which are propagated to outputs. This permits studies and the development of statistical inference models that require uncertainty quantification, which is a significant challenge in modern machine learning research. Third, the software framework includes features that permit a high degree of reproducibility: e.g., random seeds at every key stage of input, a unique identification tag for each simulation run, and the tracking and storage of metadata (including input parameters) and the related outputs. Fourth, the primary user interface is a YAML configuration file, which allows the user to specify every aspect of the simulation: e.g., types of objects, numbers of objects, noise type, and number of classes. This feature---which is especially useful when building and studying complex models like deep learning neural networks---permits the user to incrementally decrease or increase the complexity of the simulation with a high level of granularity. @@ -87,23 +87,22 @@ The **DeepBench** software package simulates data for analysis tasks that requir * Readily extensible to new physics and outputs -# Primary Modules - -* Geometry objects: two-dimensional images generated with `matplotlib` [@hunterMatplotlib2DGraphics2007b]. The shapes include _n_-sided polygons, arcs, straight lines, and ellipses. They are solid, filled, or unfilled two-dimensional shapes with edges of variable thickness. -* Physics objects: one-dimensional profiles for two types of implementations of pendulum dynamics: one using Newtonian physics, the other using Hamiltonian physics. -* Astronomy objects: two-dimensional images generated based on radial profiles of typical astronomical objects. 
The star object is created using the Moffat distribution provided by the Astropy [@theastropycollaborationAstropyCommunityPython2013a] library. The spiral galaxy object is created with the function used to produce a logarithmic spiral [@ringermacherNewFormulaDescribing2009a]. The elliptical galaxy object is created using the Sérsic profile provided by the Astropy library. Two-dimensional models are representations of astronomical objects commonly found in datasets used for galaxy morphology classification. -* Image: two-dimensional images that are combinations and/or concatenations of geometry or astronomy objects. The combined images are stored within `matplotlib` meshgrid objects. Sky images are composed of any combination of astronomy objects, while geometric images comprise individual geometric shape objects. -* Collection: provides a framework for producing module images or objects at once and storing all parameters that were included in their generation, including exact noise levels, object hyper-parameters, and non-specified defaults. +# Primary Modules +* Geometry objects: two-dimensional images generated with `matplotlib` [@hunterMatplotlib2DGraphics2007b]. The shapes include $N$-sided polygons, arcs, straight lines, and ellipses. They are solid, filled, or unfilled two-dimensional shapes with edges of variable thickness. +* Physics objects: one-dimensional profiles for two types of implementations of pendulum dynamics: one using Newtonian physics, the other using Hamiltonian physics. +* Astronomy objects: two-dimensional images generated based on radial profiles of typical astronomical objects. The star object is created using the Moffat distribution provided by the Astropy [@theastropycollaborationAstropyCommunityPython2013a] library. The spiral galaxy object is created with the function used to produce a logarithmic spiral [@ringermacherNewFormulaDescribing2009a]. The elliptical galaxy object is created using the Sérsic profile provided by the Astropy library.
Two-dimensional models are representations of astronomical objects commonly found in datasets used for galaxy morphology classification. +* Image: two-dimensional images that are combinations and/or concatenations of Geometry or Astronomy objects. The combined images are stored within `matplotlib` meshgrid objects. Sky images are composed of any combination of Astronomy objects, while geometric images comprise individual geometric shape objects. +* Collection: provides a framework for producing module images or objects at once and storing all parameters that were included in their generation, including exact noise levels, object hyper-parameters, and non-specified defaults. All objects also come with the option to add noise to each object. For physics objects---i.e., the pendulum---the user may add Gaussian noise to parameters: initial angle $\theta_0$, the pendulum length $L$, the gravitational acceleration $g$, the planet properties $\Phi = (M/r^2)$, and Newton's gravity constant $G$. Note that $g = G * \Phi = G * M/r^2$: all parameters in this relationship can receive noise. For astronomy and geometry Objects, which are images, the user can add Poisson or Gaussian noise to the output images. Finally, the user can regenerate the same noise using the saved random seed. -# Example Outputs +# Example Outputs -![Example outputs of **DeepBench**, containing shapes, and astronomy objects. Variants include a single object, a noisy single object, two objects, and two noisy objects.](figures/example_objects.png) +![Example outputs of **DeepBench**, containing geometric and astronomy objects. Variants include a single object, a noisy single object, two objects, and two noisy objects. The geometric outputs are produced with filled ellipses and outlined rectangles, with a Gaussian noise overlay for the noisy variants.
The astronomy outputs feature a star and an elliptical galaxy profile with similarly applied noise.](figures/example_objects.png) -![Example physics simulations from **DeepBench**. Pendulums show noisy and non-noisy variants of the Newtonian (left) and Hamiltonian (right) mathematical simulations.](figures/pendulums.png) +![Example physics simulations from **DeepBench**. Pendulums show noisy and noiseless variants of the Newtonian (left) and Hamiltonian (right) mathematical simulations. Both use initial conditions of an arm length of 10 meters and a starting angle of $\pi/4$ radians. The noisy variants introduce uncertainty to these input parameters, along with the measurement of acceleration due to gravity.](figures/pendulums.png) # Acknowledgments