Merge pull request #2 from sybila/dev-network-generating

General BN inference
sybila · Nov 13, 2023 · a474032 · a474032
2 parents 5e5c183 + 1cd6de3
commit a474032
Show file tree

Hide file tree

Showing 22 changed files with 1,275 additions and 293 deletions.
diff --git a/.githooks/README.txt b/.githooks/README.txt
diff --git a/.githooks/pre-commit b/.githooks/pre-commit
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -7,7 +7,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        platform: [ macos-latest, ubuntu-20.04, windows-latest ]
+        platform: [ macos-latest, ubuntu-latest, ubuntu-20.04, windows-latest ]
 
     name: Rustfmt
     runs-on: ${{ matrix.platform }}
@@ -26,7 +26,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        platform: [ macos-latest, ubuntu-20.04, windows-latest ]
+        platform: [ macos-latest, ubuntu-latest, ubuntu-20.04, windows-latest ]
 
     needs: fmt
     name: Check
@@ -49,7 +49,6 @@ jobs:
     env:
       RUSTFLAGS: "-D warnings"
     steps:
-      - uses: actions/checkout@v3
       - name: Checkout.
         uses: actions/checkout@v3
       - name: install Rust stable

diff --git a/.gitignore b/.gitignore
@@ -18,3 +18,4 @@ idk.txt
 
 /experiment/
 /.idea/
+/test/
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "boolean-network-sketches"
-version = "0.1.0"
-authors = ["Ondřej Huvar <[email protected]>"]
+version = "0.2.0"
+authors = ["Ondřej Huvar <[email protected]>", "Samuel Pastva <[email protected]>"]
 edition = "2021"
 description = "Framework for Boolean network inference using BN sketches."
 homepage = "https://github.com/sybila/boolean-network-sketches"
@@ -11,14 +11,18 @@ keywords = ["inference", "boolean-network", "model-checking", "symbolic", "syste
 categories = ["science", "simulation"]
 license = "MIT"
 exclude = ["benchmark_models", ".github", ".githooks"]
-rust-version = "1.64"
+rust-version = "1.72"
 
 #[profile.release]
 #lto = true
 
 [profile.test]
 opt-level = 3
 
+[[bin]]
+name = "sketches-inference"
+path = "src/main.rs"
+
 [[bin]]
 name = "inference-with-attractors"
 path = "src/bin/bn_inference_with_attractors.rs"
@@ -35,14 +39,10 @@ path = "src/bin/case_study_arabidopsis.rs"
 name = "small-example"
 path = "src/bin/small_example.rs"
 
-[[bin]]
-name = "sketches-inference"
-path = "src/bin/sketches_inference.rs"
-
 [dependencies]
-biodivine-lib-bdd = "=0.4.2"
-biodivine-lib-param-bn = "=0.4.3"
-biodivine-hctl-model-checker = "=0.1.1"
-clap = { version = "=4.1.4", features = ["derive"] }
-termcolor = "=1.1.2"
-rand = "=0.8.5"
+biodivine-lib-bdd = ">=0.5.2, <1.0.0"
+biodivine-lib-param-bn = ">=0.4.7, <1.0.0"
+biodivine-hctl-model-checker = ">=0.2.0, <1.0.0"
+clap = { version = "4.1.4", features = ["derive"] }
+termcolor = "1.1.2"
+rand = "0.8.5"
diff --git a/README.md b/README.md
@@ -1,51 +1,118 @@
 # Boolean Network Sketches: A Unifying Framework for Logical Model Inference
 
-This is a small Rust library focusing on logical model inference through Boolean network sketches. 
-It contains the high-level implementation of the framework, as well as benchmark models and data used for experiments and case studies. 
+This is a small Rust library focusing on logical model inference through Boolean network sketches, as described in our [paper](https://doi.org/10.1093/bioinformatics/btad158). 
+The repository contains the high-level implementation of the framework, as well as benchmark models and data used for experiments and case studies. 
 This readme describes the repository contents and also contains instructions on how to replicate the main experimental results of the paper.
 The whole repository is available at [github](https://github.com/sybila/boolean-network-sketches), as well as at [zenodo](https://doi.org/10.5281/zenodo.7688740).
 
+This repository contains a newer release of the framework.
+The original release corresponding to the artefact of the paper can be downloaded [here](https://github.com/sybila/boolean-network-sketches/releases/tag/v0.1.0).
 
-## Requirements and Dependencies
+## Setup
 
-We recommend to run the benchmarks on a machine with at least 16GB RAM. 
-Otherwise, the largest benchmarks (models with 100+ variables) may take a long time to evaluate (however, even with less memory, it should be fine to run the code regarding both case studies or smaller benchmarks). 
-All displayed computation times were acquired on a standard laptop with an 11th Gen Intel i5 CPU and 16GB RAM.
-
-To run the experiments, you will need the Rust compiler. 
-We recommend following the instructions on [rustlang.org](https://www.rust-lang.org/learn/get-started) (default configuration should be sufficient) instead of using a package manager, however either method should work ok. 
-When using the official installer, everything is stored in `~/.cargo`, so admin privilages are not necessary. 
-Once you are done with the experiments, you can uninstall the compiler by running `rustup self uninstall`. 
+To run the experiments, you will need the Rust compiler.
+We recommend following the official instructions at [rustlang.org](https://www.rust-lang.org/learn/get-started) (default configuration should be sufficient) instead of using a package manager. 
+However, either method should work fine.
+When using the official installer, everything is stored in `~/.cargo`, so admin privileges are not necessary.
 The tested version of Rust is `1.64` (Sep 2022).
-Rust will automatically download and cache all other libraries necessary to compile the project. 
-You should therefore have internet access while running the commands for the first time. 
+Rust will automatically download and cache all other libraries necessary to compile the project.
+You should therefore have internet access while running the commands for the first time.
 You can force rust do download all dependencies by running `cargo fetch`.
 
-Moreover, for convenience, we have prepared Bash scripts to wrap all the commands needed. If you want to use them, you will need a Unix-based system with Bash. 
-On Windows, it should be sufficient to use [WSL](https://learn.microsoft.com/en-us/windows/wsl/install) (we have tested the Ubuntu-20.04 WSL). 
-However, note that these scripts are not needed, as you can execute the Rust code directly (as described below).
 
+## Running the Inference Process
 
-## Sketch Format
+### Boolean network sketch
 
 Boolean network sketch represents a combination of prior knowledge, experimental data, and hypotheses regarding the modelled system.
-BN sketch is given by following components:
+BN sketch is given by the following components:
 - influence graph
 - partially specified Boolean network
 - properties of update functions
 - dynamic properties of the system
 
-The first three components of the sketch are all covered by the intuitive aeon model format (details [here](https://biodivine.fi.muni.cz/aeon/aeon-manual.pdf)). 
-This format allows description of:
-- possible influences between variables
-- kinds of influences (such as inhibiting/activating or observable/non-observable), from which we can derive update function properties
-- partially specified update functions 
+### Input format
 
-The dynamic properties are given in a form of HCTL formulae (our formulae format is described in detail [here](https://github.com/sybila/biodivine-hctl-model-checker)). 
-Several types of formulae can be generated from data on the run (via utilities in `src/create_inference_formulae.rs`).
+The input for the analysis is an [AEON model](https://biodivine.fi.muni.cz/aeon) annotated with HCTL formulas.
 
+The intuitive aeon model format (details [here](https://biodivine.fi.muni.cz/aeon/aeon-manual.pdf)) covers the first three components of the sketch.
+To get familar with AEON models, we recommend [this page](https://biodivine.fi.muni.cz/aeon/manual/v0.4.0/model_editor/import_export.html) 
+of the AEON manual. A wide range of real-world models for testing can be also found
+[here](https://github.com/sybila/biodivine-boolean-models) (however, these models do not contain any HCTL formulas).
 
-## Benchmark Models and Pre-Computed Raw Results
+The dynamic properties are given by HCTL formulas.
+The details of the HCTL syntax can be found [here](https://github.com/sybila/biodivine-hctl-model-checker).
+Note that several types of formulae can be generated from data on the run (via utilities provided in `src/data_processing/data_encoding.rs`).
+You can add a *named dynamic property* by annotating the model with a line in the following format:
+```
+#! dynamic_property: NAME: #`HCTL_FORMULA`#
+```
+
+Below, we show a simple example of how to include assertions and properties in a model file (we intentionally
+limit the use of hybrid operators to simplify the example). 
+
+```
+# This property `reach_apoptosis` states that every state must be able to reach a state where
+# variable `Apoptosis` is true. `#!` is used to start a "comment annotation"
+# The #` and `# serve as opening/closing escape characters for the HCTL formula.
+#! dynamic_property: reach_apoptosis: #`EF Apoptosis`#
+
+# Property `will_die` states that the system will always eventually reach `Apoptosis`.
+#! dynamic_property: will_die: #`AF AG Apoptosis`#
+
+# Property `cannot_be_undead` states that whenever `Apoptosis` is true, it must
+# stay true forver.
+#! dynamic_property: cannot_be_undead: #`Apoptosis => AG Apoptosis`#
+```
+
+Example of such annotated model is given in `benchmark_models/annotated_tlgl.aeon`.
+Note that the example formulae can be generated automatically, as shown in our case study.
+We are currently working on adding automatic encoding for various kinds of datasets directly to the inference program.
+
+### Running the Inference 
+
+To run the general inference process (code in `src/main.rs`), compile the program first using `cargo build --release`
+and then run it as
+```
+.\target\release\sketches-inference [OPTIONS] <MODEL_PATH>
+```
+- `MODEL_PATH` must point a file with a valid sketch model (as described above)
+
+For example, you may try running the inference on the provided TLGL model:
+```
+.\target\release\sketches-inference .\benchmark_models\annotated_tlgl.aeon
+```
+
+The program runs the inference, prints details regarding the computation progress and, once finished, shortly summarizes the results (how many satisfying BNs there are).
+
+By providing additional `OPTIONS` (CLI arguments), you can receive more detailed output.
+This enables, for example, to generate witness networks or summarize update functions of the satisfying BNs.
+To see the details regarding program's optional arguments, you can run 
+```
+.\target\release\sketches-inference --help`
+```
+
+
+
+## Benchmarks and Evaluation
+
+Here we describe how to execute the experiments described in the paper.
+You can either use prepared Bash wrapper scripts to re-run the experiments, or run them directly by compiling and executing the Rust binaries.
+
+### Requirements and Dependencies
+
+We recommend to run the benchmarks on a machine with at least 16GB RAM. 
+Otherwise, the largest benchmarks (models with 100+ variables) may take a long time to evaluate (however, even with less memory, it should be fine to run the code regarding both case studies or smaller benchmarks). 
+All displayed computation times were acquired on a standard laptop with an 11th Gen Intel i5 CPU and 16GB RAM.
+
+Follow the instructions in section `Setup` to install the Rust compiler.
+Once you are done with the experimenting, you can uninstall the compiler by running `rustup self uninstall`.
+
+For convenience, we have prepared Bash scripts to wrap all the commands needed. If you want to use them, you will need a Unix-based system with Bash. 
+On Windows, it should be sufficient to use [WSL](https://learn.microsoft.com/en-us/windows/wsl/install) (we have tested the Ubuntu-20.04 WSL). 
+However, note that these scripts are not needed, as you can execute the Rust code directly (as described below).
+
+### Benchmark Models and Pre-Computed Raw Results
 
 All the models used for the evaluation (and some more) are in the `benchmark_models` directory. 
 Each model has its own subdirectory.
@@ -64,12 +131,7 @@ The sub-folders with models used for scalability evaluation contain four files e
 - `results.txt` - resulting output from the corresponding binary (see below)
 - `metadata.txt` - links to the model's original source and publication
 
-
-## Binaries and Scripts Regarding Experiments
-
-You can either use prepared Bash wrapper scripts to re-run the experiments, or run them directly by compiling and executing the Rust binaries.
-
-### Bash Wrapper Scripts
+### Running Experiments Through Prepared Scripts
 
 We have prepared four Bash scripts which encompass the compilation and execution of the underlying Rust code.
 Scripts `run_case_study_1.sh` and `run_case_study_2.sh` run the corresponding case studies - for each case study, two computations are executed, corresponding to the two respective sketch variants from the paper.
@@ -93,12 +155,9 @@ You will need a Python 3 (we have used Python 3.8 and 3.10).
 python3 run_all_experiments.py
 ```
 
-### Individual Binaries
+### Running Individual Experiments Directly
 
-To directly re-run individual benchmarks or case studies, compile the code first (on Windows, we recommend using Powershell 5+ to run the experiments):
-```
-cargo build --release
-```
+To directly re-run individual benchmarks or case studies, compile the code first with `cargo build --release` (on Windows, we recommend using Powershell 5+ to run the experiments).
 
 The Rust code regarding the two case studies from the paper can be found in `src/bin/` folder (particularly, `case_study_arabidopsis.rs` and `case_study_tlgl.rs`). 
 The resulting binaries will be in `target/release/`. 
@@ -127,12 +186,14 @@ You do not need to add any options to replicate the experimental results, but if
 ./target/release/inference-with-attractors -h
 ````
 
+### Tests 
+To run the test suite, use `cargo test` command.
 
 ## Availability
 
-This repository is available on [Github](https://github.com/sybila/boolean-network-sketches) and also at [zenodo](https://doi.org/10.5281/zenodo.7688740).
+This repository with the artefact is available on [Github](https://github.com/sybila/boolean-network-sketches) and also at [zenodo](https://doi.org/10.5281/zenodo.7688740).
 
-The implementation is based mainly on two of our Rust libraries: 
+The implementation is based mainly on two of our Rust libraries:
 - [biodivine-hctl-model-checker](https://github.com/sybila/biodivine-hctl-model-checker) for the underlying symbolic coloured model checking
 - [biodivine-lib-param-bn](https://github.com/sybila/biodivine-lib-param-bn) which facilitate the symbolic encoding of Boolean networks
 
@@ -141,7 +202,3 @@ The implementation itself is fairly minimal (includes i.e., high-level framework
 Everything is open-source and available with the permissive MIT License.
 
 To visualize and modify the partially specified BN models (aeon files) included in this repository, you can use the online interface of our tool [AEON](https://biodivine.fi.muni.cz/aeon). This tool can also be used to further analyse the attractors of Boolean network models.
-
-
-## Tests 
-To run the set of tests, use `cargo test` command.
diff --git a/benchmark_models/annotated_tlgl.aeon b/benchmark_models/annotated_tlgl.aeon
@@ -0,0 +1,66 @@
+## PROPERTIES
+#! dynamic_property: diseased_attr: #`(3{x}: (@{x}: (~Apoptosis_ & S1P & sFas & ~Fas & ~Ceramide_ & ~Caspase & MCL1 & ~BID_ & ~DISC_ & FLIP_ & ~IFNG_ & GPCR_ & (AG EF (~Apoptosis_ & S1P & sFas & ~Fas & ~Ceramide_ & ~Caspase & MCL1 & ~BID_ & ~DISC_ & FLIP_ & ~IFNG_ & GPCR_ & {x})))))`#
+#! dynamic_property: healthy_fixed_point: #`(3{x}: (@{x}: (Apoptosis_ & ~S1P & ~sFas & ~Fas & ~Ceramide_ & ~Caspase & ~MCL1 & ~BID_ & ~DISC_ & ~FLIP_ & ~CTLA4_ & ~TCR & ~IFNG_ & ~CREB & ~P2 & ~SMAD_ & ~GPCR_ & ~IAP_ & (AX (Apoptosis_ & ~S1P & ~sFas & ~Fas & ~Ceramide_ & ~Caspase & ~MCL1 & ~BID_ & ~DISC_ & ~FLIP_ & ~CTLA4_ & ~TCR & ~IFNG_ & ~CREB & ~P2 & ~SMAD_ & ~GPCR_ & ~IAP_)))))`#
+
+## MODEL
+$S1P:!Apoptosis_ & q(Ceramide_)
+Ceramide_ -| S1P
+Apoptosis_ -| S1P
+$sFas:!Apoptosis_ & p(S1P)
+S1P -> sFas
+Apoptosis_ -| sFas
+$Apoptosis_:function(Caspase, Apoptosis_)
+Caspase -> Apoptosis_
+Apoptosis_ -> Apoptosis_
+$Fas:!Apoptosis_ & o(sFas)
+Apoptosis_ -| Fas
+sFas -| Fas
+$Ceramide_:!Apoptosis_ & n(Fas, S1P)
+Fas -> Ceramide_
+S1P -| Ceramide_
+Apoptosis_ -| Ceramide_
+$Caspase:!Apoptosis_ & m(BID_, DISC_, IAP_)
+DISC_ -> Caspase
+IAP_ -| Caspase
+BID_ -> Caspase
+Apoptosis_ -| Caspase
+$MCL1:!Apoptosis_ & l(DISC_)
+DISC_ -| MCL1
+Apoptosis_ -| MCL1
+$BID_:!Apoptosis_ & k(MCL1)
+MCL1 -| BID_
+Apoptosis_ -| BID_
+$DISC_:!Apoptosis_ & j(Fas,Ceramide_,FLIP_)
+Fas -> DISC_
+Ceramide_ -> DISC_
+Apoptosis_ -| DISC_
+FLIP_ -| DISC_
+$FLIP_:!Apoptosis_ & i(DISC_)
+DISC_ -| FLIP_
+Apoptosis_ -| FLIP_
+$CTLA4_:!Apoptosis_ & h(TCR)
+TCR -> CTLA4_
+Apoptosis_ -| CTLA4_
+$TCR:!Apoptosis_ & g(CTLA4_)
+CTLA4_ -| TCR
+Apoptosis_ -| TCR
+$IFNG_:!Apoptosis_ & f(P2, SMAD_)
+P2 -| IFNG_
+SMAD_ -| IFNG_
+Apoptosis_ -| IFNG_
+$CREB:!Apoptosis_ & e(IFNG_)
+IFNG_ -> CREB
+Apoptosis_ -| CREB
+$P2:!Apoptosis_ & d(P2, IFNG_)
+P2 -> P2
+IFNG_ -> P2
+Apoptosis_ -| P2
+$SMAD_:!Apoptosis_ & c(GPCR_)
+GPCR_ -> SMAD_
+Apoptosis_ -| SMAD_
+$GPCR_:!Apoptosis_ & b(S1P)
+S1P -> GPCR_
+Apoptosis_ -| GPCR_
+$IAP_:!Apoptosis_ & a(BID_)
+BID_ -| IAP_
+Apoptosis_ -| IAP_
diff --git a/benchmark_models/small_example/formulae.txt b/benchmark_models/small_example/formulae.txt
@@ -1,9 +1,9 @@
-prior knowledge formula:
+# prior knowledge formula:
 3{a}: (3{b}: (3{c}: (@{c}: ((EF {a}) & (EF {b}) & (@{a}: AG EF {a}) & (@{b}: (AG EF {b} & ~EF {a}))))))
 
-data encoding formulae (attractors 001 and 110):
+# data encoding formulae (attractors 001 and 110):
 3{x}:@{x}: ~v_1 & ~v_2 & v_3 & AG EF {x}
 3{y}:@{y}: v_1 & v_2 & ~v_3 & AG EF {y}
 
-overall dynamic property combining all previous formulae:
+# overall dynamic property combining all previous formulae:
 (3{a}: (3{b}: (3{c}: (@{c}: ((EF {a}) & (EF {b}) & (@{a}: AG EF {a}) & (@{b}: (AG EF {b} & ~EF {a}))))))) & (3{x}:@{x}: ~v_1 & ~v_2 & v_3 & AG EF {x}) & (3{y}:@{y}: v_1 & v_2 & ~v_3 & AG EF {y})
diff --git a/src/bin/bn_inference_with_attractors.rs b/src/bin/bn_inference_with_attractors.rs
@@ -5,7 +5,7 @@ use biodivine_lib_param_bn::BooleanNetwork;
 use boolean_network_sketches::inference_attractor_data::*;
 use boolean_network_sketches::utils::check_if_result_contains_goal;
 
-use biodivine_hctl_model_checker::model_checking::get_extended_symbolic_graph;
+use biodivine_hctl_model_checker::mc_utils::get_extended_symbolic_graph;
 use std::fs::{read_to_string, File};
 use std::io::{BufRead, BufReader};
 use std::path::Path;
@@ -64,7 +64,7 @@ fn main() {
     println!("Loaded BN model with {} components.", bn.num_vars());
 
     // Create extended graph object with 1 HCTL var (we dont need more)
-    let graph = get_extended_symbolic_graph(&bn, 1);
+    let graph = get_extended_symbolic_graph(&bn, 1).unwrap();
     println!(
         "Model has {} symbolic parameters.",
         graph.symbolic_context().num_parameter_variables()