From d91cabce1b471327bc6a897c9c0dc873c967d19a Mon Sep 17 00:00:00 2001
From: Samuel Pastva <sam.pastva@gmail.com>
Date: Thu, 19 Sep 2024 19:05:43 +0200
Subject: [PATCH] Phenotype classification refactoring (#25)

* Create two variants of phenotype classification: one which "erases" attractor knowledge, and second which creates per-attractor classes.

* Fix line endings.

* Make a copy of the test model.
---
 biodivine_aeon/__init__.pyi                  |   8 +-
 src/bindings/bn_classifier/class.rs          |   8 +-
 src/bindings/bn_classifier/classification.rs | 147 ++++++++++++++++---
 tests/model-myeloid-3-unknown.aeon           |  47 ++++++
 tests/test_classification.py                 |  53 +++++++
 5 files changed, 234 insertions(+), 29 deletions(-)
 create mode 100644 tests/model-myeloid-3-unknown.aeon

diff --git a/biodivine_aeon/__init__.pyi b/biodivine_aeon/__init__.pyi
index bf4f0ec1..e4fb884d 100644
--- a/biodivine_aeon/__init__.pyi
+++ b/biodivine_aeon/__init__.pyi
@@ -1627,10 +1627,16 @@ class Classification:
     @staticmethod
     def classify_attractor_bifurcation(graph: AsynchronousGraph, attractor: Optional[list[ColoredVertexSet]] = None) -> dict[Class, ColorSet]: ...
     @staticmethod
+    def classify_phenotypes(graph: AsynchronousGraph,
+                            phenotypes: dict[Class, VertexSet],
+                            oscillation_types: Optional[dict[Class, PhenotypeOscillation]] = None,
+                            initial_trap: Optional[ColoredVertexSet] = None) -> dict[Class, ColorSet]: ...
+    @staticmethod
     def classify_attractor_phenotypes(graph: AsynchronousGraph,
                                       phenotypes: dict[Class, VertexSet],
                                       oscillation_types: Optional[dict[Class, PhenotypeOscillation]] = None,
-                                      initial_trap: Optional[ColoredVertexSet] = None) -> dict[Class, ColorSet]: ...
+                                      traps: Optional[list[ColoredVertexSet]] = None,
+                                      count_multiplicity: bool = True) -> dict[Class, ColorSet]: ...
     @staticmethod
     def classify_dynamic_properties(graph: AsynchronousGraph, properties: dict[str, HctlFormula], assertions: Optional[list[HctlFormula]] = None, substitution: Optional[dict[str, ColoredVertexSet]] = None) -> dict[Class, ColorSet]: ...
 
diff --git a/src/bindings/bn_classifier/class.rs b/src/bindings/bn_classifier/class.rs
index 1ffaea5a..481f7339 100644
--- a/src/bindings/bn_classifier/class.rs
+++ b/src/bindings/bn_classifier/class.rs
@@ -10,14 +10,14 @@ use crate::bindings::lib_param_bn::symbolic::set_color::ColorSet;
 use crate::pyo3_utils::richcmp_eq_by_key;
 use crate::{throw_runtime_error, throw_type_error};
 
-/// A `Class` is an immutable collection of sorted "features", such as each feature is
+/// A `Class` is an immutable collection of sorted "features", such that each feature is
 /// a described by its string name. A `Class` is used by the various classification workflows
 /// in AEON to label a specific mode of behavior that a system can exhibit.
 ///
 /// Depending on which operations are used, a class can behave either as a `set` (each feature
 /// can only appear once in a `Class`), or a `list` (multiple features of the same name appear
 /// multiple times). This entirely depends on the classification workflow used and is not
-/// a property of a `Class` (you can even mix the `set` and `list` behavior depending on
+/// a property of a `Class` itself (you can even mix the `set` and `list` behavior depending on
 /// the exact feature you are adding). Note that an "empty" class is allowed.
 ///
 /// The main reason why we even need `Class` is that lists and sets are not hash-able in Python,
@@ -89,11 +89,11 @@ impl Class {
         hasher.finish()
     }
 
-    fn __str__(&self) -> String {
+    pub fn __str__(&self) -> String {
         format!("{:?}", self.items)
     }
 
-    fn __repr__(&self) -> String {
+    pub fn __repr__(&self) -> String {
         format!("Class({})", self.__str__())
     }
 
diff --git a/src/bindings/bn_classifier/classification.rs b/src/bindings/bn_classifier/classification.rs
index 35711f27..35007030 100644
--- a/src/bindings/bn_classifier/classification.rs
+++ b/src/bindings/bn_classifier/classification.rs
@@ -385,45 +385,134 @@ impl Classification {
             .collect())
     }
 
+    /// Classify the *individual attractors* of an `AsynchronousGraph` according to their affinity
+    /// to biological phenotypes.
+    ///
+    /// This is similar to `Classification.classify_phenotypes`, but it actually returns
+    /// per-attractor results instead of lumping all attractors of one phenotype into
+    /// a single class. This results in a sort-of nested classification, where each class
+    /// consists of entries for individual attractors, but each entry is itself a class
+    /// that describes the phenotypes of that particular attractor.
+    ///
+    /// For example, given phenotypes `"a"` and `"b"`, the result could contain a class
+    /// `Class(["Class(['a'])", "Class(['b'])", "Class(['a', 'b'])"])`. This means that there
+    /// are three attractors: One with only the "a" phenotype, one with only the "b" phenotype,
+    /// and one with both phenotypes. Note that the "inner" classes are encoded as strings
+    /// (you can convert such string into a proper class object by running
+    /// `c = eval(class_string)`).
+    ///
+    /// The meaning of `oscillation_types` is the same as in `Classification.classify_phenotypes`:
+    /// `Forbidden` oscillation means the attractor must be a subset of the phenotype, `allowed`
+    /// oscillation means the attractor must intersect the phenotype, and `required` oscillation
+    /// means that the attractor must intersect the phenotype, but can't be its subset.
+    ///
+    /// Similar to `Classification.classify_phenotypes`, the method allows you to provide
+    /// a collection of `traps` that will be considered instead of the network attractors.
+    /// The method checks that these sets are indeed trap sets, but they do not have to be
+    /// minimal (i.e. attractors). If `traps` are given, the classification treats each trap
+    /// set as a separate "attractor".
+    ///
+    /// Finally, if `count_multiplicity` is set to `False`, the method will lump together
+    /// attractors that satisfy the same phenotypes, meaning the resulting `Class` object
+    /// will only contain up to one instance of the same phenotype configuration. For example,
+    /// if I have two attractors, each satisfying phenotype `"a"`, then by default the result
+    /// is a `Class(["Class(['a'])", "Class(['a'])"])` (and this situation is different compared
+    /// to the case when I only have one such attractor). If I set `count_multiplicity=False`,
+    /// the result will be `Class(["Class(['a'])"])`, regardless of how many attractors actually
+    /// satisfy the `"a"` phenotype.
+    #[staticmethod]
+    #[pyo3(signature = (graph, phenotypes, oscillation_types = None, traps = None, count_multiplicity = true))]
+    pub fn classify_attractor_phenotypes(
+        py: Python,
+        graph: &AsynchronousGraph,
+        phenotypes: HashMap<Class, VertexSet>,
+        oscillation_types: Option<HashMap<Class, String>>,
+        traps: Option<Vec<ColoredVertexSet>>,
+        count_multiplicity: bool,
+    ) -> PyResult<HashMap<Class, ColorSet>> {
+        // Initialize the attractor set.
+        let traps = if let Some(traps) = traps {
+            traps
+        } else {
+            Attractors::attractors(graph, None, None, py)?
+        };
+
+        let mut all_colors = graph.mk_empty_colors();
+        for attr in &traps {
+            all_colors = all_colors.union(&attr.colors());
+        }
+
+        let mut result = HashMap::new();
+        result.insert(Class::new_native(Vec::new()), all_colors);
+
+        for attr in &traps {
+            let attr_classes = Self::classify_phenotypes(
+                py,
+                graph,
+                phenotypes.clone(),
+                oscillation_types.clone(),
+                Some(attr.clone()),
+            )?;
+            for (cls, set) in attr_classes {
+                let cls_str = cls.__repr__();
+                let cls_py = Py::new(py, Class::new_native(vec![cls_str]))?;
+                if count_multiplicity {
+                    result = Self::append(result, cls_py.into_bound(py), set)?;
+                } else {
+                    result = Self::ensure(result, cls_py.into_bound(py), set)?;
+                }
+            }
+        }
+
+        Ok(result)
+    }
+
     /// Classify the colors of an `AsynchronousGraph` according to their affinity to biological
     /// phenotypes.
     ///
     /// Each phenotype is given as an arbitrary `VertexSet`, identified by a `Class` instance.
-    /// Most often, phenotypes are described through sub-spaces (see also
-    /// `AsynchronousGraph.mk_subspace_vertices`). Each phenotype either holds or does not hold
-    /// in each network attractor. The conditions for this depend on the `PhenotypeOscillation`
-    /// type, that can be optionally given as a second dictionary.
+    /// Most often, phenotypes are described through pair-wise disjoint sub-spaces (see also
+    /// `AsynchronousGraph.mk_subspace_vertices`). The result of this method then associates
+    /// each network color with a collection of phenotypes (i.e. `Class`) that are attainable
+    /// in the corresponding fully instantiated network.
     ///
-    ///  - [default] `forbidden`: Phenotype is satisfied if all attractor vertices belong to the
+    /// By default, the network exhibits a phenotype if it can stay in the phenotype set
+    /// forever (i.e. there exists at least one attractor that is fully in this set). However,
+    /// we also allow other types of phenotypes based on the `PhenotypeOscillation`:
+    ///
+    ///  - [default] `forbidden`: There *exists* an attractor that is a *subset* of the
     ///    given phenotype `VertexSet` (oscillation is forbidden).
-    ///  - `required`: Phenotype is satisfied if the attractor intersects the phenotype set,
-    ///    but is not a proper subset (oscillation always occurs).
-    ///  - `allowed`: Phenotype is satisfied if *some* attractor state belongs to the given
-    ///    phenotype set (i.e. the attractor can be a subset of the phenotype, or just have
-    ///    a non-empty intersection).
-    ///
-    /// A network color `c` then satisfies a phenotype if there *exists* an attractor in `c`
-    /// for which the phenotype holds. Colors that do not match any phenotype are returned with
-    /// an empty class (i.e. `Class([])`).
-    ///
-    /// Typically, phenotypes correspond to disjoint sets of vertices. However, this
-    /// is not a rule. Furthermore, the method can also deal with colors that satisfy multiple
-    /// phenotypes. Such colors are assigned a class that is the sum of all satisfied phenotype
-    /// classes.
-    ///
-    /// Note that you can (optionally) provide your own attractor states (or any other relevant
+    ///  - `required`: There *exists* an attractor that *intersects* the phenotype set, but
+    ///    is not a *proper subset* (phenotype is visited intermittently).
+    ///  - `allowed`: There *exists* an attractor that *intersects* the phenotype set, but
+    ///    can be also a proper subset (i.e. the network either stays in the phenotype forever,
+    ///    or visits it intermittently, we don't care which one).
+    ///
+    /// Colors that do not match any phenotype are returned with an empty class (i.e. `Class([])`).
+    ///
+    /// Note that this method does not count the number of attractors, and it can assign the
+    /// same attractor to multiple phenotypes (for example, a network with a fixed-point `111`
+    /// exhibits both phenotype `a=11*` and `b=*11`, i.e. the network will be returned within
+    /// the class `Class(['a','b'])`). This is usually not a problem for disjoint phenotypes with
+    /// forbidden oscillation, just beware that for more complex phenotypes,
+    /// you might need additional analysis of the relevant colors.
+    ///
+    /// If you do also need to map attractors to phenotypes, have a look at
+    /// `Classification.classify_attractor_phenotypes`.
+    ///
+    /// You can (optionally) provide your own attractor states (or any other relevant
     /// set of states) as the `initial_trap` argument (the method assumes this is a trap set).
     /// However, computing phenotype classification does not require precise knowledge of
     /// attractors. Thus, it can be often much faster than exact attractor computation (especially
     /// if the number of attractors is high). However, you can use this option to restrict the
-    /// input space if you want to explicitly ignore certain attractors.
+    /// input space if you want to explicitly ignore certain parts of the state space.
     ///
     /// You can also combine the results of this analysis with other classifications (e.g.
     /// attractor bifurcations, or HCTL properties) using `Classification.ensure` or
     /// `Classification.append`.
     #[staticmethod]
     #[pyo3(signature = (graph, phenotypes, oscillation_types = None, initial_trap = None))]
-    pub fn classify_attractor_phenotypes(
+    pub fn classify_phenotypes(
         py: Python,
         graph: &AsynchronousGraph,
         phenotypes: HashMap<Class, VertexSet>,
@@ -431,10 +520,20 @@ impl Classification {
         initial_trap: Option<ColoredVertexSet>,
     ) -> PyResult<HashMap<Class, ColorSet>> {
         let mut map = HashMap::new();
-        map.insert(Class::new_native(Vec::new()), graph.mk_unit_colors());
 
         let unit = graph.mk_unit_colored_vertices();
         let initial_trap = initial_trap.unwrap_or_else(|| graph.mk_unit_colored_vertices());
+        if !graph
+            .as_native()
+            .can_post_out(initial_trap.as_native())
+            .is_empty()
+        {
+            return throw_runtime_error(
+                "Given initial trap set is not a trap set (it can be escaped).",
+            );
+        }
+
+        map.insert(Class::new_native(Vec::new()), initial_trap.colors());
 
         for (cls, phenotype) in phenotypes {
             let p_type = oscillation_types
diff --git a/tests/model-myeloid-3-unknown.aeon b/tests/model-myeloid-3-unknown.aeon
new file mode 100644
index 00000000..b0b86537
--- /dev/null
+++ b/tests/model-myeloid-3-unknown.aeon
@@ -0,0 +1,47 @@
+CEBPa -> CEBPa
+GATA1 -| CEBPa
+FOG1 -| CEBPa
+SCL -| CEBPa
+
+$cJun:(PU1 & !Gfi1)
+PU1 -> cJun
+Gfi1 -| cJun
+
+$EgrNab:((PU1 & cJun) & !Gfi1)
+Gfi1 -| EgrNab
+cJun -> EgrNab
+PU1 -> EgrNab
+
+$EKLF:(GATA1 & !Fli1)
+GATA1 -> EKLF
+Fli1 -| EKLF
+
+EKLF -| Fli1
+GATA1 -> Fli1
+
+$FOG1:(GATA1)
+GATA1 -> FOG1
+
+$GATA1:(((GATA1 | GATA2) | Fli1) & !PU1)
+GATA1 -> GATA1
+GATA2 -> GATA1
+PU1 -| GATA1
+Fli1 -> GATA1
+
+FOG1 -| GATA2
+GATA1 -| GATA2
+PU1 -| GATA2
+GATA2 -> GATA2
+
+$Gfi1:(CEBPa & !EgrNab)
+CEBPa -> Gfi1
+EgrNab -| Gfi1
+
+CEBPa -> PU1
+PU1 -> PU1
+GATA2 -| PU1
+GATA1 -| PU1
+
+$SCL:(GATA1 & !PU1)
+GATA1 -> SCL
+PU1 -| SCL
diff --git a/tests/test_classification.py b/tests/test_classification.py
index 8c432c06..822e9766 100644
--- a/tests/test_classification.py
+++ b/tests/test_classification.py
@@ -143,3 +143,56 @@ def test_property_classification():
     assert str(l_ann) == str(annotations)
 
     os.remove("classification.test.2.zip")
+
+def test_phenotype_classification():
+    path = "./tests/model-myeloid-3-unknown.aeon"
+
+    bn = BooleanNetwork.from_file(path)
+
+    graph = AsynchronousGraph(bn)
+    phenotypes = {
+        Class('erythrocyte'): graph.mk_subspace_vertices({"EKLF": True}),
+        Class('megakaryocyte'): graph.mk_subspace_vertices({"Fli1": True}),
+        Class('monocyte'): graph.mk_subspace_vertices({"cJun": True}),
+        Class('granulocyte'): graph.mk_subspace_vertices({"Gfi1": True})
+    }
+    phenotype_mapping = Classification.classify_phenotypes(graph, phenotypes)
+    phenotype_attractor_mapping = Classification.classify_attractor_phenotypes(graph, phenotypes, count_multiplicity=False)
+
+    # An example of how to nicely print the mappings:
+    for (cls, colors) in phenotype_mapping.items():
+        print(cls, colors)
+    # Expected output:
+    # ["granulocyte", "megakaryocyte", "monocyte"] ColorSet(cardinality=1423062, symbolic_size=543)
+    # ["erythrocyte", "granulocyte", "megakaryocyte"] ColorSet(cardinality=4617, symbolic_size=197)
+    # ["granulocyte", "monocyte"] ColorSet(cardinality=58482, symbolic_size=295)
+    # ["granulocyte", "megakaryocyte"] ColorSet(cardinality=4617, symbolic_size=197)
+    # ["erythrocyte", "megakaryocyte"] ColorSet(cardinality=53865, symbolic_size=279)
+    # ["megakaryocyte"] ColorSet(cardinality=53865, symbolic_size=279)
+    # ["erythrocyte", "granulocyte", "megakaryocyte", "monocyte"] ColorSet(cardinality=1364580, symbolic_size=561)
+
+    for (cls, colors) in phenotype_attractor_mapping.items():
+        print([eval(x).feature_list() for x in cls.feature_list()], colors)
+
+    # Expected output:
+    # [['granulocyte', 'megakaryocyte'], ['megakaryocyte', 'monocyte'], ['megakaryocyte']] ColorSet(cardinality=1364976, symbolic_size=675)
+    # [['granulocyte', 'megakaryocyte'], ['megakaryocyte', 'monocyte']] ColorSet(cardinality=58086, symbolic_size=408)
+    # [['granulocyte', 'megakaryocyte'], ['megakaryocyte']] ColorSet(cardinality=4617, symbolic_size=197)
+    # [['erythrocyte', 'granulocyte'], ['erythrocyte'], ['granulocyte', 'megakaryocyte'], ['megakaryocyte']] ColorSet(cardinality=4221, symbolic_size=213)
+    # [['megakaryocyte']] ColorSet(cardinality=53865, symbolic_size=279)
+    # [['granulocyte'], ['monocyte']] ColorSet(cardinality=58086, symbolic_size=408)
+    # [['granulocyte'], ['monocyte'], []] ColorSet(cardinality=396, symbolic_size=141)
+    # [['erythrocyte'], ['granulocyte'], ['megakaryocyte'], ['monocyte'], []] ColorSet(cardinality=4617, symbolic_size=165)
+    # [['erythrocyte'], ['granulocyte'], ['megakaryocyte'], ['monocyte']] ColorSet(cardinality=1281078, symbolic_size=858)
+    # [['erythrocyte', 'granulocyte'], ['erythrocyte'], ['granulocyte', 'megakaryocyte'], ['granulocyte'], ['megakaryocyte'], ['monocyte']] ColorSet(cardinality=78885, symbolic_size=453)
+    # [['erythrocyte'], ['megakaryocyte']] ColorSet(cardinality=53865, symbolic_size=279)
+    # [['erythrocyte'], ['granulocyte'], ['megakaryocyte']] ColorSet(cardinality=396, symbolic_size=141)
+
+    # Test that every attractor class is a subset of the corresponding phenotype class:
+    for (cls, colors) in phenotype_attractor_mapping.items():
+        phenotype_class = Class([])
+        for inner in cls.feature_list():
+            inner_cls: Class = eval(inner)
+            for phenotype in inner_cls.feature_list():
+                phenotype_class = phenotype_class.ensure(phenotype)
+        assert colors.is_subset(phenotype_mapping[phenotype_class])