From d91cabce1b471327bc6a897c9c0dc873c967d19a Mon Sep 17 00:00:00 2001 From: Samuel Pastva Date: Thu, 19 Sep 2024 19:05:43 +0200 Subject: [PATCH] Phenotype classification refactoring (#25) * Create two variants of phenotype classification: one which "erases" attractor knowledge, and second which creates per-attractor classes. * Fix line endings. * Make a copy of the test model. --- biodivine_aeon/__init__.pyi | 8 +- src/bindings/bn_classifier/class.rs | 8 +- src/bindings/bn_classifier/classification.rs | 147 ++++++++++++++++--- tests/model-myeloid-3-unknown.aeon | 47 ++++++ tests/test_classification.py | 53 +++++++ 5 files changed, 234 insertions(+), 29 deletions(-) create mode 100644 tests/model-myeloid-3-unknown.aeon diff --git a/biodivine_aeon/__init__.pyi b/biodivine_aeon/__init__.pyi index bf4f0ec1..e4fb884d 100644 --- a/biodivine_aeon/__init__.pyi +++ b/biodivine_aeon/__init__.pyi @@ -1627,10 +1627,16 @@ class Classification: @staticmethod def classify_attractor_bifurcation(graph: AsynchronousGraph, attractor: Optional[list[ColoredVertexSet]] = None) -> dict[Class, ColorSet]: ... @staticmethod + def classify_phenotypes(graph: AsynchronousGraph, + phenotypes: dict[Class, VertexSet], + oscillation_types: Optional[dict[Class, PhenotypeOscillation]] = None, + initial_trap: Optional[ColoredVertexSet] = None) -> dict[Class, ColorSet]: ... + @staticmethod def classify_attractor_phenotypes(graph: AsynchronousGraph, phenotypes: dict[Class, VertexSet], oscillation_types: Optional[dict[Class, PhenotypeOscillation]] = None, - initial_trap: Optional[ColoredVertexSet] = None) -> dict[Class, ColorSet]: ... + traps: Optional[list[ColoredVertexSet]] = None, + count_multiplicity: bool = True) -> dict[Class, ColorSet]: ... @staticmethod def classify_dynamic_properties(graph: AsynchronousGraph, properties: dict[str, HctlFormula], assertions: Optional[list[HctlFormula]] = None, substitution: Optional[dict[str, ColoredVertexSet]] = None) -> dict[Class, ColorSet]: ... diff --git a/src/bindings/bn_classifier/class.rs b/src/bindings/bn_classifier/class.rs index 1ffaea5a..481f7339 100644 --- a/src/bindings/bn_classifier/class.rs +++ b/src/bindings/bn_classifier/class.rs @@ -10,14 +10,14 @@ use crate::bindings::lib_param_bn::symbolic::set_color::ColorSet; use crate::pyo3_utils::richcmp_eq_by_key; use crate::{throw_runtime_error, throw_type_error}; -/// A `Class` is an immutable collection of sorted "features", such as each feature is +/// A `Class` is an immutable collection of sorted "features", such that each feature is /// a described by its string name. A `Class` is used by the various classification workflows /// in AEON to label a specific mode of behavior that a system can exhibit. /// /// Depending on which operations are used, a class can behave either as a `set` (each feature /// can only appear once in a `Class`), or a `list` (multiple features of the same name appear /// multiple times). This entirely depends on the classification workflow used and is not -/// a property of a `Class` (you can even mix the `set` and `list` behavior depending on +/// a property of a `Class` itself (you can even mix the `set` and `list` behavior depending on /// the exact feature you are adding). Note that an "empty" class is allowed. /// /// The main reason why we even need `Class` is that lists and sets are not hash-able in Python, @@ -89,11 +89,11 @@ impl Class { hasher.finish() } - fn __str__(&self) -> String { + pub fn __str__(&self) -> String { format!("{:?}", self.items) } - fn __repr__(&self) -> String { + pub fn __repr__(&self) -> String { format!("Class({})", self.__str__()) } diff --git a/src/bindings/bn_classifier/classification.rs b/src/bindings/bn_classifier/classification.rs index 35711f27..35007030 100644 --- a/src/bindings/bn_classifier/classification.rs +++ b/src/bindings/bn_classifier/classification.rs @@ -385,45 +385,134 @@ impl Classification { .collect()) } + /// Classify the *individual attractors* of an `AsynchronousGraph` according to their affinity + /// to biological phenotypes. + /// + /// This is similar to `Classification.classify_phenotypes`, but it actually returns + /// per-attractor results instead of lumping all attractors of one phenotype into + /// a single class. This results in a sort-of nested classification, where each class + /// consists of entries for individual attractors, but each entry is itself a class + /// that describes the phenotypes of that particular attractor. + /// + /// For example, given phenotypes `"a"` and `"b"`, the result could contain a class + /// `Class(["Class(['a'])", "Class(['b'])", "Class(['a', 'b'])"])`. This means that there + /// are three attractors: One with only the "a" phenotype, one with only the "b" phenotype, + /// and one with both phenotypes. Note that the "inner" classes are encoded as strings + /// (you can convert such string into a proper class object by running + /// `c = eval(class_string)`). + /// + /// The meaning of `oscillation_types` is the same as in `Classification.classify_phenotypes`: + /// `Forbidden` oscillation means the attractor must be a subset of the phenotype, `allowed` + /// oscillation means the attractor must intersect the phenotype, and `required` oscillation + /// means that the attractor must intersect the phenotype, but can't be its subset. + /// + /// Similar to `Classification.classify_phenotypes`, the method allows you to provide + /// a collection of `traps` that will be considered instead of the network attractors. + /// The method checks that these sets are indeed trap sets, but they do not have to be + /// minimal (i.e. attractors). If `traps` are given, the classification treats each trap + /// set as a separate "attractor". + /// + /// Finally, if `count_multiplicity` is set to `False`, the method will lump together + /// attractors that satisfy the same phenotypes, meaning the resulting `Class` object + /// will only contain up to one instance of the same phenotype configuration. For example, + /// if I have two attractors, each satisfying phenotype `"a"`, then by default the result + /// is a `Class(["Class(['a'])", "Class(['a'])"])` (and this situation is different compared + /// to the case when I only have one such attractor). If I set `count_multiplicity=False`, + /// the result will be `Class(["Class(['a'])"])`, regardless of how many attractors actually + /// satisfy the `"a"` phenotype. + #[staticmethod] + #[pyo3(signature = (graph, phenotypes, oscillation_types = None, traps = None, count_multiplicity = true))] + pub fn classify_attractor_phenotypes( + py: Python, + graph: &AsynchronousGraph, + phenotypes: HashMap, + oscillation_types: Option>, + traps: Option>, + count_multiplicity: bool, + ) -> PyResult> { + // Initialize the attractor set. + let traps = if let Some(traps) = traps { + traps + } else { + Attractors::attractors(graph, None, None, py)? + }; + + let mut all_colors = graph.mk_empty_colors(); + for attr in &traps { + all_colors = all_colors.union(&attr.colors()); + } + + let mut result = HashMap::new(); + result.insert(Class::new_native(Vec::new()), all_colors); + + for attr in &traps { + let attr_classes = Self::classify_phenotypes( + py, + graph, + phenotypes.clone(), + oscillation_types.clone(), + Some(attr.clone()), + )?; + for (cls, set) in attr_classes { + let cls_str = cls.__repr__(); + let cls_py = Py::new(py, Class::new_native(vec![cls_str]))?; + if count_multiplicity { + result = Self::append(result, cls_py.into_bound(py), set)?; + } else { + result = Self::ensure(result, cls_py.into_bound(py), set)?; + } + } + } + + Ok(result) + } + /// Classify the colors of an `AsynchronousGraph` according to their affinity to biological /// phenotypes. /// /// Each phenotype is given as an arbitrary `VertexSet`, identified by a `Class` instance. - /// Most often, phenotypes are described through sub-spaces (see also - /// `AsynchronousGraph.mk_subspace_vertices`). Each phenotype either holds or does not hold - /// in each network attractor. The conditions for this depend on the `PhenotypeOscillation` - /// type, that can be optionally given as a second dictionary. + /// Most often, phenotypes are described through pair-wise disjoint sub-spaces (see also + /// `AsynchronousGraph.mk_subspace_vertices`). The result of this method then associates + /// each network color with a collection of phenotypes (i.e. `Class`) that are attainable + /// in the corresponding fully instantiated network. /// - /// - [default] `forbidden`: Phenotype is satisfied if all attractor vertices belong to the + /// By default, the network exhibits a phenotype if it can stay in the phenotype set + /// forever (i.e. there exists at least one attractor that is fully in this set). However, + /// we also allow other types of phenotypes based on the `PhenotypeOscillation`: + /// + /// - [default] `forbidden`: There *exists* an attractor that is a *subset* of the /// given phenotype `VertexSet` (oscillation is forbidden). - /// - `required`: Phenotype is satisfied if the attractor intersects the phenotype set, - /// but is not a proper subset (oscillation always occurs). - /// - `allowed`: Phenotype is satisfied if *some* attractor state belongs to the given - /// phenotype set (i.e. the attractor can be a subset of the phenotype, or just have - /// a non-empty intersection). - /// - /// A network color `c` then satisfies a phenotype if there *exists* an attractor in `c` - /// for which the phenotype holds. Colors that do not match any phenotype are returned with - /// an empty class (i.e. `Class([])`). - /// - /// Typically, phenotypes correspond to disjoint sets of vertices. However, this - /// is not a rule. Furthermore, the method can also deal with colors that satisfy multiple - /// phenotypes. Such colors are assigned a class that is the sum of all satisfied phenotype - /// classes. - /// - /// Note that you can (optionally) provide your own attractor states (or any other relevant + /// - `required`: There *exists* an attractor that *intersects* the phenotype set, but + /// is not a *proper subset* (phenotype is visited intermittently). + /// - `allowed`: There *exists* an attractor that *intersects* the phenotype set, but + /// can be also a proper subset (i.e. the network either stays in the phenotype forever, + /// or visits it intermittently, we don't care which one). + /// + /// Colors that do not match any phenotype are returned with an empty class (i.e. `Class([])`). + /// + /// Note that this method does not count the number of attractors, and it can assign the + /// same attractor to multiple phenotypes (for example, a network with a fixed-point `111` + /// exhibits both phenotype `a=11*` and `b=*11`, i.e. the network will be returned within + /// the class `Class(['a','b'])`). This is usually not a problem for disjoint phenotypes with + /// forbidden oscillation, just beware that for more complex phenotypes, + /// you might need additional analysis of the relevant colors. + /// + /// If you do also need to map attractors to phenotypes, have a look at + /// `Classification.classify_attractor_phenotypes`. + /// + /// You can (optionally) provide your own attractor states (or any other relevant /// set of states) as the `initial_trap` argument (the method assumes this is a trap set). /// However, computing phenotype classification does not require precise knowledge of /// attractors. Thus, it can be often much faster than exact attractor computation (especially /// if the number of attractors is high). However, you can use this option to restrict the - /// input space if you want to explicitly ignore certain attractors. + /// input space if you want to explicitly ignore certain parts of the state space. /// /// You can also combine the results of this analysis with other classifications (e.g. /// attractor bifurcations, or HCTL properties) using `Classification.ensure` or /// `Classification.append`. #[staticmethod] #[pyo3(signature = (graph, phenotypes, oscillation_types = None, initial_trap = None))] - pub fn classify_attractor_phenotypes( + pub fn classify_phenotypes( py: Python, graph: &AsynchronousGraph, phenotypes: HashMap, @@ -431,10 +520,20 @@ impl Classification { initial_trap: Option, ) -> PyResult> { let mut map = HashMap::new(); - map.insert(Class::new_native(Vec::new()), graph.mk_unit_colors()); let unit = graph.mk_unit_colored_vertices(); let initial_trap = initial_trap.unwrap_or_else(|| graph.mk_unit_colored_vertices()); + if !graph + .as_native() + .can_post_out(initial_trap.as_native()) + .is_empty() + { + return throw_runtime_error( + "Given initial trap set is not a trap set (it can be escaped).", + ); + } + + map.insert(Class::new_native(Vec::new()), initial_trap.colors()); for (cls, phenotype) in phenotypes { let p_type = oscillation_types diff --git a/tests/model-myeloid-3-unknown.aeon b/tests/model-myeloid-3-unknown.aeon new file mode 100644 index 00000000..b0b86537 --- /dev/null +++ b/tests/model-myeloid-3-unknown.aeon @@ -0,0 +1,47 @@ +CEBPa -> CEBPa +GATA1 -| CEBPa +FOG1 -| CEBPa +SCL -| CEBPa + +$cJun:(PU1 & !Gfi1) +PU1 -> cJun +Gfi1 -| cJun + +$EgrNab:((PU1 & cJun) & !Gfi1) +Gfi1 -| EgrNab +cJun -> EgrNab +PU1 -> EgrNab + +$EKLF:(GATA1 & !Fli1) +GATA1 -> EKLF +Fli1 -| EKLF + +EKLF -| Fli1 +GATA1 -> Fli1 + +$FOG1:(GATA1) +GATA1 -> FOG1 + +$GATA1:(((GATA1 | GATA2) | Fli1) & !PU1) +GATA1 -> GATA1 +GATA2 -> GATA1 +PU1 -| GATA1 +Fli1 -> GATA1 + +FOG1 -| GATA2 +GATA1 -| GATA2 +PU1 -| GATA2 +GATA2 -> GATA2 + +$Gfi1:(CEBPa & !EgrNab) +CEBPa -> Gfi1 +EgrNab -| Gfi1 + +CEBPa -> PU1 +PU1 -> PU1 +GATA2 -| PU1 +GATA1 -| PU1 + +$SCL:(GATA1 & !PU1) +GATA1 -> SCL +PU1 -| SCL diff --git a/tests/test_classification.py b/tests/test_classification.py index 8c432c06..822e9766 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -143,3 +143,56 @@ def test_property_classification(): assert str(l_ann) == str(annotations) os.remove("classification.test.2.zip") + +def test_phenotype_classification(): + path = "./tests/model-myeloid-3-unknown.aeon" + + bn = BooleanNetwork.from_file(path) + + graph = AsynchronousGraph(bn) + phenotypes = { + Class('erythrocyte'): graph.mk_subspace_vertices({"EKLF": True}), + Class('megakaryocyte'): graph.mk_subspace_vertices({"Fli1": True}), + Class('monocyte'): graph.mk_subspace_vertices({"cJun": True}), + Class('granulocyte'): graph.mk_subspace_vertices({"Gfi1": True}) + } + phenotype_mapping = Classification.classify_phenotypes(graph, phenotypes) + phenotype_attractor_mapping = Classification.classify_attractor_phenotypes(graph, phenotypes, count_multiplicity=False) + + # An example of how to nicely print the mappings: + for (cls, colors) in phenotype_mapping.items(): + print(cls, colors) + # Expected output: + # ["granulocyte", "megakaryocyte", "monocyte"] ColorSet(cardinality=1423062, symbolic_size=543) + # ["erythrocyte", "granulocyte", "megakaryocyte"] ColorSet(cardinality=4617, symbolic_size=197) + # ["granulocyte", "monocyte"] ColorSet(cardinality=58482, symbolic_size=295) + # ["granulocyte", "megakaryocyte"] ColorSet(cardinality=4617, symbolic_size=197) + # ["erythrocyte", "megakaryocyte"] ColorSet(cardinality=53865, symbolic_size=279) + # ["megakaryocyte"] ColorSet(cardinality=53865, symbolic_size=279) + # ["erythrocyte", "granulocyte", "megakaryocyte", "monocyte"] ColorSet(cardinality=1364580, symbolic_size=561) + + for (cls, colors) in phenotype_attractor_mapping.items(): + print([eval(x).feature_list() for x in cls.feature_list()], colors) + + # Expected output: + # [['granulocyte', 'megakaryocyte'], ['megakaryocyte', 'monocyte'], ['megakaryocyte']] ColorSet(cardinality=1364976, symbolic_size=675) + # [['granulocyte', 'megakaryocyte'], ['megakaryocyte', 'monocyte']] ColorSet(cardinality=58086, symbolic_size=408) + # [['granulocyte', 'megakaryocyte'], ['megakaryocyte']] ColorSet(cardinality=4617, symbolic_size=197) + # [['erythrocyte', 'granulocyte'], ['erythrocyte'], ['granulocyte', 'megakaryocyte'], ['megakaryocyte']] ColorSet(cardinality=4221, symbolic_size=213) + # [['megakaryocyte']] ColorSet(cardinality=53865, symbolic_size=279) + # [['granulocyte'], ['monocyte']] ColorSet(cardinality=58086, symbolic_size=408) + # [['granulocyte'], ['monocyte'], []] ColorSet(cardinality=396, symbolic_size=141) + # [['erythrocyte'], ['granulocyte'], ['megakaryocyte'], ['monocyte'], []] ColorSet(cardinality=4617, symbolic_size=165) + # [['erythrocyte'], ['granulocyte'], ['megakaryocyte'], ['monocyte']] ColorSet(cardinality=1281078, symbolic_size=858) + # [['erythrocyte', 'granulocyte'], ['erythrocyte'], ['granulocyte', 'megakaryocyte'], ['granulocyte'], ['megakaryocyte'], ['monocyte']] ColorSet(cardinality=78885, symbolic_size=453) + # [['erythrocyte'], ['megakaryocyte']] ColorSet(cardinality=53865, symbolic_size=279) + # [['erythrocyte'], ['granulocyte'], ['megakaryocyte']] ColorSet(cardinality=396, symbolic_size=141) + + # Test that every attractor class is a subset of the corresponding phenotype class: + for (cls, colors) in phenotype_attractor_mapping.items(): + phenotype_class = Class([]) + for inner in cls.feature_list(): + inner_cls: Class = eval(inner) + for phenotype in inner_cls.feature_list(): + phenotype_class = phenotype_class.ensure(phenotype) + assert colors.is_subset(phenotype_mapping[phenotype_class])