Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate to pytorch #7

Merged
merged 18 commits into from
Oct 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .github/workflows/create-docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Build the project's documentation environment and publish docs/build to
# GitHub Pages on every push to main.
# NOTE(review): no step actually generates docs/build before publishing —
# presumably `poetry install` hooks or a prior commit provide it; add an
# explicit docs build step (e.g. `poetry run sphinx-build`) — TODO confirm.

name: Create docs

on:
  push:
    branches: [ "main" ]

jobs:
  build:
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.11"]
        platform: [macos-latest]  # TODO: add ubuntu-latest
    runs-on: ${{ matrix.platform }}
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          # NOTE(review): flake8 is installed but no lint step runs it — either
          # add a lint step or drop it from this docs workflow.
          python -m pip install --upgrade pip poetry flake8
          poetry install
      - name: Publish to GitHub Pages
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: docs/build
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,18 @@ name: Python package

on:
push:
branches: [ "main" ]
branches: [ "*" ]
pull_request:
branches: [ "main" ]
branches: [ "*" ]

jobs:
build:

runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.11", "3.12"]

python-version: ["3.11"]
platform: [macos-latest, windows-latest] # TODO: add ubuntu-latest
runs-on: ${{ matrix.platform }}
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
Expand Down
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,21 @@ poetry run causy execute tests/fixtures/toy_data_larger.json --pipeline pc.json
### Usage via Code

Use a default algorithm

```python
from causy.algorithms import PC
from causy.utils import show_edges
from causy.utils import retrieve_edges

model = PC()
model.create_graph_from_data(
[
{"a": 1, "b": 0.3},
{"a": 1, "b": 0.3},
{"a": 0.5, "b": 0.2}
]
)
model.create_all_possible_edges()
model.execute_pipeline_steps()
edges = show_edges(model.graph)
edges = retrieve_edges(model.graph)

for edge in edges:
print(
Expand Down
33 changes: 33 additions & 0 deletions causy/algorithms.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from causy.exit_conditions import ExitOnNoActions
from causy.generators import PairsWithNeighboursGenerator
from causy.graph import graph_model_factory, Loop
from causy.independence_tests import (
CalculateCorrelations,
CorrelationCoefficientTest,
PartialCorrelationTest,
ExtendedPartialCorrelationTestMatrix,
)
from causy.interfaces import AS_MANY_AS_FIELDS, ComparisonSettings
from causy.orientation_tests import (
ColliderTest,
NonColliderTest,
Expand All @@ -32,3 +34,34 @@
),
]
)


# Parallelised PC-algorithm pipeline: same step sequence as the default PC
# model, but the conditional-independence tests run with parallel=True and
# process their candidate sets in chunks.
ParallelPC = graph_model_factory(
    pipeline_steps=[
        # Skeleton phase: score all pairwise correlations first, then prune
        # edges whose (partial) correlation falls below the 0.01 threshold.
        CalculateCorrelations(),
        CorrelationCoefficientTest(threshold=0.01),
        PartialCorrelationTest(
            threshold=0.01, parallel=True, chunk_size_parallel_processing=10000
        ),
        ExtendedPartialCorrelationTestMatrix(
            threshold=0.01,
            chunk_size_parallel_processing=1000,
            parallel=True,
            # Custom generator: unchunked, shuffled combinations of a node
            # pair plus >=2 neighbours (min=4 counts the pair itself), with
            # no upper bound on the conditioning-set size.
            generator=PairsWithNeighboursGenerator(
                chunked=False,
                shuffle_combinations=True,
                comparison_settings=ComparisonSettings(min=4, max=AS_MANY_AS_FIELDS),
            ),
        ),
        # Orientation phase: orient colliders once, then loop the remaining
        # orientation rules until a pass produces no further actions.
        ColliderTest(),
        Loop(
            pipeline_steps=[
                NonColliderTest(),
                FurtherOrientTripleTest(),
                OrientQuadrupleTest(),
                FurtherOrientQuadrupleTest(),
            ],
            exit_condition=ExitOnNoActions(),
        ),
    ]
)
49 changes: 35 additions & 14 deletions causy/cli.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import importlib
import json
from datetime import datetime
from json import JSONEncoder
import logging

import typer

from causy.graph import graph_model_factory
from causy.utils import (
load_pipeline_artefact_by_definition,
load_pipeline_steps_by_definition,
show_edges,
retrieve_edges,
)

app = typer.Typer()
import logging


def load_json(pipeline_file: str):
Expand Down Expand Up @@ -58,7 +58,7 @@ def execute(
data_file: str,
pipeline: str = None,
algorithm: str = None,
graph_actions_save_file: str = None,
output_file: str = None,
render_save_file: str = None,
log_level: str = "ERROR",
):
Expand All @@ -68,9 +68,17 @@ def execute(
pipeline_config = load_json(pipeline)
pipeline = create_pipeline(pipeline_config)
model = graph_model_factory(pipeline_steps=pipeline)()
algorithm_reference = {
"type": "pipeline",
"reference": pipeline, # TODO: how to reference pipeline in a way that it can be loaded?
}
elif algorithm:
typer.echo(f"💾 Creating pipeline from algorithm {algorithm}")
model = load_algorithm(algorithm)()
algorithm_reference = {
"type": "default",
"reference": algorithm,
}

else:
raise ValueError("Either pipeline_file or algorithm must be specified")
Expand All @@ -83,18 +91,31 @@ def execute(

typer.echo("🕵🏻‍♀ Executing pipeline steps...")
model.execute_pipeline_steps()
edges = show_edges(model.graph)
for edge in edges:
edges = []
for edge in retrieve_edges(model.graph):
print(
f"{edge[0].name} -> {edge[1].name}: {model.graph.edges[edge[0]][edge[1]]}"
f"{model.graph.nodes[edge[0]].name} -> {model.graph.nodes[edge[1]].name}: {model.graph.edges[edge[0]][edge[1]]}"
)
edges.append(
{
"from": model.graph.nodes[edge[0]].to_dict(),
"to": model.graph.nodes[edge[1]].to_dict(),
"value": model.graph.edges[edge[0]][edge[1]],
}
)

if graph_actions_save_file:
typer.echo(f"💾 Saving graph actions to {graph_actions_save_file}")
with open(graph_actions_save_file, "w") as file:
file.write(
json.dumps(model.graph.action_history, cls=MyJSONEncoder, indent=4)
)
if output_file:
typer.echo(f"💾 Saving graph actions to {output_file}")
with open(output_file, "w") as file:
export = {
"name": algorithm,
"created_at": datetime.now().isoformat(),
"algorithm": algorithm_reference,
"steps": model.graph.action_history,
"nodes": model.graph.nodes,
"edges": edges,
}
file.write(json.dumps(export, cls=MyJSONEncoder, indent=4))

if render_save_file:
# I'm just a hacky rendering function, pls replace me with causy ui 🙄
Expand All @@ -105,7 +126,7 @@ def execute(
n_graph = nx.DiGraph()
for u in model.graph.edges:
for v in model.graph.edges[u]:
n_graph.add_edge(u.name, v.name)
n_graph.add_edge(model.graph.nodes[u].name, model.graph.nodes[v].name)
fig = plt.figure(figsize=(10, 10))
nx.draw(n_graph, with_labels=True, ax=fig.add_subplot(111))
fig.savefig(render_save_file)
Expand Down
50 changes: 42 additions & 8 deletions causy/generators.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import itertools
import logging

Expand All @@ -9,7 +10,6 @@
AS_MANY_AS_FIELDS,
)


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -43,6 +43,24 @@ def generate(


class PairsWithNeighboursGenerator(GeneratorInterface):
shuffle_combinations = True
chunked = True

    def __init__(
        self,
        comparison_settings: ComparisonSettings,
        chunked: bool = None,
        shuffle_combinations: bool = None,
    ):
        """Initialise the generator.

        :param comparison_settings: bounds (min/max) for the combination sizes
            this generator yields; forwarded to the base class.
        :param chunked: whether results are yielded in chunks; ``None`` keeps
            the class-level default (handled by the base class).
        :param shuffle_combinations: whether neighbour combinations are
            shuffled before being yielded; ``None`` keeps the class-level
            default (``True``) rather than overriding it.
        """
        super().__init__(comparison_settings, chunked)
        # Only override the class attribute when the caller passed an explicit
        # value, so ``None`` means "use the default", not "disable".
        if shuffle_combinations is not None:
            self.shuffle_combinations = shuffle_combinations

def to_dict(self):
result = super().to_dict()
result["params"]["shuffle_combinations"] = self.shuffle_combinations
return result

def generate(
self, graph: BaseGraphInterface, graph_model_instance_: GraphModelInterface
):
Expand Down Expand Up @@ -71,20 +89,36 @@ def generate(
for i in range(start, stop):
logger.debug(f"PairsWithNeighboursGenerator: i={i}")
checked_combinations = set()
for node in graph.edges:
for neighbour in graph.edges[node]:
local_edges = copy.deepcopy(graph.edges)
for node in local_edges:
for neighbour in local_edges[node]:
if (node, neighbour) in checked_combinations:
continue

checked_combinations.add((node, neighbour))
if i == 2:
yield (node.name, neighbour.name)
yield (node, neighbour)
continue

other_neighbours = set(graph.edges[node])
other_neighbours.remove(neighbour)
if neighbour in other_neighbours:
other_neighbours.remove(neighbour)
else:
continue
if len(other_neighbours) + 2 < i:
continue

for k in itertools.combinations(other_neighbours, i):
yield [node.name, neighbour.name] + [ks.name for ks in k]
combinations = itertools.combinations(other_neighbours, i)
if self.shuffle_combinations:
combinations = list(combinations)
import random

random.shuffle(combinations)

if self.chunked:
chunk = []
for k in combinations:
chunk.append([node, neighbour] + [ks for ks in k])
yield chunk
else:
for k in combinations:
yield [node, neighbour] + [ks for ks in k]
Loading