Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate to pytorch #7

Merged
merged 18 commits into from
Oct 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .github/workflows/create-docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Build the project's documentation environment and publish docs/build to
# GitHub Pages on every push to main.
# NOTE(review): no step actually generates docs/build before publishing —
# presumably `poetry install` hooks or a prior commit provide it; add an
# explicit docs build step (e.g. `poetry run sphinx-build`) — TODO confirm.

name: Create docs

on:
  push:
    branches: [ "main" ]

jobs:
  build:
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.11"]
        platform: [macos-latest]  # TODO: add ubuntu-latest
    runs-on: ${{ matrix.platform }}
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          # NOTE(review): flake8 is installed but no lint step runs it — either
          # add a lint step or drop it from this docs workflow.
          python -m pip install --upgrade pip poetry flake8
          poetry install
      - name: Publish to GitHub Pages
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: docs/build
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,18 @@ name: Python package

on:
push:
branches: [ "main" ]
branches: [ "*" ]
pull_request:
branches: [ "main" ]
branches: [ "*" ]

jobs:
build:

runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.11", "3.12"]

python-version: ["3.11"]
platform: [macos-latest, windows-latest] # TODO: add ubuntu-latest
runs-on: ${{ matrix.platform }}
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
Expand Down
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,21 @@ poetry run causy execute tests/fixtures/toy_data_larger.json --pipeline pc.json
### Usage via Code

Use a default algorithm

```python
from causy.algorithms import PC
from causy.utils import show_edges
from causy.utils import retrieve_edges

model = PC()
model.create_graph_from_data(
[
{"a": 1, "b": 0.3},
{"a": 1, "b": 0.3},
{"a": 0.5, "b": 0.2}
]
)
model.create_all_possible_edges()
model.execute_pipeline_steps()
edges = show_edges(model.graph)
edges = retrieve_edges(model.graph)

for edge in edges:
print(
Expand Down
33 changes: 33 additions & 0 deletions causy/algorithms.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from causy.exit_conditions import ExitOnNoActions
from causy.generators import PairsWithNeighboursGenerator
from causy.graph import graph_model_factory, Loop
from causy.independence_tests import (
CalculateCorrelations,
CorrelationCoefficientTest,
PartialCorrelationTest,
ExtendedPartialCorrelationTestMatrix,
)
from causy.interfaces import AS_MANY_AS_FIELDS, ComparisonSettings
from causy.orientation_tests import (
ColliderTest,
NonColliderTest,
Expand All @@ -32,3 +34,34 @@
),
]
)


# Parallelised PC-algorithm pipeline: same step sequence as the default PC
# model, but the conditional-independence tests run with parallel=True and
# process their candidate sets in chunks.
ParallelPC = graph_model_factory(
    pipeline_steps=[
        # Skeleton phase: score all pairwise correlations first, then prune
        # edges whose (partial) correlation falls below the 0.01 threshold.
        CalculateCorrelations(),
        CorrelationCoefficientTest(threshold=0.01),
        PartialCorrelationTest(
            threshold=0.01, parallel=True, chunk_size_parallel_processing=10000
        ),
        ExtendedPartialCorrelationTestMatrix(
            threshold=0.01,
            chunk_size_parallel_processing=1000,
            parallel=True,
            # Custom generator: unchunked, shuffled combinations of a node
            # pair plus >=2 neighbours (min=4 counts the pair itself), with
            # no upper bound on the conditioning-set size.
            generator=PairsWithNeighboursGenerator(
                chunked=False,
                shuffle_combinations=True,
                comparison_settings=ComparisonSettings(min=4, max=AS_MANY_AS_FIELDS),
            ),
        ),
        # Orientation phase: orient colliders once, then loop the remaining
        # orientation rules until a pass produces no further actions.
        ColliderTest(),
        Loop(
            pipeline_steps=[
                NonColliderTest(),
                FurtherOrientTripleTest(),
                OrientQuadrupleTest(),
                FurtherOrientQuadrupleTest(),
            ],
            exit_condition=ExitOnNoActions(),
        ),
    ]
)
49 changes: 35 additions & 14 deletions causy/cli.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import importlib
import json
from datetime import datetime
from json import JSONEncoder
import logging

import typer

from causy.graph import graph_model_factory
from causy.utils import (
load_pipeline_artefact_by_definition,
load_pipeline_steps_by_definition,
show_edges,
retrieve_edges,
)

app = typer.Typer()
import logging


def load_json(pipeline_file: str):
Expand Down Expand Up @@ -58,7 +58,7 @@ def execute(
data_file: str,
pipeline: str = None,
algorithm: str = None,
graph_actions_save_file: str = None,
output_file: str = None,
render_save_file: str = None,
log_level: str = "ERROR",
):
Expand All @@ -68,9 +68,17 @@ def execute(
pipeline_config = load_json(pipeline)
pipeline = create_pipeline(pipeline_config)
model = graph_model_factory(pipeline_steps=pipeline)()
algorithm_reference = {
"type": "pipeline",
"reference": pipeline, # TODO: how to reference pipeline in a way that it can be loaded?
}
elif algorithm:
typer.echo(f"💾 Creating pipeline from algorithm {algorithm}")
model = load_algorithm(algorithm)()
algorithm_reference = {
"type": "default",
"reference": algorithm,
}

else:
raise ValueError("Either pipeline_file or algorithm must be specified")
Expand All @@ -83,18 +91,31 @@ def execute(

typer.echo("🕵🏻‍♀ Executing pipeline steps...")
model.execute_pipeline_steps()
edges = show_edges(model.graph)
for edge in edges:
edges = []
for edge in retrieve_edges(model.graph):
print(
f"{edge[0].name} -> {edge[1].name}: {model.graph.edges[edge[0]][edge[1]]}"
f"{model.graph.nodes[edge[0]].name} -> {model.graph.nodes[edge[1]].name}: {model.graph.edges[edge[0]][edge[1]]}"
)
edges.append(
{
"from": model.graph.nodes[edge[0]].to_dict(),
"to": model.graph.nodes[edge[1]].to_dict(),
"value": model.graph.edges[edge[0]][edge[1]],
}
)

if graph_actions_save_file:
typer.echo(f"💾 Saving graph actions to {graph_actions_save_file}")
with open(graph_actions_save_file, "w") as file:
file.write(
json.dumps(model.graph.action_history, cls=MyJSONEncoder, indent=4)
)
if output_file:
typer.echo(f"💾 Saving graph actions to {output_file}")
with open(output_file, "w") as file:
export = {
"name": algorithm,
"created_at": datetime.now().isoformat(),
"algorithm": algorithm_reference,
"steps": model.graph.action_history,
"nodes": model.graph.nodes,
"edges": edges,
}
file.write(json.dumps(export, cls=MyJSONEncoder, indent=4))

if render_save_file:
# I'm just a hacky rendering function, pls replace me with causy ui 🙄
Expand All @@ -105,7 +126,7 @@ def execute(
n_graph = nx.DiGraph()
for u in model.graph.edges:
for v in model.graph.edges[u]:
n_graph.add_edge(u.name, v.name)
n_graph.add_edge(model.graph.nodes[u].name, model.graph.nodes[v].name)
fig = plt.figure(figsize=(10, 10))
nx.draw(n_graph, with_labels=True, ax=fig.add_subplot(111))
fig.savefig(render_save_file)
Expand Down
50 changes: 42 additions & 8 deletions causy/generators.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import itertools
import logging

Expand All @@ -9,7 +10,6 @@
AS_MANY_AS_FIELDS,
)


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -43,6 +43,24 @@ def generate(


class PairsWithNeighboursGenerator(GeneratorInterface):
shuffle_combinations = True
chunked = True

    def __init__(
        self,
        comparison_settings: ComparisonSettings,
        chunked: bool = None,
        shuffle_combinations: bool = None,
    ):
        """Initialise the generator.

        :param comparison_settings: bounds (min/max) for the combination sizes
            this generator yields; forwarded to the base class.
        :param chunked: whether results are yielded in chunks; ``None`` keeps
            the class-level default (handled by the base class).
        :param shuffle_combinations: whether neighbour combinations are
            shuffled before being yielded; ``None`` keeps the class-level
            default (``True``) rather than overriding it.
        """
        super().__init__(comparison_settings, chunked)
        # Only override the class attribute when the caller passed an explicit
        # value, so ``None`` means "use the default", not "disable".
        if shuffle_combinations is not None:
            self.shuffle_combinations = shuffle_combinations

def to_dict(self):
result = super().to_dict()
result["params"]["shuffle_combinations"] = self.shuffle_combinations
return result

def generate(
self, graph: BaseGraphInterface, graph_model_instance_: GraphModelInterface
):
Expand Down Expand Up @@ -71,20 +89,36 @@ def generate(
for i in range(start, stop):
logger.debug(f"PairsWithNeighboursGenerator: i={i}")
checked_combinations = set()
for node in graph.edges:
for neighbour in graph.edges[node]:
local_edges = copy.deepcopy(graph.edges)
for node in local_edges:
for neighbour in local_edges[node]:
if (node, neighbour) in checked_combinations:
continue

checked_combinations.add((node, neighbour))
if i == 2:
yield (node.name, neighbour.name)
yield (node, neighbour)
continue

other_neighbours = set(graph.edges[node])
other_neighbours.remove(neighbour)
if neighbour in other_neighbours:
other_neighbours.remove(neighbour)
else:
continue
if len(other_neighbours) + 2 < i:
continue

for k in itertools.combinations(other_neighbours, i):
yield [node.name, neighbour.name] + [ks.name for ks in k]
combinations = itertools.combinations(other_neighbours, i)
if self.shuffle_combinations:
combinations = list(combinations)
import random

random.shuffle(combinations)

if self.chunked:
chunk = []
for k in combinations:
chunk.append([node, neighbour] + [ks for ks in k])
yield chunk
else:
for k in combinations:
yield [node, neighbour] + [ks for ks in k]
Loading