continue
leoschwarz committed Jun 7, 2024
1 parent 1024e79 commit c4543e9
Showing 4 changed files with 50 additions and 29 deletions.
@@ -27,6 +27,7 @@ def main() -> None:
 
     imzmls = [
         "menzha_20231208_s607923_tonsil-repro-sample-01.imzML",
+        "menzha_20231208_s607923_tonsil-repro-sample-02.imzML",
         "menzha_20231208_s607930_64074-b20-30928-a.imzML",
         "menzha_20240212_tonsil_06-50.imzML",
     ]
@@ -35,7 +36,7 @@ def main() -> None:
     for imzml in imzmls:
         requested_files += prepare_tasks(data_raw_dir / imzml, work_dir=work_dir)
 
-    SnakemakeInvoke().invoke(work_dir=work_dir, result_files=requested_files, n_cores=4)
+    SnakemakeInvoke(continue_on_error=True).invoke(work_dir=work_dir, result_files=requested_files, n_cores=4)
 
 
 def get_all_output_files(folders: list[Path]) -> list[Path]:
18 changes: 9 additions & 9 deletions src/depiction_targeted_preproc/workflow/proc/cluster_hdbscan.py
@@ -6,6 +6,7 @@
 import typer
 import xarray
 from hdbscan.flat import (HDBSCAN_flat)
+from loguru import logger
 from sklearn.preprocessing import StandardScaler
 from typer import Option
 
@@ -28,15 +29,14 @@ def cluster_dbscan(input_netcdf_path: Annotated[Path, Option()], output_netcdf_p
     # dbscan = (eps=0.3, min_samples=10)
     data_scaled = scaler.transform(reduced_data.values)
 
-    # clusterer = hdbscan.HDBSCAN()
-    # clusterer.fit(data_scaled)
-    # clusters = clusterer.labels_
-
-    clusterer = HDBSCAN_flat(data_scaled,
-                             cluster_selection_method='eom',
-                             n_clusters=10,
-                             min_cluster_size=math.ceil(0.02*data_scaled.shape[0]))
-    clusters = clusterer.labels_
+    try:
+        clusterer = HDBSCAN_flat(data_scaled,
+                                 n_clusters=10)
+        #min_cluster_size=math.ceil(0.02 * data_scaled.shape[0]))
+        clusters = clusterer.labels_
+    except IndexError:
+        logger.error("No clusters found")
+        clusters = np.zeros(data_scaled.shape[0])
 
     cluster_data = xarray.DataArray(clusters, dims=("i",), coords={"i": image.data_flat.coords["i"]}).expand_dims("c")
     cluster_data.coords["c"] = ["cluster"]
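Note on the cluster_hdbscan.py change: the commit wraps HDBSCAN_flat in a try/except because it can raise an IndexError when it cannot extract the requested number of flat clusters, and falls back to a single all-zero labeling. A minimal self-contained sketch of that pattern, on synthetic data (the array shape and random input are illustrative, not from the pipeline):

import numpy as np
from hdbscan.flat import HDBSCAN_flat
from loguru import logger

# Illustrative stand-in for the scaled, dimensionality-reduced data.
data_scaled = np.random.default_rng(0).normal(size=(500, 2))

try:
    # Request exactly 10 flat clusters; per the commit, this can raise
    # IndexError on degenerate input.
    clusterer = HDBSCAN_flat(data_scaled, n_clusters=10)
    clusters = clusterer.labels_
except IndexError:
    # Fall back to one label per point so downstream steps still get output.
    logger.error("No clusters found")
    clusters = np.zeros(data_scaled.shape[0])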
50 changes: 32 additions & 18 deletions src/depiction_targeted_preproc/workflow/proc/cluster_stats.py
@@ -13,12 +13,12 @@
 from depiction_targeted_preproc.workflow.proc.__cluster_stats import compute_CHAOS, compute_PAS
 
 
-# from fastdtw import fastdtw
-
-
 def compute_silhouette(cluster_data, cluster_coords):
     # higher is better
-    return silhouette_score(cluster_coords, cluster_data)
+    try:
+        return silhouette_score(cluster_coords, cluster_data)
+    except ValueError:
+        return np.nan
 
 
 def compute_davies_bouldin(cluster_data, cluster_coords):
@@ -27,20 +27,13 @@ def compute_davies_bouldin(cluster_data, cluster_coords):
 
 
 def compute_calinski_harabasz(X, labels):
-    return calinski_harabasz_score(X, labels)
+    try:
+        return calinski_harabasz_score(X, labels)
+    except ValueError:
+        return np.nan
 
 
-def cluster_stats(input_netcdf_path: Annotated[Path, Option()], output_csv_path: Annotated[Path, Option()]) -> None:
-    cluster_image = MultiChannelImage.read_hdf5(input_netcdf_path)
-
-    cluster_data = cluster_image.data_flat.values.ravel()
-    cluster_coords = np.hstack(
-        (
-            cluster_image.data_flat.coords["x"].values.reshape(-1, 1),
-            cluster_image.data_flat.coords["y"].values.reshape(-1, 1),
-        )
-    )
-
+def compute_metrics(cluster_data: np.ndarray, cluster_coords: np.ndarray) -> dict[str, float]:
     chaos = compute_CHAOS(cluster_data, cluster_coords)
     logger.info(f"Computed CHAOS: {chaos}")
 
@@ -56,9 +49,30 @@ def cluster_stats(input_netcdf_path: Annotated[Path, Option()], output_csv_path:
     calinski_harabasz = compute_calinski_harabasz(cluster_coords, cluster_data)
     logger.info(f"Computed Calinski-Harabasz: {calinski_harabasz}")
 
+    return {
+        "CHAOS": chaos,
+        "PAS": pas,
+        "Silhouette": silhouette,
+        "Davies-Bouldin": davies_bouldin,
+        "Calinski-Harabasz": calinski_harabasz
+    }
+
+
+def cluster_stats(input_netcdf_path: Annotated[Path, Option()], output_csv_path: Annotated[Path, Option()]) -> None:
+    cluster_image = MultiChannelImage.read_hdf5(input_netcdf_path)
+
+    cluster_data = cluster_image.data_flat.values.ravel()
+    cluster_coords = np.hstack(
+        (
+            cluster_image.data_flat.coords["x"].values.reshape(-1, 1),
+            cluster_image.data_flat.coords["y"].values.reshape(-1, 1),
+        )
+    )
+
+    metrics = compute_metrics(cluster_data, cluster_coords)
+
     metrics_df = pl.DataFrame(
-        {"metric": ["CHAOS", "PAS", "Silhouette", "Davies-Bouldin", "Calinski-Harabasz"],
-         "value": [chaos, pas, silhouette, davies_bouldin, calinski_harabasz]}
+        {"metric": list(metrics.keys()), "value": list(metrics.values())}
     )
     metrics_df.write_csv(output_csv_path)
 
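Why the new ValueError guards in cluster_stats.py: scikit-learn's silhouette_score and calinski_harabasz_score reject labelings with fewer than two distinct clusters, which is exactly what the single-cluster fallback above can now produce. A short sketch of the failure mode and the NaN fallback (synthetic coordinates, illustrative only):

import numpy as np
from sklearn.metrics import silhouette_score

coords = np.random.default_rng(0).normal(size=(100, 2))
labels = np.zeros(100)  # degenerate labeling: a single cluster

try:
    score = silhouette_score(coords, labels)
except ValueError:
    # Recorded as NaN in the metrics CSV instead of crashing the rule.
    score = np.nan

print(score)  # nan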
8 changes: 7 additions & 1 deletion src/depiction_targeted_preproc/workflow/snakemake_invoke.py
@@ -10,6 +10,7 @@
 @dataclass
 class SnakemakeInvoke:
     use_subprocess: bool = True
+    continue_on_error: bool = False
 
     def invoke(self, work_dir: Path, result_files: list[Path], n_cores: int = 1) -> None:
         if self.use_subprocess:
@@ -31,6 +32,7 @@ def _invoke_direct(self, work_dir: Path, result_files: list[Path], n_cores: int)
         from snakemake.settings import StorageSettings
         from snakemake.settings import ResourceSettings
         from snakemake.settings import DAGSettings
+        from snakemake.settings import ExecutionSettings
 
         with SnakemakeApi(
             OutputSettings(
@@ -47,12 +49,15 @@ def _invoke_direct(self, work_dir: Path, result_files: list[Path], n_cores: int)
             dag_api = workflow_api.dag(
                 dag_settings=DAGSettings(targets=[str(p) for p in result_files], force_incomplete=True)
             )
-            dag_api.execute_workflow()
+            dag_api.execute_workflow(execution_settings=ExecutionSettings(keep_going=self.continue_on_error))
 
     def _invoke_subprocess(self, work_dir: Path, result_files: list[Path], n_cores: int) -> None:
         snakemake_bin = shutil.which("snakemake")
         if snakemake_bin is None:
             raise RuntimeError(f"snakemake not found, check PATH: {os.environ['PATH']}")
+        extra_args = []
+        if self.continue_on_error:
+            extra_args.append("--keep-going")
         command = [
             snakemake_bin,
             "-d",
@@ -61,6 +66,7 @@ def _invoke_subprocess(self, work_dir: Path, result_files: list[Path], n_cores:
             str(n_cores),
             "--snakefile",
             str(self.snakefile_path),
+            *extra_args,
             *[str(file.relative_to(work_dir)) for file in result_files],
         ]
         logger.info("Executing {command}", command=command)
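Taken together, continue_on_error=True maps to Snakemake's keep-going behavior in both execution paths: ExecutionSettings(keep_going=...) for the in-process API and the --keep-going flag for the subprocess invocation, so independent targets keep running after one fails. A hypothetical usage sketch (the work directory and result paths are made up; the import path assumes the package layout under src/):

from pathlib import Path

from depiction_targeted_preproc.workflow.snakemake_invoke import SnakemakeInvoke

work_dir = Path("/tmp/depiction_work")  # hypothetical work directory
result_files = [work_dir / "sample-01" / "cluster_stats.csv"]  # hypothetical target

# A failing target no longer aborts the whole DAG; remaining independent
# targets are still built, matching snakemake --keep-going semantics.
SnakemakeInvoke(continue_on_error=True).invoke(
    work_dir=work_dir,
    result_files=result_files,
    n_cores=4,
)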
