diff --git a/.gitignore b/.gitignore
index a5c92fa..65091b9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,3 +37,4 @@ trace*
work/
test.ipynb
flop.png
+*.h5ad
\ No newline at end of file
diff --git a/docs/src/_static/nc_corneto.svg b/docs/src/_static/nc_corneto.svg
new file mode 100644
index 0000000..e3abb7d
--- /dev/null
+++ b/docs/src/_static/nc_corneto.svg
@@ -0,0 +1,2099 @@
+
+
+
+
diff --git a/docs/src/api.rst b/docs/src/api.rst
index c4637ff..78f9723 100644
--- a/docs/src/api.rst
+++ b/docs/src/api.rst
@@ -209,6 +209,20 @@ NCI60
data.omics.nci60_datatypes
data.omics.nci60_table
+.. _api-phosphoegf:
+
+Phospho-EGF meta-analysis
+~~~~~~~~~~~~~~~~~~~~~~~~~
+.. module::networkcommons.data.omics
+.. currentmodule:: networkcommons
+
+.. autosummary::
+ :toctree: api
+ :recursive:
+
+ data.omics.phospho_egf_datatypes
+ data.omics.phospho_egf_tables
+
.. _api-eval:
Evaluation and description
diff --git a/docs/src/contents.rst b/docs/src/contents.rst
index bfef1ff..b95ddca 100644
--- a/docs/src/contents.rst
+++ b/docs/src/contents.rst
@@ -43,4 +43,5 @@ NetworkCommons: Table of Contents
:maxdepth: 2
:caption: Additional resources
- vignettes/A_moon
\ No newline at end of file
+ vignettes/A_moon
+ vignettes/B_pertpy
\ No newline at end of file
diff --git a/docs/src/datasets.rst b/docs/src/datasets.rst
index 24da76e..43ab294 100644
--- a/docs/src/datasets.rst
+++ b/docs/src/datasets.rst
@@ -92,6 +92,25 @@ NCI60
-.. _details-pk:
+.. _details-phosphoegf:
+
+Phosphoproteomics in response to EGF
+------------------------------------
+
+**Alias:** PhosphoEGF
+
+**Description:** A meta-analysis of phosphoproteomics data in response to EGF stimulation
+
+**Publication Link:** `Garrido-Rodriguez et al. Evaluating signaling pathway inference from kinase-substrate interactions and phosphoproteomics data. bioRxiv (2024). <https://www.biorxiv.org/content/10.1101/2024.10.21.619348v1>`_
+
+**Data location:** `Supplementary Data files of the manuscript <https://www.biorxiv.org/content/10.1101/2024.10.21.619348v1.supplementary-material>`_
+
+**Detailed Description:** This dataset contains the results of a meta-analysis of phosphoproteomics data in response to EGF stimulation across different labs and stimulation times. The data is available at two different levels. First, the phosphosite differential abundance is provided for every combination of study and treatment time. In the table, 'This study' refers to the data generated in the manuscript. Second, we offer access to the kinase-level activities inferred using decoupleR and the different kinase-substrate networks described in the paper. Briefly, four different networks were employed: a first one based on literature (literature), one based on kinase-substrate interaction prediction via protein language models (phosformer), one based on positional peptide array screening (kinlibrary) and a combination of all of them (combined).
+
+**Functions:** See API documentation for :ref:`Phospho-EGF meta-analysis <api-phosphoegf>`.
+
+.. _details-pk:
+
+
---------------
Prior Knowledge
---------------
diff --git a/docs/src/methods.rst b/docs/src/methods.rst
index 2b55094..55409ec 100644
--- a/docs/src/methods.rst
+++ b/docs/src/methods.rst
@@ -172,6 +172,10 @@ CORNETO - CARNIVAL
CORNETO (Constraint-based Optimization for the Reconstruction of NETworks from Omics) is a unified network inference method which combines a wide range of network methods including CARNIVAL which is currently implemented in NetworkCommons. CARNIVAL (CAusal Reasoning for Network identification using Integer VALue programming) connects a set of weighted target and source nodes using integer linear programming (ILP) and predicts the sign for the intermediate nodes (https://doi.org/10.1038/s41540-019-0118-z). Thereby, it optimizes a cost function that penalizes the inclusion of edges as well as the removal of target and source nodes. Additionally, it considers a set of constraints that among other things do not allow sign inconsistency.
+.. raw:: html
+
+
+
**Input:** Set of weighted target and source nodes, network graph
**Node weights:** w(v) ∈ ℝ
diff --git a/docs/src/vignettes/B_pertpy.ipynb b/docs/src/vignettes/B_pertpy.ipynb
new file mode 100644
index 0000000..60224f8
--- /dev/null
+++ b/docs/src/vignettes/B_pertpy.ipynb
@@ -0,0 +1,1362 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Vignette B: Pertpy bridging to NetworkCommons"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this vignette, we showcase how the resources from pertpy can be used with NetworkCommons. Here, we will use part of the vignette [Use-case: Deconvoluting drug responses in cancer cell lines](https://pertpy.readthedocs.io/en/latest/tutorials/notebooks/mcfarland_use_case.html), available in the [pertpy documentation](https://pertpy.readthedocs.org). This dataset contains single-cell RNA-seq perturbational profiles from 172 cancer cell lines treated with 13 drugs. Due to computational power constraints, we will only showcase this with one cell line and one dataset. However, this can be expanded to additional cell lines and perturbations."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pertpy as pt\n",
+ "import scanpy as sc\n",
+ "import numpy as np\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import networkcommons as nc"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1. Processing with pertpy"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This section was taken from the pertpy documentation, [Use-case: Deconvoluting drug responses in cancer cell lines](https://pertpy.readthedocs.io/en/latest/tutorials/notebooks/mcfarland_use_case.html). Please refer to this for further details."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "adata = pt.dt.mcfarland_2020()\n",
+ "adata"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "adata.write_h5ad(\n",
+ " \"adata.h5ad\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "adata = sc.read_h5ad(\"adata.h5ad\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Navitoclax',\n",
+ " 'BRD3379',\n",
+ " 'AZD5591',\n",
+ " 'Taselisib',\n",
+ " 'Everolimus',\n",
+ " 'Idasanutlin',\n",
+ " 'Bortezomib',\n",
+ " 'sgLACZ',\n",
+ " 'sgGPX4-1',\n",
+ " 'control',\n",
+ " 'Trametinib',\n",
+ " 'sgOR2J2',\n",
+ " 'Afatinib',\n",
+ " 'Dabrafenib',\n",
+ " 'sgGPX4-2',\n",
+ " 'Gemcitabine',\n",
+ " 'JQ1',\n",
+ " 'Prexasertib']"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "adata.obs[\"perturbation\"].unique().tolist()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sc.pp.filter_genes(adata, min_cells=30)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "adata.layers[\"raw_counts\"] = adata.X.copy()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Metadata annotation\n",
+ "cl_metadata = pt.md.CellLine()\n",
+ "cl_metadata.annotate(\n",
+ " adata,\n",
+ " query_id=\"DepMap_ID\",\n",
+ " reference_id=\"ModelID\",\n",
+ " fetch=[\"CellLineName\", \"Age\", \"OncotreePrimaryDisease\", \"SangerModelID\", \"OncotreeLineage\"],\n",
+ ")\n",
+ "\n",
+ "moa_metadata = pt.md.Moa()\n",
+ "moa_metadata.annotate(\n",
+ " adata,\n",
+ " query_id=\"perturbation\",\n",
+ ")\n",
+ "\n",
+ "# Add control annotations\n",
+ "adata.obs[\"moa\"] = [\"Control\" if pert == \"control\" else moa for moa, pert in zip(adata.obs[\"moa\"], adata.obs[\"perturbation\"])]\n",
+ "adata.obs[\"target\"] = [\"Control\" if pert == \"control\" else target for target, pert in zip(adata.obs[\"target\"], adata.obs[\"perturbation\"])]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sc.pl.umap(adata, color=[\"moa\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cl_metadata.annotate_from_gdsc(\n",
+ " adata,\n",
+ " query_id=\"SangerModelID\",\n",
+ " reference_id=\"sanger_model_id\",\n",
+ " query_perturbation='perturbation',\n",
+ " gdsc_dataset=\"gdsc_1\",\n",
+ ")\n",
+ "adata.obs[\"ln_ic50_GDSC1\"] = adata.obs[\"ln_ic50\"].copy()\n",
+ "\n",
+ "cl_metadata.annotate_from_gdsc(\n",
+ " adata,\n",
+ " query_id=\"SangerModelID\",\n",
+ " reference_id=\"sanger_model_id\",\n",
+ " query_perturbation='perturbation',\n",
+ " gdsc_dataset=\"gdsc_2\",\n",
+ ")\n",
+ "adata.obs[\"ln_ic50_GDSC2\"] = adata.obs[\"ln_ic50\"].copy()\n",
+ "\n",
+ "del adata.obs[\"ln_ic50\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "adata_dabrafenib = adata[adata.obs[\"perturbation\"].isin([\"control\", \"Dabrafenib\"])]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "adata_dabrafenib = sc.read_h5ad(\"adata_dabrafenib.h5ad\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "subset_cells = np.random.choice(adata_dabrafenib.obs[\"SangerModelID\"].unique().tolist(), size=10, replace=False)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['SIDM00759', 'SIDM00582', 'SIDM00963', 'SIDM01150', 'SIDM00143',\n",
+ " 'SIDM00756', 'SIDM01060', 'SIDM00139', 'SIDM01167', 'SIDM01026'],\n",
+ " dtype='\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " RP11-34P13.7 | \n",
+ " AL627309.1 | \n",
+ " AP006222.2 | \n",
+ " RP4-669L17.10 | \n",
+ " RP11-206L10.3 | \n",
+ " RP11-206L10.2 | \n",
+ " RP11-206L10.9 | \n",
+ " FAM87B | \n",
+ " LINC00115 | \n",
+ " FAM41C | \n",
+ " ... | \n",
+ " MT-ND6 | \n",
+ " MT-CYB | \n",
+ " AC145212.1 | \n",
+ " MGC39584 | \n",
+ " AC011043.1 | \n",
+ " AL592183.1 | \n",
+ " AC011841.1 | \n",
+ " AL354822.1 | \n",
+ " PNRC2-1 | \n",
+ " SRSF10-1 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " SIDM01150 | \n",
+ " -0.029041 | \n",
+ " -0.154688 | \n",
+ " 0.007570 | \n",
+ " 0.135863 | \n",
+ " 0.080412 | \n",
+ " 0.347878 | \n",
+ " 0.346763 | \n",
+ " 0.000000 | \n",
+ " -0.148709 | \n",
+ " -0.343955 | \n",
+ " ... | \n",
+ " 0.366750 | \n",
+ " -0.100324 | \n",
+ " 0.011169 | \n",
+ " 0.132036 | \n",
+ " -0.025371 | \n",
+ " 0.145605 | \n",
+ " 0.013151 | \n",
+ " 0.025349 | \n",
+ " -0.126633 | \n",
+ " -0.186471 | \n",
+ "
\n",
+ " \n",
+ " SIDM00143 | \n",
+ " 0.000000 | \n",
+ " 0.294966 | \n",
+ " 0.046804 | \n",
+ " -0.218813 | \n",
+ " 0.497196 | \n",
+ " 0.000000 | \n",
+ " -0.218851 | \n",
+ " 0.000000 | \n",
+ " 0.439098 | \n",
+ " -0.302195 | \n",
+ " ... | \n",
+ " 0.400622 | \n",
+ " 0.121451 | \n",
+ " -0.291270 | \n",
+ " 0.000000 | \n",
+ " 0.483729 | \n",
+ " 0.086145 | \n",
+ " 0.328663 | \n",
+ " 0.168367 | \n",
+ " -0.006578 | \n",
+ " 0.451708 | \n",
+ "
\n",
+ " \n",
+ " SIDM01060 | \n",
+ " 0.043288 | \n",
+ " 0.000000 | \n",
+ " -0.162589 | \n",
+ " 0.065901 | \n",
+ " 0.027507 | \n",
+ " -0.033283 | \n",
+ " 0.000590 | \n",
+ " 0.000000 | \n",
+ " -0.502486 | \n",
+ " -0.351066 | \n",
+ " ... | \n",
+ " 0.224724 | \n",
+ " 0.005616 | \n",
+ " -0.017290 | \n",
+ " 0.000000 | \n",
+ " -0.198974 | \n",
+ " -0.399949 | \n",
+ " 0.000000 | \n",
+ " 0.131691 | \n",
+ " 0.000000 | \n",
+ " -0.250337 | \n",
+ "
\n",
+ " \n",
+ " SIDM01167 | \n",
+ " 0.000000 | \n",
+ " 0.158182 | \n",
+ " 0.484159 | \n",
+ " 0.047778 | \n",
+ " 0.047758 | \n",
+ " 0.000000 | \n",
+ " -0.186445 | \n",
+ " -0.102564 | \n",
+ " 0.546340 | \n",
+ " 0.195960 | \n",
+ " ... | \n",
+ " 0.089615 | \n",
+ " 0.063865 | \n",
+ " 0.063088 | \n",
+ " 0.000000 | \n",
+ " -0.326881 | \n",
+ " 0.000977 | \n",
+ " 0.000000 | \n",
+ " 0.114601 | \n",
+ " 0.000000 | \n",
+ " -0.475517 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "4 rows × 21805 columns
\n",
+ ""
+ ],
+ "text/plain": [
+ " RP11-34P13.7 AL627309.1 AP006222.2 RP4-669L17.10 RP11-206L10.3 \\\n",
+ "SIDM01150 -0.029041 -0.154688 0.007570 0.135863 0.080412 \n",
+ "SIDM00143 0.000000 0.294966 0.046804 -0.218813 0.497196 \n",
+ "SIDM01060 0.043288 0.000000 -0.162589 0.065901 0.027507 \n",
+ "SIDM01167 0.000000 0.158182 0.484159 0.047778 0.047758 \n",
+ "\n",
+ " RP11-206L10.2 RP11-206L10.9 FAM87B LINC00115 FAM41C ... \\\n",
+ "SIDM01150 0.347878 0.346763 0.000000 -0.148709 -0.343955 ... \n",
+ "SIDM00143 0.000000 -0.218851 0.000000 0.439098 -0.302195 ... \n",
+ "SIDM01060 -0.033283 0.000590 0.000000 -0.502486 -0.351066 ... \n",
+ "SIDM01167 0.000000 -0.186445 -0.102564 0.546340 0.195960 ... \n",
+ "\n",
+ " MT-ND6 MT-CYB AC145212.1 MGC39584 AC011043.1 AL592183.1 \\\n",
+ "SIDM01150 0.366750 -0.100324 0.011169 0.132036 -0.025371 0.145605 \n",
+ "SIDM00143 0.400622 0.121451 -0.291270 0.000000 0.483729 0.086145 \n",
+ "SIDM01060 0.224724 0.005616 -0.017290 0.000000 -0.198974 -0.399949 \n",
+ "SIDM01167 0.089615 0.063865 0.063088 0.000000 -0.326881 0.000977 \n",
+ "\n",
+ " AC011841.1 AL354822.1 PNRC2-1 SRSF10-1 \n",
+ "SIDM01150 0.013151 0.025349 -0.126633 -0.186471 \n",
+ "SIDM00143 0.328663 0.168367 -0.006578 0.451708 \n",
+ "SIDM01060 0.000000 0.131691 0.000000 -0.250337 \n",
+ "SIDM01167 0.000000 0.114601 0.000000 -0.475517 \n",
+ "\n",
+ "[4 rows x 21805 columns]"
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "logfc_df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. TF activity estimation with decoupler-py"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that we have the gene changes in response to the perturbation, we can perform TF activity estimation with decoupler and CollecTRI. These scores will be the input for the network contextualization methods from NetworkCommons."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import decoupler as dc"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:root:Downloading data from `https://omnipathdb.org/queries/enzsub?format=json`\n",
+ "INFO:root:Downloading data from `https://omnipathdb.org/queries/interactions?format=json`\n",
+ "INFO:root:Downloading data from `https://omnipathdb.org/queries/complexes?format=json`\n",
+ "INFO:root:Downloading data from `https://omnipathdb.org/queries/annotations?format=json`\n",
+ "INFO:root:Downloading data from `https://omnipathdb.org/queries/intercell?format=json`\n",
+ "INFO:root:Downloading data from `https://omnipathdb.org/about?format=text`\n"
+ ]
+ }
+ ],
+ "source": [
+ "net = dc.get_collectri()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tf_acts, pvals = dc.run_ulm(logfc_df, net)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ABL1 | \n",
+ " AEBP1 | \n",
+ " AHR | \n",
+ " AHRR | \n",
+ " AIP | \n",
+ " AIRE | \n",
+ " AP1 | \n",
+ " APEX1 | \n",
+ " AR | \n",
+ " ARID1A | \n",
+ " ... | \n",
+ " ZNF382 | \n",
+ " ZNF384 | \n",
+ " ZNF395 | \n",
+ " ZNF410 | \n",
+ " ZNF436 | \n",
+ " ZNF699 | \n",
+ " ZNF76 | \n",
+ " ZNF804A | \n",
+ " ZNF91 | \n",
+ " ZXDC | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " SIDM01150 | \n",
+ " 0.080080 | \n",
+ " -1.631165 | \n",
+ " 1.822143 | \n",
+ " -0.486594 | \n",
+ " -0.783931 | \n",
+ " -0.844352 | \n",
+ " 1.723601 | \n",
+ " -1.519310 | \n",
+ " -0.344363 | \n",
+ " 1.623128 | \n",
+ " ... | \n",
+ " -4.238011 | \n",
+ " 1.965072 | \n",
+ " -2.004019 | \n",
+ " 0.069187 | \n",
+ " 2.652415 | \n",
+ " -0.182054 | \n",
+ " -1.793975 | \n",
+ " -0.143599 | \n",
+ " 1.938493 | \n",
+ " -0.277062 | \n",
+ "
\n",
+ " \n",
+ " SIDM00143 | \n",
+ " -1.332259 | \n",
+ " 1.592115 | \n",
+ " 1.183483 | \n",
+ " 0.799785 | \n",
+ " 1.025860 | \n",
+ " 0.468413 | \n",
+ " 4.497127 | \n",
+ " -1.430149 | \n",
+ " 2.600641 | \n",
+ " 2.156102 | \n",
+ " ... | \n",
+ " -0.789468 | \n",
+ " 3.996778 | \n",
+ " -1.028295 | \n",
+ " -0.517214 | \n",
+ " 0.598231 | \n",
+ " 1.087119 | \n",
+ " 0.778563 | \n",
+ " -0.599900 | \n",
+ " 1.412452 | \n",
+ " -2.276532 | \n",
+ "
\n",
+ " \n",
+ " SIDM01060 | \n",
+ " -0.652209 | \n",
+ " -2.475549 | \n",
+ " -1.580986 | \n",
+ " 1.089401 | \n",
+ " -1.944108 | \n",
+ " 0.399068 | \n",
+ " -5.066813 | \n",
+ " -2.765601 | \n",
+ " -0.934723 | \n",
+ " -0.981179 | \n",
+ " ... | \n",
+ " 1.715413 | \n",
+ " -0.158227 | \n",
+ " 1.480932 | \n",
+ " 0.409427 | \n",
+ " -0.044302 | \n",
+ " -0.413981 | \n",
+ " -0.048804 | \n",
+ " -2.692391 | \n",
+ " 1.406787 | \n",
+ " -0.253700 | \n",
+ "
\n",
+ " \n",
+ " SIDM01167 | \n",
+ " -1.334910 | \n",
+ " 0.017429 | \n",
+ " -2.367893 | \n",
+ " 0.813763 | \n",
+ " 2.198922 | \n",
+ " -0.660442 | \n",
+ " -2.465387 | \n",
+ " -1.764444 | \n",
+ " -2.983476 | \n",
+ " 1.955569 | \n",
+ " ... | \n",
+ " 0.219701 | \n",
+ " -0.641184 | \n",
+ " -0.172258 | \n",
+ " -0.653510 | \n",
+ " 1.087562 | \n",
+ " 2.267288 | \n",
+ " -1.447545 | \n",
+ " -0.400557 | \n",
+ " 1.555596 | \n",
+ " -0.400194 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4 rows × 735 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ABL1 AEBP1 AHR AHRR AIP AIRE \\\n",
+ "SIDM01150 0.080080 -1.631165 1.822143 -0.486594 -0.783931 -0.844352 \n",
+ "SIDM00143 -1.332259 1.592115 1.183483 0.799785 1.025860 0.468413 \n",
+ "SIDM01060 -0.652209 -2.475549 -1.580986 1.089401 -1.944108 0.399068 \n",
+ "SIDM01167 -1.334910 0.017429 -2.367893 0.813763 2.198922 -0.660442 \n",
+ "\n",
+ " AP1 APEX1 AR ARID1A ... ZNF382 ZNF384 \\\n",
+ "SIDM01150 1.723601 -1.519310 -0.344363 1.623128 ... -4.238011 1.965072 \n",
+ "SIDM00143 4.497127 -1.430149 2.600641 2.156102 ... -0.789468 3.996778 \n",
+ "SIDM01060 -5.066813 -2.765601 -0.934723 -0.981179 ... 1.715413 -0.158227 \n",
+ "SIDM01167 -2.465387 -1.764444 -2.983476 1.955569 ... 0.219701 -0.641184 \n",
+ "\n",
+ " ZNF395 ZNF410 ZNF436 ZNF699 ZNF76 ZNF804A \\\n",
+ "SIDM01150 -2.004019 0.069187 2.652415 -0.182054 -1.793975 -0.143599 \n",
+ "SIDM00143 -1.028295 -0.517214 0.598231 1.087119 0.778563 -0.599900 \n",
+ "SIDM01060 1.480932 0.409427 -0.044302 -0.413981 -0.048804 -2.692391 \n",
+ "SIDM01167 -0.172258 -0.653510 1.087562 2.267288 -1.447545 -0.400557 \n",
+ "\n",
+ " ZNF91 ZXDC \n",
+ "SIDM01150 1.938493 -0.277062 \n",
+ "SIDM00143 1.412452 -2.276532 \n",
+ "SIDM01060 1.406787 -0.253700 \n",
+ "SIDM01167 1.555596 -0.400194 \n",
+ "\n",
+ "[4 rows x 735 columns]"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tf_acts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "measurements = tf_acts.loc[\"SIDM01060\"].sort_values(ascending=False, key=abs)[0:25].to_dict()\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3. Network inference with NetworkCommons"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that we have scores for upstream and downstream layers, we can perform network inference with NetworkCommons. For the sake of simplicity and just for demonstration purposes, we will only use the shortest path approach. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "network = nc.data.network.get_omnipath()\n",
+ "graph = nc.utils.network_from_df(network)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "source = {'BRAF': -1}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "shortest_path_network, shortest_path_list = nc.methods.run_shortest_paths(graph, source, measurements)\n",
+ "shortest_sc_network, shortest_sc_list = nc.methods.run_sign_consistency(shortest_path_network, shortest_path_list, source, measurements)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ">"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "visualizer = nc.visual.NetworkXVisualizer(shortest_sc_network)\n",
+ "visualizer.visualize_network(source, measurements, network_type='sign_consistent')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "networkcommons-DX9y6Uxu-py3.10",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/networkcommons/data/datasets.yaml b/networkcommons/data/datasets.yaml
index a78b04f..9423da0 100644
--- a/networkcommons/data/datasets.yaml
+++ b/networkcommons/data/datasets.yaml
@@ -47,3 +47,9 @@ omics:
It includes three files: TF activities from transcriptomics data,
metabolite abundances and gene reads.
path: NCI60/{cell_line}/{cell_line}__{data_type}.tsv
+ phosphoegf:
+ name: PhosphoEGF
+ description: Phosphoproteomics meta-analysis of the response to EGF stimulus
+ publication_link: https://www.biorxiv.org/content/10.1101/2024.10.21.619348v1
+ detailed_description: >-
+ This dataset contains phosphoproteomics data after EGF stimulus gathered and preprocessed from multiple studies.
diff --git a/networkcommons/data/omics/__init__.py b/networkcommons/data/omics/__init__.py
index 5c48ca5..93a89db 100644
--- a/networkcommons/data/omics/__init__.py
+++ b/networkcommons/data/omics/__init__.py
@@ -24,3 +24,4 @@
from ._scperturb import *
from ._nci60 import *
from ._cptac import *
+from ._phosphoegf import *
diff --git a/networkcommons/data/omics/_phosphoegf.py b/networkcommons/data/omics/_phosphoegf.py
new file mode 100644
index 0000000..5da2d81
--- /dev/null
+++ b/networkcommons/data/omics/_phosphoegf.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+#
+# This file is part of the `networkcommons` Python module
+#
+# Copyright 2024
+# Heidelberg University Hospital
+#
+# File author(s): Saez Lab (omnipathdb@gmail.com)
+#
+# Distributed under the GPLv3 license
+# See the file `LICENSE` or read a copy at
+# https://www.gnu.org/licenses/gpl-3.0.txt
+#
+
+"""
+Meta-analysis of phosphoproteomics response to EGF stimulus.
+"""
+
+import pandas as pd
+import warnings
+
+def phospho_egf_datatypes() -> pd.DataFrame:
+    """
+    Describe the data types offered by the Phospho EGF dataset.
+
+    Returns:
+        DataFrame with one row per data type, in columns `type` and `description`.
+    """
+
+    descriptions = {
+        'phosphosite': 'Differential phosphoproteomics at the site level for all studies in the meta-analysis',
+        'kinase': 'Kinase activities obtained using each of the kinase-substrate prior knowledge resources',
+    }
+    return pd.DataFrame({'type': list(descriptions), 'description': list(descriptions.values())})
+
+
+def phospho_egf_tables(type='phosphosite'):
+    """
+    Read one table of the phospho EGF dataset from its gzip-compressed CSV on bioRxiv.
+
+    Args:
+        type:
+            'phosphosite' (alias 'diffabundance') or 'kinase' (alias 'kinase_scores').
+
+    Returns:
+        A DataFrame with the requested data; None (after a warning) if `type` is unknown.
+    """
+
+    if type in ('phosphosite', 'diffabundance'):  # names match phospho_egf_datatypes(); old documented names kept as aliases
+        out_table = pd.read_csv('https://www.biorxiv.org/content/biorxiv/early/2024/10/22/2024.10.21.619348/DC3/embed/media-3.gz', compression='gzip', low_memory=False)
+    elif type in ('kinase', 'kinase_scores'):
+        out_table = pd.read_csv('https://www.biorxiv.org/content/biorxiv/early/2024/10/22/2024.10.21.619348/DC4/embed/media-4.gz', compression='gzip', low_memory=False)
+    else:
+        warnings.warn(f'Unknown data type "{type}"')
+        return None
+    return out_table
+
\ No newline at end of file
diff --git a/poetry.lock b/poetry.lock
index 853efae..dbb7264 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -913,26 +913,26 @@ reference = "pypi-public"
[[package]]
name = "corneto"
-version = "1.0.0.dev0"
+version = "1.0.0a0"
description = "CORNETO: A Unified Framework for Omics-Driven Network Inference"
optional = false
-python-versions = "^3.9"
-files = []
-develop = false
+python-versions = ">=3.9"
+files = [
+ {file = "corneto-1.0.0a0-py3-none-any.whl", hash = "sha256:30375a7c5d121f488425cf71ef842ca13137e568ccd38a49569c2970cf5f19bf"},
+ {file = "corneto-1.0.0a0.tar.gz", hash = "sha256:966d50762f92047ea4e5d1a136607f20ae9ec1d585530a24ad5c95e700b24180"},
+]
[package.dependencies]
-numpy = ">=1.15"
+numpy = ">=1.15,<2.0.0"
[package.extras]
highs = ["cvxpy-base (>=1.5.0,<2.0.0)"]
-os = ["cvxpy-base (>=1.5.0,<2.0.0)", "scipy (>=1.11.0)"]
-picos = ["picos (>=2.4.1,<3.0.0)"]
+os = ["cvxpy-base (>=1.5.0,<2.0.0)", "scipy (>=1.11.0,<2.0.0)"]
[package.source]
-type = "git"
-url = "https://github.com/deeenes/corneto.git"
-reference = "main"
-resolved_reference = "cfa311a6d1463f94d94b5e6e5735b5267e69a8a4"
+type = "legacy"
+url = "https://pypi.org/simple"
+reference = "pypi-public"
[[package]]
name = "coverage"
@@ -7351,4 +7351,4 @@ igraph = ["igraph"]
[metadata]
lock-version = "2.0"
python-versions = "^3.9"
-content-hash = "d6baeb6a8129ff6502df82db848bd47d25346bc2c278deb1ed34f3a809eb6765"
+content-hash = "a962bad01f6adb39beb22a297ea79184ea1e32f762d436cc3377ef3a941faff9"
diff --git a/pyproject.toml b/pyproject.toml
index cd112fa..4bf29a3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,7 +64,7 @@ omnipath = "^1.0.8"
pygraphviz = "1.11"
picos = { version = "^2.4.17", optional = true }
cvxpy = "^1.5.1"
-corneto = "1.0.0.dev0"
+corneto = "1.0.0a0"
seaborn = "^0.13.2"
[tool.poetry.dev-dependencies]