From aa750a6400d9792779aae172219230f9a048bfac Mon Sep 17 00:00:00 2001 From: Martin Garrido Date: Wed, 13 Nov 2024 15:38:05 +0100 Subject: [PATCH 1/3] add phospho-egf meta analysis data --- docs/src/datasets.rst | 19 +++++++ docs/src/vignettes/B_phospho_egf_data.ipynb | 34 ++++++++++++ networkcommons/data/datasets.yaml | 6 +++ networkcommons/data/omics/__init__.py | 1 + networkcommons/data/omics/_phosphoegf.py | 58 +++++++++++++++++++++ 5 files changed, 118 insertions(+) create mode 100644 docs/src/vignettes/B_phospho_egf_data.ipynb create mode 100644 networkcommons/data/omics/_phosphoegf.py diff --git a/docs/src/datasets.rst b/docs/src/datasets.rst index 24da76e..43ab294 100644 --- a/docs/src/datasets.rst +++ b/docs/src/datasets.rst @@ -92,6 +92,25 @@ NCI60 .. _details-pk: + +Phosphoproteomics in response to EGF +------------------------------------ + +**Alias:** PhosphoEGF + +**Description:** A meta-analysis of phosphoproteomics data in response to EGF stimulation + +**Publication Link:** `Garrido-Rodriguez et al. Evaluating signaling pathway inference from kinase-substrate interactions and phosphoproteomics data. bioRxiv (2024). <https://www.biorxiv.org/content/10.1101/2024.10.21.619348v1>`_ + +**Data location:** `Supplementary Data files of the manuscript `_ + +**Detailed Description:** This dataset contains the results of a meta-analysis of phosphoproteomics data in response to EGF stimulation across different labs and stimulation times. The data is available at two different levels. First, the phosphosite differential abundance is provided for every combination of study and treatment time. In the table, 'This study' refers to the data generated in the manuscript. Second, we offer access to the kinase-level activities inferred using decoupleR and the different kinase-substrate networks described in the paper. 
Briefly, four different networks were employed: A first one based on literature (literature), one based on kinase-substrate interaction prediction via protein language models (phosformer), one based on positional peptide array screening (kinlibrary) and a combination of all of them (combined). + +**Functions:** See API documentation for :ref:`Phospho-EGF meta-analysis <api-phosphoegf>`. + +.. _details-pk: + + --------------- Prior Knowledge --------------- diff --git a/docs/src/vignettes/B_phospho_egf_data.ipynb b/docs/src/vignettes/B_phospho_egf_data.ipynb new file mode 100644 index 0000000..34c463f --- /dev/null +++ b/docs/src/vignettes/B_phospho_egf_data.ipynb @@ -0,0 +1,34 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import networkcommons as nc" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nc_dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/networkcommons/data/datasets.yaml b/networkcommons/data/datasets.yaml index a78b04f..9423da0 100644 --- a/networkcommons/data/datasets.yaml +++ b/networkcommons/data/datasets.yaml @@ -47,3 +47,9 @@ omics: It includes three files: TF activities from transcriptomics data, metabolite abundances and gene reads. path: NCI60/{cell_line}/{cell_line}__{data_type}.tsv + phosphoegf: + name: PhosphoEGF + description: Phosphoproteomics meta-analysis of the response to EGF stimulus + publication_link: https://www.biorxiv.org/content/10.1101/2024.10.21.619348v1 + detailed_description: >- + This dataset contains phosphoproteomics data after EGF stimulus gathered and preprocessed from multiple studies. 
diff --git a/networkcommons/data/omics/__init__.py b/networkcommons/data/omics/__init__.py index 5c48ca5..93a89db 100644 --- a/networkcommons/data/omics/__init__.py +++ b/networkcommons/data/omics/__init__.py @@ -24,3 +24,4 @@ from ._scperturb import * from ._nci60 import * from ._cptac import * +from ._phosphoegf import * diff --git a/networkcommons/data/omics/_phosphoegf.py b/networkcommons/data/omics/_phosphoegf.py new file mode 100644 index 0000000..5da2d81 --- /dev/null +++ b/networkcommons/data/omics/_phosphoegf.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python + +# +# This file is part of the `networkcommons` Python module +# +# Copyright 2024 +# Heidelberg University Hospital +# +# File author(s): Saez Lab (omnipathdb@gmail.com) +# +# Distributed under the GPLv3 license +# See the file `LICENSE` or read a copy at +# https://www.gnu.org/licenses/gpl-3.0.txt +# + +""" +Meta-analysis of phosphoproteomics response to EGF stimulus. +""" + +import pandas as pd +import warnings + +def phospho_egf_datatypes() -> pd.DataFrame: + """ + Table describing the available data types in the Phospho EGF dataset. + + Returns: + DataFrame with all data types. + """ + + return pd.DataFrame({ + 'type': ['phosphosite', 'kinase'], + 'description': ['Differential phosphoproteomics at the site level for all studies in the meta-analysis', + 'Kinase activities obtained using each of the kinase-substrate prior knowledge resources'], + }) + + +def phospho_egf_tables(type='diffabundance'): + """ + A table with the corresponding data type for the phospho EGF dataset. + + Args: + type: + Either 'phosphosite' or 'kinase'. + + Returns: + A DataFrame with the corresponding data. 
+ """ + + if type == 'phosphosite': + out_table = pd.read_csv('https://www.biorxiv.org/content/biorxiv/early/2024/10/22/2024.10.21.619348/DC3/embed/media-3.gz', compression='gzip', low_memory=False) + elif type == 'kinase': + out_table = pd.read_csv('https://www.biorxiv.org/content/biorxiv/early/2024/10/22/2024.10.21.619348/DC4/embed/media-4.gz', compression='gzip', low_memory=False) + else: + warnings.warn(f'Unknown data type "{type}"') + return None + return out_table + \ No newline at end of file From 56d8a6e2af23875ac5e244f0c0f4f97122d0b050 Mon Sep 17 00:00:00 2001 From: Martin Garrido Date: Wed, 13 Nov 2024 15:40:09 +0100 Subject: [PATCH 2/3] add phosphoegf details to api.rst --- docs/src/api.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/src/api.rst b/docs/src/api.rst index 9be4608..e3e278a 100644 --- a/docs/src/api.rst +++ b/docs/src/api.rst @@ -195,6 +195,20 @@ NCI60 data.omics.nci60_datatypes data.omics.nci60_table +.. _api-phosphoegf: + +Phospho-EGF meta-analysis +~~~~~~~~~~~~~~~~~~~~~~~~~ +.. module::networkcommons.data.omics +.. currentmodule:: networkcommons + +.. autosummary:: + :toctree: api + :recursive: + + data.omics.phospho_egf_datatypes + data.omics.phospho_egf_tables + .. 
_api-eval: Evaluation and description From f246fdcbc64b0812bc2fb13249480ba41bbf9e16 Mon Sep 17 00:00:00 2001 From: Martin Garrido Date: Tue, 19 Nov 2024 17:52:44 +0100 Subject: [PATCH 3/3] remove zombie vignette --- docs/src/vignettes/B_phospho_egf_data.ipynb | 34 --------------------- 1 file changed, 34 deletions(-) delete mode 100644 docs/src/vignettes/B_phospho_egf_data.ipynb diff --git a/docs/src/vignettes/B_phospho_egf_data.ipynb b/docs/src/vignettes/B_phospho_egf_data.ipynb deleted file mode 100644 index 34c463f..0000000 --- a/docs/src/vignettes/B_phospho_egf_data.ipynb +++ /dev/null @@ -1,34 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import networkcommons as nc" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "nc_dev", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.15" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}