diff --git a/docs/src/api.rst b/docs/src/api.rst
index 400ff0e..b4d6a22 100644
--- a/docs/src/api.rst
+++ b/docs/src/api.rst
@@ -78,6 +78,7 @@ Prior Knowledge
data.network.get_omnipath
data.network.get_lianaplus
data.network.get_phosphositeplus
+ data.network.get_cosmos_pkn
Datasets
@@ -149,6 +150,19 @@ CPTAC
data.omics.cptac_datatypes
data.omics.cptac_extend_dataframe
+NCI60
+~~~~~
+.. module:: networkcommons.data.omics
+.. currentmodule:: networkcommons
+
+.. autosummary::
+ :toctree: api
+ :recursive:
+
+ data.omics.nci60_datasets
+ data.omics.nci60_datatypes
+ data.omics.nci60_table
+
Other
~~~~~~~~
diff --git a/docs/src/contents.rst b/docs/src/contents.rst
index 09e1667..4be9548 100644
--- a/docs/src/contents.rst
+++ b/docs/src/contents.rst
@@ -11,6 +11,13 @@ NetworkCommons: Table of Contents
api
+.. toctree::
+ :maxdepth: 2
+ :caption: Contribution guidelines
+
+ guidelines/guide_1_data
+
+
.. toctree::
:maxdepth: 2
:caption: Vignettes
diff --git a/docs/src/guidelines/guide_1_data.ipynb b/docs/src/guidelines/guide_1_data.ipynb
new file mode 100644
index 0000000..56b16a4
--- /dev/null
+++ b/docs/src/guidelines/guide_1_data.ipynb
@@ -0,0 +1,290 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Contribution guidelines: Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Thank you very much for considering contributing to the data collection of **NetworkCommons**! In order to make the resource as user-friendly as possible, we aim to be as transparent as possible, which means that all contributions should contain at least the following elements.\n",
+ "\n",
+ "## 1. Data information\n",
+ "* Experimental design: number of samples, number of experiments (if applicable), confounding factors\n",
+ "* Data production and processing: tools used, how the data processing was performed (if applicable).\n",
+ "* Files: number and type of files, with a small description of their contents.\n",
+ "* Link to the database from which the data was retrieved.\n",
+ "* Link to the dataset publication\n",
+ "* Path information explaining the structure of the data directories\n",
+ "This information should be appended to the existing YAML file in `networkcommons/data/datasets.yaml`\n",
+ "\n",
+ "An example of this can be found below:"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ".. code-block:: yaml\n",
+ "\n",
+ " NCI60:\n",
+ " name: NCI60\n",
+ " description: NCI-60 cell line data\n",
+ " publication_link: https://doi.org/10.1038/nrc1951\n",
+ " detailed_description: >-\n",
+ " This dataset contains data from the NCI-60 cell line panel.\n",
+ " It includes three files: TF activities from transcriptomics data,\n",
+ " metabolite abundances and gene reads.\n",
+ " path: NCI60/{cell_line}/{cell_line}__{data_type}.tsv"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This information can then be accessed via `nc.data.omics.datasets()`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " description | \n",
+ " publication_link | \n",
+ " detailed_description | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " decryptm | \n",
+ " DecryptM | \n",
+ " Drug perturbation proteomics and phosphoproteomics data | \n",
+ " https://doi.org/10.1126/science.ade3925 | \n",
+ " This dataset contains the profiling of 31 cancer drugs in 13 human cancer cell line models resulted in 1.8 million dose-response curves, including 47,502 regulated phosphopeptides, 7316 ubiquitinylated peptides, and 546 regulated acetylated peptides. | \n",
+ "
\n",
+ " \n",
+ " panacea | \n",
+ " Panacea | \n",
+ " Pancancer Analysis of Chemical Entity Activity RNA-Seq data | \n",
+ " https://doi.org/10.1016/j.xcrm.2021.100492 | \n",
+ " PANACEA contains dose-response and perturbational profiles for 32 kinase inhibitors in 11 cancer cell lines, in addition to a DMSO control. Originally, this resource served as the basis for a DREAM Challenge assessing the accuracy and sensitivity of computational algorithms for de novo drug polypharmacology predictions. | \n",
+ "
\n",
+ " \n",
+ " moon | \n",
+ " MOON | \n",
+ " Database files for running MOON | \n",
+ " https://example.com/moon | \n",
+ " This dataset contains database files required for running the MOON software. | \n",
+ "
\n",
+ " \n",
+ " cosmos | \n",
+ " COSMOS | \n",
+ " Database files for running COSMOS (MetaPKN) | \n",
+ " https://example.com/cosmos | \n",
+ " This dataset includes database files for the COSMOS software (MetaPKN). | \n",
+ "
\n",
+ " \n",
+ " CPTAC | \n",
+ " CPTAC | \n",
+ " Clinical Proteomic Tumor Analysis Consortium data | \n",
+ " https://example.com/CPTAC | \n",
+ " This dataset contains data from the Clinical Proteomic Tumor Analysis Consortium. It includes various cancer types and proteomic data. | \n",
+ "
\n",
+ " \n",
+ " NCI60 | \n",
+ " NCI60 | \n",
+ " NCI-60 cell line data | \n",
+ " https://example.com/NCI60 | \n",
+ " This dataset contains data from the NCI-60 cell line panel. It includes three files: TF activities from transcriptomics data, metabolite abundances and gene reads. | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " name \\\n",
+ "decryptm DecryptM \n",
+ "panacea Panacea \n",
+ "moon MOON \n",
+ "cosmos COSMOS \n",
+ "CPTAC CPTAC \n",
+ "NCI60 NCI60 \n",
+ "\n",
+ " description \\\n",
+ "decryptm Drug perturbation proteomics and phosphoproteomics data \n",
+ "panacea Pancancer Analysis of Chemical Entity Activity RNA-Seq data \n",
+ "moon Database files for running MOON \n",
+ "cosmos Database files for running COSMOS (MetaPKN) \n",
+ "CPTAC Clinical Proteomic Tumor Analysis Consortium data \n",
+ "NCI60 NCI-60 cell line data \n",
+ "\n",
+ " publication_link \\\n",
+ "decryptm https://doi.org/10.1126/science.ade3925 \n",
+ "panacea https://doi.org/10.1016/j.xcrm.2021.100492 \n",
+ "moon https://example.com/moon \n",
+ "cosmos https://example.com/cosmos \n",
+ "CPTAC https://example.com/CPTAC \n",
+ "NCI60 https://example.com/NCI60 \n",
+ "\n",
+ " detailed_description \n",
+ "decryptm This dataset contains the profiling of 31 cancer drugs in 13 human cancer cell line models resulted in 1.8 million dose-response curves, including 47,502 regulated phosphopeptides, 7316 ubiquitinylated peptides, and 546 regulated acetylated peptides. \n",
+ "panacea PANACEA contains dose-response and perturbational profiles for 32 kinase inhibitors in 11 cancer cell lines, in addition to a DMSO control. Originally, this resource served as the basis for a DREAM Challenge assessing the accuracy and sensitivity of computational algorithms for de novo drug polypharmacology predictions. \n",
+ "moon This dataset contains database files required for running the MOON software. \n",
+ "cosmos This dataset includes database files for the COSMOS software (MetaPKN). \n",
+ "CPTAC This dataset contains data from the Clinical Proteomic Tumor Analysis Consortium. It includes various cancer types and proteomic data. \n",
+ "NCI60 This dataset contains data from the NCI-60 cell line panel. It includes three files: TF activities from transcriptomics data, metabolite abundances and gene reads. "
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "nc.data.omics.datasets()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. Data handlers (API)\n",
+ "The data will either be deposited on the [NetworkCommons server](https://commons.omnipathdb.org/) or be accessed directly from the original source. In either case, the following functions are required:\n",
+ "\n",
+ "* A function providing an overview of the subsets (if applicable). For example, check `nc.data.omics.decryptm_experiments()`. \n",
+ "* In case the data contains different files (for example, different omics layers, metadata tables, etc.), a function should retrieve this information. For example, check `nc.data.omics.nci60_datatypes()`\n",
+ "* A function that retrieves the data. For example, check `nc.data.omics.nci60_table()`. Ideally, it returns a `pd.DataFrame`; support for `AnnData` instances is planned.\n",
+ "\n",
+ "These new functions can be implemented in a new file, `_{dataset}.py`, inside the `networkcommons/data/omics/` folder.\n",
+ "\n",
+ "For example, `nc.data.omics.nci60_table()` retrieves a single `pd.DataFrame` by providing a data type and a cell line."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ID | \n",
+ " score | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " WASH7P | \n",
+ " -2.109966 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " NOC2L | \n",
+ " -1.480194 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " HES4 | \n",
+ " -0.781522 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ISG15 | \n",
+ " 0.406806 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " AGRN | \n",
+ " -0.324970 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ID score\n",
+ "0 WASH7P -2.109966\n",
+ "1 NOC2L -1.480194\n",
+ "2 HES4 -0.781522\n",
+ "3 ISG15 0.406806\n",
+ "4 AGRN -0.324970"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "nc.data.omics.nci60_table(cell_line='A498', data_type='RNA').head()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "networkcommons-DX9y6Uxu-py3.10",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/src/vignettes/2_moon.ipynb b/docs/src/vignettes/2_moon.ipynb
index 2ed8bf7..9d489b7 100644
--- a/docs/src/vignettes/2_moon.ipynb
+++ b/docs/src/vignettes/2_moon.ipynb
@@ -48,26 +48,9 @@
"cell_type": "code",
"execution_count": 2,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "/home/victo/networkcommons/docs/src/vignettes\n"
- ]
- }
- ],
- "source": [
- "!pwd"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
"outputs": [],
"source": [
- "meta_network_df = pd.read_csv('../../../data/moon/meta_network.sif', sep='\\t')"
+ "meta_network_df = nc.data.network.get_cosmos_pkn()"
]
},
{
@@ -109,120 +92,198 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "We then download a dataset and read inputs:"
+ "In this notebook, we will use data from the NCI60 Human Tumor Cell Lines Screen. We will use the cell line 786-0. To get an overview of the available cell lines, we can run `nc.data.omics.nci60_datasets()`."
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " cell_line | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 786-0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " A498 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " A549_ATCC | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ACHN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " BT-549 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " cell_line\n",
+ "0 786-0\n",
+ "1 A498\n",
+ "2 A549_ATCC\n",
+ "3 ACHN\n",
+ "4 BT-549"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "nc.data.omics.nci60_datasets().head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This resource contains three different types of data: transcriptomics, TF activity estimates and metabolomics."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " data_type | \n",
+ " description | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " TF_scores | \n",
+ " TF scores | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " RNA | \n",
+ " RNA expression | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " metabolomic | \n",
+ " metabolomic data | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " data_type description\n",
+ "0 TF_scores TF scores\n",
+ "1 RNA RNA expression\n",
+ "2 metabolomic metabolomic data"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "nc.data.omics.nci60_datatypes()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
- "moon_data = nc.data.omics.moon()"
+ "sig_df = nc.data.omics.nci60_table(cell_line='786-0', data_type='TF_scores')\n",
+ "rna_df = nc.data.omics.nci60_table(cell_line='786-0', data_type='RNA')\n",
+ "metab_df = nc.data.omics.nci60_table(cell_line='786-0', data_type='metabolomic')"
]
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "{'sig': TF value\n",
- " 0 AR 1.156582\n",
- " 1 BACH1 2.399881\n",
- " 2 CEBPA 3.687354\n",
- " 3 CREB1 0.829149\n",
- " 4 CTCF 2.914983\n",
- " 5 E2F1 4.989779\n",
- " 6 E2F4 3.972646\n",
- " 7 EGR1 6.337803\n",
- " 8 ELK1 0.444149\n",
- " 9 EPAS1 4.268129\n",
- " 10 ESR1 7.069928\n",
- " 11 ETS1 5.957844\n",
- " 12 FOS 5.009215\n",
- " 13 FOXA1 2.338539\n",
- " 14 FOXM1 1.206632\n",
- " 15 FOXO3 -0.772054\n",
- " 16 FOXP1 0.876896\n",
- " 17 GATA2 1.052240\n",
- " 18 GATA3 4.433932\n",
- " 19 HIF1A 2.503899\n",
- " 20 HNF4A 5.230794\n",
- " 21 JUN 4.310749\n",
- " 22 MITF 4.685015\n",
- " 23 MYC 0.761681\n",
- " 24 NFKB1 2.386302\n",
- " 25 PRDM14 2.602170\n",
- " 26 RARA 2.259669\n",
- " 27 RELA 3.635926\n",
- " 28 RUNX1 1.654963\n",
- " 29 SOX2 0.903587\n",
- " 30 SP1 2.073969\n",
- " 31 SP3 0.190111\n",
- " 32 SPI1 5.666462\n",
- " 33 SREBF1 1.577459\n",
- " 34 STAT1 2.219767\n",
- " 35 STAT2 0.092127\n",
- " 36 STAT3 1.241225\n",
- " 37 TAL1 2.968578\n",
- " 38 TFAP2A 0.182564\n",
- " 39 TFAP2C 7.987909\n",
- " 40 TP53 1.014723\n",
- " 41 USF1 2.194528\n",
- " 42 VDR 1.545408\n",
- " 43 YY1 1.521236\n",
- " 44 ZNF263 7.254531,\n",
- " 'metab': metab value Unnamed: 2\n",
- " 0 HMDB0011747 0.401991 NaN\n",
- " 1 HMDB0000755 -0.142718 NaN\n",
- " 2 HMDB0000905 -1.244183 NaN\n",
- " 3 HMDB0000012 -0.967207 NaN\n",
- " 4 HMDB0001191 -0.631035 NaN\n",
- " .. ... ... ...\n",
- " 133 HMDB0011720 -0.716155 NaN\n",
- " 134 HMDB0000883 -0.059923 NaN\n",
- " 135 HMDB0000292 -0.772272 NaN\n",
- " 136 HMDB0000299 1.132812 NaN\n",
- " 137 HMDB0002917 -1.728352 NaN\n",
- " \n",
- " [138 rows x 3 columns],\n",
- " 'rna': gene value\n",
- " 0 WASH7P -0.084246\n",
- " 1 LINC01128 -0.246712\n",
- " 2 NOC2L 0.508906\n",
- " 3 KLHL17 -0.001460\n",
- " 4 HES4 -0.281146\n",
- " ... ... ...\n",
- " 8252 CMC4 -0.029409\n",
- " 8253 BRCC3 -0.058883\n",
- " 8254 VBP1 -0.554804\n",
- " 8255 TMLHE -1.095530\n",
- " 8256 CD24 0.923448\n",
- " \n",
- " [8257 rows x 2 columns]}"
+ "{'AR': 1.1565824565146148,\n",
+ " 'BACH1': 2.3998807796742443,\n",
+ " 'CEBPA': 3.6873543923958847,\n",
+ " 'CREB1': 0.8291485083247008,\n",
+ " 'CTCF': 2.9149829587082383}"
]
},
- "execution_count": 8,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "moon_data"
+ "sig_df.set_index('ID')['score'].to_dict()"
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
- "sig_input = moon_data['sig'].set_index('TF')['value'].to_dict()\n",
- "rna_input = moon_data['rna'].set_index('gene')['value'].to_dict()\n",
- "metab_input = moon_data['metab'].set_index('metab')['value'].to_dict()"
+ "sig_input = sig_df.set_index('ID')['score'].to_dict()\n",
+ "rna_input = rna_df.set_index('ID')['score'].to_dict()\n",
+ "metab_input = metab_df.set_index('ID')['score'].to_dict()"
]
},
{
diff --git a/networkcommons/data/datasets.yaml b/networkcommons/data/datasets.yaml
index bb2a23a..232869d 100644
--- a/networkcommons/data/datasets.yaml
+++ b/networkcommons/data/datasets.yaml
@@ -4,24 +4,46 @@ omics:
decryptm:
name: DecryptM
description: Drug perturbation proteomics and phosphoproteomics data
+ publication_link: https://doi.org/10.1126/science.ade3925
+ detailed_description: >-
+ This dataset contains the profiling of 31 cancer drugs in 13 human cancer
+ cell line models resulted in 1.8 million dose-response curves, including
+ 47,502 regulated phosphopeptides, 7316 ubiquitinylated peptides,
+ and 546 regulated acetylated peptides.
path: decryptm/{experiment}/{data_type}/{fname}
panacea:
name: Panacea
description: Pancancer Analysis of Chemical Entity Activity RNA-Seq data
+ publication_link: https://doi.org/10.1016/j.xcrm.2021.100492
+ detailed_description: >-
+ PANACEA contains dose-response and perturbational profiles for 32
+ kinase inhibitors in 11 cancer cell lines, in addition to a DMSO control.
+ Originally, this resource served as the basis for a DREAM Challenge
+ assessing the accuracy and sensitivity of computational algorithms for
+ de novo drug polypharmacology predictions.
path: panacea/panacea__{table}data.tsv
test:
name: Test data
description: Small RNA-Seq data for unit tests
+ publication_link: NA
+ detailed_description: >-
+ This is a test dataset containing small RNA-Seq data.
+ It is used for unit tests.
path: unit_test/test__{table}data.tsv
- moon:
- name: MOON
- description: Database files for running MOON
- path: moon/{table}_input.tsv
- cosmos:
- name: COSMOS
- description: Database files for running COSMOS (MetaPKN)
- path: moon/net/meta_network.sif
CPTAC:
name: CPTAC
description: Clinical Proteomic Tumor Analysis Consortium data
+ publication_link: https://doi.org/10.1158/2159-8290.CD-13-0219
+ detailed_description: >-
+ This dataset contains data from the Clinical Proteomic Tumor Analysis Consortium.
+ It includes various cancer types and proteomic data.
path: CPTAC/{data_type}/{cancer_type}_{fname}.txt
+ NCI60:
+ name: NCI60
+ description: NCI-60 cell line data
+ publication_link: https://doi.org/10.1038/nrc1951
+ detailed_description: >-
+ This dataset contains data from the NCI-60 cell line panel.
+ It includes three files: TF activities from transcriptomics data,
+ metabolite abundances and gene reads.
+ path: NCI60/{cell_line}/{cell_line}__{data_type}.tsv
diff --git a/networkcommons/data/network/_moon.py b/networkcommons/data/network/_moon.py
index 9b49a6f..5134c87 100644
--- a/networkcommons/data/network/_moon.py
+++ b/networkcommons/data/network/_moon.py
@@ -17,18 +17,51 @@
Prior knowledge network used by MOON.
"""
-__all__ = ['build_moon_regulons']
+__all__ = ['build_moon_regulons', 'get_cosmos_pkn']
import lazy_import
import numpy as np
import pandas as pd
-dc = lazy_import.lazy_module('decoupler')
-
from networkcommons import _utils
from . import _omnipath
from . import _liana
+import os
+import urllib
+from networkcommons import _conf
+from networkcommons.data.omics import _common
+
+# dc = lazy_import.lazy_module('decoupler')
+import decoupler as dc
+
+
+def get_cosmos_pkn(update: bool = False):
+    """
+    Retrieve the COSMOS prior knowledge network (meta PKN).
+
+    The table is read from ``meta_network.sif`` and cached as a pickle.
+
+    Args:
+        update:
+            Force download and overwrite the cached pickle.
+
+    Returns:
+        pandas.DataFrame: the tab-separated file read as a table.
+    """
+    cache = os.path.join(_conf.get('pickle_dir'), 'metapkn.pickle')
+
+    # Serve the cached pickle unless a refresh was requested.
+    if not update and os.path.exists(cache):
+        return pd.read_pickle(cache)
+
+    url = urllib.parse.urljoin(_common._baseurl(), 'prior_knowledge')
+    network = pd.read_csv(url + '/meta_network.sif', sep='\t')
+    network.to_pickle(cache)
+    return network
+
+
+
def build_moon_regulons(include_liana=False):
diff --git a/networkcommons/data/network/_omnipath.py b/networkcommons/data/network/_omnipath.py
index a76e14a..78533e4 100644
--- a/networkcommons/data/network/_omnipath.py
+++ b/networkcommons/data/network/_omnipath.py
@@ -79,7 +79,7 @@ def get_phosphositeplus(update: bool = False):
if update or not os.path.exists(path):
- baseurl = urllib.parse.urljoin(_common._baseurl(), 'phosphosite')
+ baseurl = urllib.parse.urljoin(_common._baseurl(), 'prior_knowledge')
file_legend = pd.read_csv(baseurl + '/kinase-substrate.tsv', sep='\t')
diff --git a/networkcommons/data/omics/_common.py b/networkcommons/data/omics/_common.py
index 93c9975..4f02078 100644
--- a/networkcommons/data/omics/_common.py
+++ b/networkcommons/data/omics/_common.py
@@ -45,18 +45,20 @@ def _datasets() -> dict[str, dict]:
return _module_data('datasets').get('omics', {})
-def datasets() -> dict[str, str]:
+def datasets() -> pd.DataFrame:
"""
Built-in omics datasets.
Returns:
- A dict with dataset labels as keys and descriptions as values.
+ A DataFrame with dataset details.
"""
+ data = _datasets().get('datasets', {})
+ df = pd.DataFrame.from_dict(data, orient='index')
+ pd.set_option('display.max_colwidth', None)
- return {
- k: v['description']
- for k, v in _datasets().get('datasets', {}).items()
- }
+ df = df[df.index != 'test'] # Exclude the 'test' dataset
+
+ return df[['name', 'description', 'publication_link', 'detailed_description']]
def _baseurl() -> str:
diff --git a/networkcommons/data/omics/_moon.py b/networkcommons/data/omics/_moon.py
index e985525..acf8ff3 100644
--- a/networkcommons/data/omics/_moon.py
+++ b/networkcommons/data/omics/_moon.py
@@ -19,26 +19,86 @@
from __future__ import annotations
-__all__ = ['moon']
+__all__ = ['nci60_datasets', 'nci60_datatypes', 'nci60_table']
import pandas as pd
+import os
+import urllib.parse
from . import _common
+from networkcommons import _conf
-def moon() -> dict[str, pd.DataFrame]:
+def nci60_datasets(update: bool = False) -> pd.DataFrame:
"""
- Example data for Moon.
+ Table of all NCI60 datasets (cell lines).
+
+ Args:
+ update:
+ Force download and update cache.
Returns:
- Three data frames: signaling, metabolite and gene activity
- measurements.
+ Data frame of all NCI60 datasets, with a single column
+ "cell_line".
"""
- return {
- table: _common._open(
- _common._commons_url('moon', table = table),
- df = {'sep': '\t'},
+ path = os.path.join(_conf.get('pickle_dir'), 'nci60_datasets.pickle')
+
+ if update or not os.path.exists(path):
+
+ baseurl = urllib.parse.urljoin(_common._baseurl(), 'NCI60')
+
+ datasets = pd.DataFrame(
+ [
+ (
+ cell_line,
+ )
+ for cell_line in _common._ls(baseurl)
+ ],
+ columns = ['cell_line']
)
- for table in ('sig', 'metab', 'rna')
- }
+ datasets.to_pickle(path)
+
+ else:
+
+ datasets = pd.read_pickle(path)
+
+ return datasets
+
+
+def nci60_datatypes() -> pd.DataFrame:
+    """
+    List the NCI60 data types.
+
+    Returns:
+        Data frame with one row per data type and two columns:
+        "data_type" (the label) and "description".
+    """
+    records = [
+        ('TF_scores', 'TF scores'),
+        ('RNA', 'RNA expression'),
+        ('metabolomic', 'metabolomic data'),
+    ]
+
+    return pd.DataFrame(records, columns = ['data_type', 'description'])
+
+
+def nci60_table(cell_line: str, data_type: str) -> pd.DataFrame:
+    """
+    One table of omics data from NCI60.
+
+    Args:
+        cell_line:
+            Name of the cell line. For a complete list see
+            `nci60_datasets()`.
+        data_type:
+            Type of data. For a complete list see `nci60_datatypes()`.
+
+    Returns:
+        The table as a pandas DataFrame.
+    """
+    # Name the URL template fields explicitly instead of `**locals()`.
+    url = _common._commons_url(
+        'NCI60', cell_line = cell_line, data_type = data_type,
+    )
+    return _common._open(url, df = {'sep': '\t'})
diff --git a/networkcommons/methods/_moon.py b/networkcommons/methods/_moon.py
index b64e086..868820d 100644
--- a/networkcommons/methods/_moon.py
+++ b/networkcommons/methods/_moon.py
@@ -42,7 +42,8 @@
import lazy_import
import networkx as nx
import pandas as pd
-dc = lazy_import.lazy_module('decoupler')
+# dc = lazy_import.lazy_module('decoupler')
+import decoupler as dc
import numpy as np
from . import _graph
diff --git a/tests/test_omics.py b/tests/test_omics.py
index 3d20e30..bcebf65 100644
--- a/tests/test_omics.py
+++ b/tests/test_omics.py
@@ -25,8 +25,10 @@ def test_datasets_2():
dsets = _common.datasets()
- assert 'decryptm' in dsets
- assert 'CPTAC' in dsets
+ assert isinstance(dsets, pd.DataFrame)
+ assert dsets.columns.tolist() == ['name', 'description', 'publication_link', 'detailed_description']
+ assert 'decryptm' in dsets.index
+ assert 'CPTAC' in dsets.index
def test_commons_url():