Skip to content

Commit

Permalink
Added statistics for CSF/SC T2starw
Browse files Browse the repository at this point in the history
Fixes #70
  • Loading branch information
jcohenadad committed Jan 24, 2024
1 parent 2ab713a commit 6edccc7
Showing 1 changed file with 129 additions and 3 deletions.
132 changes: 129 additions & 3 deletions data_processing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,18 @@
"import json\n",
"import subprocess\n",
"import glob\n",
"import itertools\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import nibabel as nib\n",
"import pandas as pd\n",
"from scipy.interpolate import interp1d\n",
"from scipy.ndimage import uniform_filter1d\n",
"from scipy.stats import f_oneway\n",
"from scipy.stats import f_oneway, ttest_rel\n",
"import shutil\n",
"from statsmodels.stats.multicomp import pairwise_tukeyhsd"
"from statsmodels.stats.multicomp import pairwise_tukeyhsd\n",
"from statsmodels.stats.anova import AnovaRM\n",
"import statsmodels.api as sm"
]
},
{
Expand Down Expand Up @@ -359,6 +362,9 @@
"if n_rows == 1:\n",
" axes = [axes]\n",
"\n",
"# Data storage for statistics\n",
"stats = []\n",
"\n",
"# Iterate over each subject and create a subplot\n",
"for i, subject in enumerate(subjects):\n",
" ax = axes[i]\n",
Expand Down Expand Up @@ -391,11 +397,16 @@
"\n",
" method_data.append(resampled_data)\n",
"\n",
" # If there's data for this shim method, plot it\n",
" # If there's data for this shim method, plot it and compute stats\n",
" if method_data:\n",
" # Plotting each file's data separately\n",
" for resampled_data in method_data:\n",
" ax.plot(x_grid, resampled_data, label=f\"{shim_method}\")\n",
" \n",
" # Compute stats on the non-resampled data (to avoid interpolation errors)\n",
" mean_data = np.mean(data_sc_csf_ratio)\n",
" sd_data = np.std(data_sc_csf_ratio)\n",
" stats.append([subject, shim_method, mean_data, sd_data])\n",
"\n",
" # Set custom x-ticks\n",
" ax.set_xticks(custom_xticks)\n",
Expand All @@ -416,6 +427,121 @@
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3fb8449e",
"metadata": {},
"outputs": [],
"source": [
"# Perform statistics\n",
"\n",
"# Convert to DataFrame and save to CSV\n",
"df_stats = pd.DataFrame(stats, columns=['Subject', 'Shim_Mode', 'Average', 'Standard_Deviation'])\n",
"df_stats.to_csv(os.path.join(path_results, 'stats_t2starw.csv'), index=False)\n",
"\n",
"# Compute statistics across subjects\n",
"grouped_means = df_stats.groupby('Shim_Mode').agg({'Average': ['mean', 'std'], 'Standard_Deviation': ['mean', 'std']})\n",
"\n",
"# Format mean ± standard deviation\n",
"grouped_means['Average_formatted'] = grouped_means['Average']['mean'].map(\"{:.2f}\".format) + \" ± \" + grouped_means['Average']['std'].map(\"{:.2f}\".format)\n",
"grouped_means['Standard_Deviation_formatted'] = grouped_means['Standard_Deviation']['mean'].map(\"{:.2f}\".format) + \" ± \" + grouped_means['Standard_Deviation']['std'].map(\"{:.2f}\".format)\n",
"\n",
"# Drop multi-level index and only keep formatted columns\n",
"grouped_means = grouped_means.drop(columns=['Average', 'Standard_Deviation'])\n",
"grouped_means.columns = ['Average', 'Standard_Deviation'] # Rename columns for clarity\n",
"grouped_means.reset_index().to_csv(os.path.join(path_results, 'stats_t2starw_average_across_subjects.csv'), index=False)\n",
"\n",
"# Perform Repeated Measures ANOVA\n",
"aovrm = AnovaRM(df_stats, 'Average', 'Subject', within=['Shim_Mode'])\n",
"res = aovrm.fit()\n",
"print(res.summary())\n",
"\n",
"# Perform Post Hoc paired t-tests\n",
"\n",
"# Filter out subjects that don't have all shim modes\n",
"shim_modes = df_stats['Shim_Mode'].unique()\n",
"valid_subjects = df_stats.groupby('Subject').filter(lambda x: all(mode in x['Shim_Mode'].values for mode in shim_modes))\n",
"\n",
"# Pairwise comparisons\n",
"pairs = list(itertools.combinations(shim_modes, 2))\n",
"p_values = []\n",
"\n",
"for pair in pairs:\n",
" data1 = valid_subjects[valid_subjects['Shim_Mode'] == pair[0]]['Average']\n",
" data2 = valid_subjects[valid_subjects['Shim_Mode'] == pair[1]]['Average']\n",
" _, p_value = ttest_rel(data1, data2)\n",
" p_values.append(p_value)\n",
"\n",
"# Adjusting for multiple comparisons (Bonferroni correction)\n",
"adjusted_p_values = [p * len(pairs) for p in p_values] # Bonferroni correction\n",
"adjusted_p_values = [min(p, 1) for p in adjusted_p_values] # p-values cannot be greater than 1\n",
"\n",
"# Creating a summary DataFrame\n",
"comparison_results = pd.DataFrame({'Pair': pairs, 'P-Value': p_values, 'Adjusted P-Value': adjusted_p_values})\n",
"\n",
"# Save to CSV\n",
"comparison_results.to_csv(os.path.join(path_results, 'stats_t2starw_paired_ttest.csv'), index=False)\n",
"\n",
"print(f\"Paired t-tests:\\n{comparison_results}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b898cd9c",
"metadata": {},
"outputs": [],
"source": [
"# Create tables and perform statistics\n",
"\n",
"# Go back to root data folder\n",
"os.chdir(path_data)\n",
"\n",
"# Data storage\n",
"data_summary = []\n",
"\n",
"# Compute mean and SD of CSF/SC for each subject and each shim mode\n",
"for subject in subjects:\n",
" for shim_mode in shim_modes:\n",
" all_data = []\n",
" for file_path in glob.glob(os.path.join(path_data, subject, \"fmap\", f\"TB1map_*{shim_mode}*.csv\")):\n",
" df = pd.read_csv(file_path)\n",
" # Drop rows where VertLevel is 1 or 2 (because RF shimming was not performed at these levels)\n",
" df = df[df['VertLevel'].isin(['1', '2']) == False]\n",
" wa_data = df['WA()']\n",
" all_data.extend(wa_data)\n",
"\n",
" if all_data:\n",
" mean_data = np.mean(all_data)\n",
" sd_data = np.std(all_data)\n",
" data_summary.append([subject, shim_mode, mean_data, sd_data])\n",
"\n",
"# Convert to DataFrame and save to CSV\n",
"df_summary = pd.DataFrame(data_summary, columns=['Subject', 'Shim_Mode', 'Average', 'Standard_Deviation'])\n",
"df_summary.to_csv(os.path.join(path_results, 'subject_shim_mode_summary.csv'), index=False)\n",
"\n",
"# Compute statistics across subjects\n",
"grouped_means = df_summary.groupby('Shim_Mode').agg({'Average': ['mean', 'std'], 'Standard_Deviation': ['mean', 'std']})\n",
"\n",
"# Format mean ± standard deviation\n",
"grouped_means['Average_formatted'] = grouped_means['Average']['mean'].map(\"{:.2f}\".format) + \" ± \" + grouped_means['Average']['std'].map(\"{:.2f}\".format)\n",
"grouped_means['Standard_Deviation_formatted'] = grouped_means['Standard_Deviation']['mean'].map(\"{:.2f}\".format) + \" ± \" + grouped_means['Standard_Deviation']['std'].map(\"{:.2f}\".format)\n",
"\n",
"# Drop multi-level index and only keep formatted columns\n",
"grouped_means = grouped_means.drop(columns=['Average', 'Standard_Deviation'])\n",
"grouped_means.columns = ['Average', 'Standard_Deviation'] # Rename columns for clarity\n",
"grouped_means.reset_index().to_csv(os.path.join(path_results, 'average_across_subjects.csv'), index=False)\n",
"\n",
"# Perform ANOVA and Posthoc Tests\n",
"anova_result = f_oneway(*[group[\"Average\"].values for name, group in df_summary.groupby(\"Shim_Mode\")])\n",
"print(\"ANOVA Result:\", anova_result)\n",
"\n",
"if anova_result.pvalue < 0.05:\n",
" posthoc_result = pairwise_tukeyhsd(df_summary['Average'], df_summary['Shim_Mode'])\n",
" print(\"Posthoc Tukey HSD Result:\\n\", posthoc_result)\n"
]
},
{
"cell_type": "markdown",
"id": "bda7c3ac",
Expand Down

0 comments on commit 6edccc7

Please sign in to comment.