From 6edccc72f9b1b9b8fbb82f47f06a231a24204120 Mon Sep 17 00:00:00 2001 From: jcohenadad Date: Tue, 23 Jan 2024 21:57:28 -0500 Subject: [PATCH] Added statistics for CSF/SC T2starw Fixes #70 --- data_processing.ipynb | 132 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 129 insertions(+), 3 deletions(-) diff --git a/data_processing.ipynb b/data_processing.ipynb index ebb52f8..baca8bc 100644 --- a/data_processing.ipynb +++ b/data_processing.ipynb @@ -85,15 +85,18 @@ "import json\n", "import subprocess\n", "import glob\n", + "import itertools\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import nibabel as nib\n", "import pandas as pd\n", "from scipy.interpolate import interp1d\n", "from scipy.ndimage import uniform_filter1d\n", - "from scipy.stats import f_oneway\n", + "from scipy.stats import f_oneway, ttest_rel\n", "import shutil\n", - "from statsmodels.stats.multicomp import pairwise_tukeyhsd" + "from statsmodels.stats.multicomp import pairwise_tukeyhsd\n", + "from statsmodels.stats.anova import AnovaRM\n", + "import statsmodels.api as sm" ] }, { @@ -359,6 +362,9 @@ "if n_rows == 1:\n", " axes = [axes]\n", "\n", + "# Data storage for statistics\n", + "stats = []\n", + "\n", "# Iterate over each subject and create a subplot\n", "for i, subject in enumerate(subjects):\n", " ax = axes[i]\n", @@ -391,11 +397,16 @@ "\n", " method_data.append(resampled_data)\n", "\n", - " # If there's data for this shim method, plot it\n", + " # If there's data for this shim method, plot it and compute stats\n", " if method_data:\n", " # Plotting each file's data separately\n", " for resampled_data in method_data:\n", " ax.plot(x_grid, resampled_data, label=f\"{shim_method}\")\n", + " \n", + " # Compute stats on the non-resampled data (to avoid interpolation errors)\n", + " mean_data = np.mean(data_sc_csf_ratio)\n", + " sd_data = np.std(data_sc_csf_ratio)\n", + " stats.append([subject, shim_method, mean_data, sd_data])\n", "\n", " # Set custom x-ticks\n", " ax.set_xticks(custom_xticks)\n", @@ -416,6 +427,121 @@ "plt.show()" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fb8449e", + "metadata": {}, + "outputs": [], + "source": [ + "# Perform statistics\n", + "\n", + "# Convert to DataFrame and save to CSV\n", + "df_stats = pd.DataFrame(stats, columns=['Subject', 'Shim_Mode', 'Average', 'Standard_Deviation'])\n", + "df_stats.to_csv(os.path.join(path_results, 'stats_t2starw.csv'), index=False)\n", + "\n", + "# Compute statistics across subjects\n", + "grouped_means = df_stats.groupby('Shim_Mode').agg({'Average': ['mean', 'std'], 'Standard_Deviation': ['mean', 'std']})\n", + "\n", + "# Format mean ± standard deviation\n", + "grouped_means['Average_formatted'] = grouped_means['Average']['mean'].map(\"{:.2f}\".format) + \" ± \" + grouped_means['Average']['std'].map(\"{:.2f}\".format)\n", + "grouped_means['Standard_Deviation_formatted'] = grouped_means['Standard_Deviation']['mean'].map(\"{:.2f}\".format) + \" ± \" + grouped_means['Standard_Deviation']['std'].map(\"{:.2f}\".format)\n", + "\n", + "# Drop multi-level index and only keep formatted columns\n", + "grouped_means = grouped_means.drop(columns=['Average', 'Standard_Deviation'])\n", + "grouped_means.columns = ['Average', 'Standard_Deviation'] # Rename columns for clarity\n", + "grouped_means.reset_index().to_csv(os.path.join(path_results, 'stats_t2starw_average_across_subjects.csv'), index=False)\n", + "\n", + "# Perform Repeated Measures ANOVA\n", + "aovrm = AnovaRM(df_stats, 'Average', 'Subject', within=['Shim_Mode'])\n", + "res = aovrm.fit()\n", + "print(res.summary())\n", + "\n", + "# Perform Post Hoc paired t-tests\n", + "\n", + "# Filter out subjects that don't have all shim modes\n", + "shim_modes = df_stats['Shim_Mode'].unique()\n", + "valid_subjects = df_stats.groupby('Subject').filter(lambda x: all(mode in x['Shim_Mode'].values for mode in shim_modes))\n", + "\n", + "# Pairwise comparisons\n", + "pairs = list(itertools.combinations(shim_modes, 2))\n", + "p_values = []\n", + "\n", + "for pair in pairs:\n", + " data1 = valid_subjects[valid_subjects['Shim_Mode'] == pair[0]]['Average']\n", + " data2 = valid_subjects[valid_subjects['Shim_Mode'] == pair[1]]['Average']\n", + " _, p_value = ttest_rel(data1, data2)\n", + " p_values.append(p_value)\n", + "\n", + "# Adjusting for multiple comparisons (Bonferroni correction)\n", + "adjusted_p_values = [p * len(pairs) for p in p_values] # Bonferroni correction\n", + "adjusted_p_values = [min(p, 1) for p in adjusted_p_values] # p-values cannot be greater than 1\n", + "\n", + "# Creating a summary DataFrame\n", + "comparison_results = pd.DataFrame({'Pair': pairs, 'P-Value': p_values, 'Adjusted P-Value': adjusted_p_values})\n", + "\n", + "# Save to CSV\n", + "comparison_results.to_csv(os.path.join(path_results, 'stats_t2starw_paired_ttest.csv'), index=False)\n", + "\n", + "print(f\"Paired t-tests:\\n{comparison_results}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b898cd9c", + "metadata": {}, + "outputs": [], + "source": [ + "# Create tables and perform statistics\n", + "\n", + "# Go back to root data folder\n", + "os.chdir(path_data)\n", + "\n", + "# Data storage\n", + "data_summary = []\n", + "\n", + "# Compute mean and SD of CSF/SC for each subject and each shim mode\n", + "for subject in subjects:\n", + " for shim_mode in shim_modes:\n", + " all_data = []\n", + " for file_path in glob.glob(os.path.join(path_data, subject, \"fmap\", f\"TB1map_*{shim_mode}*.csv\")):\n", + " df = pd.read_csv(file_path)\n", + " # Drop rows where VertLevel is 1 or 2 (because RF shimming was not performed at these levels)\n", + " df = df[df['VertLevel'].isin(['1', '2']) == False]\n", + " wa_data = df['WA()']\n", + " all_data.extend(wa_data)\n", + "\n", + " if all_data:\n", + " mean_data = np.mean(all_data)\n", + " sd_data = np.std(all_data)\n", + " data_summary.append([subject, shim_mode, mean_data, sd_data])\n", + "\n", + "# Convert to DataFrame and save to CSV\n", + "df_summary = pd.DataFrame(data_summary, columns=['Subject', 'Shim_Mode', 'Average', 'Standard_Deviation'])\n", + "df_summary.to_csv(os.path.join(path_results, 'subject_shim_mode_summary.csv'), index=False)\n", + "\n", + "# Compute statistics across subjects\n", + "grouped_means = df_summary.groupby('Shim_Mode').agg({'Average': ['mean', 'std'], 'Standard_Deviation': ['mean', 'std']})\n", + "\n", + "# Format mean ± standard deviation\n", + "grouped_means['Average_formatted'] = grouped_means['Average']['mean'].map(\"{:.2f}\".format) + \" ± \" + grouped_means['Average']['std'].map(\"{:.2f}\".format)\n", + "grouped_means['Standard_Deviation_formatted'] = grouped_means['Standard_Deviation']['mean'].map(\"{:.2f}\".format) + \" ± \" + grouped_means['Standard_Deviation']['std'].map(\"{:.2f}\".format)\n", + "\n", + "# Drop multi-level index and only keep formatted columns\n", + "grouped_means = grouped_means.drop(columns=['Average', 'Standard_Deviation'])\n", + "grouped_means.columns = ['Average', 'Standard_Deviation'] # Rename columns for clarity\n", + "grouped_means.reset_index().to_csv(os.path.join(path_results, 'average_across_subjects.csv'), index=False)\n", + "\n", + "# Perform ANOVA and Posthoc Tests\n", + "anova_result = f_oneway(*[group[\"Average\"].values for name, group in df_summary.groupby(\"Shim_Mode\")])\n", + "print(\"ANOVA Result:\", anova_result)\n", + "\n", + "if anova_result.pvalue < 0.05:\n", + " posthoc_result = pairwise_tukeyhsd(df_summary['Average'], df_summary['Shim_Mode'])\n", + " print(\"Posthoc Tukey HSD Result:\\n\", posthoc_result)\n" + ] + }, { "cell_type": "markdown", "id": "bda7c3ac",