diff --git a/data_processing.ipynb b/data_processing.ipynb index baca8bc..7100561 100644 --- a/data_processing.ipynb +++ b/data_processing.ipynb @@ -363,7 +363,7 @@ " axes = [axes]\n", "\n", "# Data storage for statistics\n", - "stats = []\n", + "data_stats = []\n", "\n", "# Iterate over each subject and create a subplot\n", "for i, subject in enumerate(subjects):\n", @@ -406,7 +406,7 @@ " # Compute stats on the non-resampled data (to avoid interpolation errors)\n", " mean_data = np.mean(data_sc_csf_ratio)\n", " sd_data = np.std(data_sc_csf_ratio)\n", - " stats.append([subject, shim_method, mean_data, sd_data])\n", + " data_stats.append([subject, shim_method, mean_data, sd_data])\n", "\n", " # Set custom x-ticks\n", " ax.set_xticks(custom_xticks)\n", @@ -430,14 +430,14 @@ { "cell_type": "code", "execution_count": null, - "id": "3fb8449e", + "id": "afeb6c8f", "metadata": {}, "outputs": [], "source": [ "# Perform statistics\n", "\n", "# Convert to DataFrame and save to CSV\n", - "df_stats = pd.DataFrame(stats, columns=['Subject', 'Shim_Mode', 'Average', 'Standard_Deviation'])\n", + "df_stats = pd.DataFrame(data_stats, columns=['Subject', 'Shim_Mode', 'Average', 'Standard_Deviation'])\n", "df_stats.to_csv(os.path.join(path_results, 'stats_t2starw.csv'), index=False)\n", "\n", "# Compute statistics across subjects\n", @@ -486,62 +486,6 @@ "print(f\"Paired t-tests:\\n{comparison_results}\")" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "b898cd9c", - "metadata": {}, - "outputs": [], - "source": [ - "# Create tables and perform statistics\n", - "\n", - "# Go back to root data folder\n", - "os.chdir(path_data)\n", - "\n", - "# Data storage\n", - "data_summary = []\n", - "\n", - "# Compute mean and SD of CSF/SC for each subject and each shim mode\n", - "for subject in subjects:\n", - " for shim_mode in shim_modes:\n", - " all_data = []\n", - " for file_path in glob.glob(os.path.join(path_data, subject, \"fmap\", f\"TB1map_*{shim_mode}*.csv\")):\n", - " df = pd.read_csv(file_path)\n", - " # Drop rows where VertLevel is 1 or 2 (because RF shimming was not performed at these levels)\n", - " df = df[df['VertLevel'].isin(['1', '2']) == False]\n", - " wa_data = df['WA()']\n", - " all_data.extend(wa_data)\n", - "\n", - " if all_data:\n", - " mean_data = np.mean(all_data)\n", - " sd_data = np.std(all_data)\n", - " data_summary.append([subject, shim_mode, mean_data, sd_data])\n", - "\n", - "# Convert to DataFrame and save to CSV\n", - "df_summary = pd.DataFrame(data_summary, columns=['Subject', 'Shim_Mode', 'Average', 'Standard_Deviation'])\n", - "df_summary.to_csv(os.path.join(path_results, 'subject_shim_mode_summary.csv'), index=False)\n", - "\n", - "# Compute statistics across subjects\n", - "grouped_means = df_summary.groupby('Shim_Mode').agg({'Average': ['mean', 'std'], 'Standard_Deviation': ['mean', 'std']})\n", - "\n", - "# Format mean ± standard deviation\n", - "grouped_means['Average_formatted'] = grouped_means['Average']['mean'].map(\"{:.2f}\".format) + \" ± \" + grouped_means['Average']['std'].map(\"{:.2f}\".format)\n", - "grouped_means['Standard_Deviation_formatted'] = grouped_means['Standard_Deviation']['mean'].map(\"{:.2f}\".format) + \" ± \" + grouped_means['Standard_Deviation']['std'].map(\"{:.2f}\".format)\n", - "\n", - "# Drop multi-level index and only keep formatted columns\n", - "grouped_means = grouped_means.drop(columns=['Average', 'Standard_Deviation'])\n", - "grouped_means.columns = ['Average', 'Standard_Deviation'] # Rename columns for clarity\n", - "grouped_means.reset_index().to_csv(os.path.join(path_results, 'average_across_subjects.csv'), index=False)\n", - "\n", - "# Perform ANOVA and Posthoc Tests\n", - "anova_result = f_oneway(*[group[\"Average\"].values for name, group in df_summary.groupby(\"Shim_Mode\")])\n", - "print(\"ANOVA Result:\", anova_result)\n", - "\n", - "if anova_result.pvalue < 0.05:\n", - " posthoc_result = pairwise_tukeyhsd(df_summary['Average'], df_summary['Shim_Mode'])\n", - " print(\"Posthoc Tukey HSD Result:\\n\", posthoc_result)\n" - ] - }, { "cell_type": "markdown", "id": "bda7c3ac", @@ -693,17 +637,20 @@ "if n_rows == 1:\n", " axes = [axes]\n", "\n", + "# Data storage for statistics\n", + "data_stats = []\n", + "\n", "# Iterate over each subject and create a subplot\n", "for i, subject in enumerate(subjects):\n", " ax = axes[i]\n", " \n", " os.chdir(os.path.join(path_data, subject, \"fmap\"))\n", "\n", - " for shim_method in shim_modes:\n", + " for shim_mode in shim_modes:\n", " # Initialize list to collect data for this shim method\n", " method_data = []\n", "\n", - " for file_path in glob.glob(f\"TB1map_*{shim_method}*.csv\"):\n", + " for file_path in glob.glob(f\"TB1map_*{shim_mode}*.csv\"):\n", " df = pd.read_csv(file_path)\n", " wa_data = df['WA()']\n", "\n", @@ -719,11 +666,16 @@ "\n", " method_data.append(smoothed_data)\n", "\n", - " # If there's data for this shim method, plot it\n", - " if method_data:\n", - " # Plotting each file's data separately\n", - " for resampled_data in method_data:\n", - " ax.plot(x_grid, resampled_data, label=f\"{shim_method}\")\n", + " # If there's data for this shim method, plot it\n", + " if method_data:\n", + " # Plotting each file's data separately\n", + " for resampled_data in method_data:\n", + " ax.plot(x_grid, resampled_data, label=f\"{shim_mode}\")\n", + "\n", + " # Compute stats on the non-resampled data (to avoid interpolation errors)\n", + " mean_data = np.mean(wa_data)\n", + " sd_data = np.std(wa_data)\n", + " data_stats.append([subject, shim_mode, mean_data, sd_data])\n", "\n", " # Set custom x-ticks\n", " ax.set_xticks(custom_xticks)\n", @@ -747,40 +699,18 @@ { "cell_type": "code", "execution_count": null, - "id": "f8b90a09", + "id": "56f0ebf5", "metadata": {}, "outputs": [], "source": [ - "# Create tables and perform statistics\n", - "\n", - "# Go back to root data folder\n", - "os.chdir(path_data)\n", - "\n", - "# Data storage\n", - "data_summary = []\n", - "\n", - "# Compute mean and SD of B1+ for each subject and each shim mode\n", - "for subject in subjects:\n", - " for shim_mode in shim_modes:\n", - " all_data = []\n", - " for file_path in glob.glob(os.path.join(path_data, subject, \"fmap\", f\"TB1map_*{shim_mode}*.csv\")):\n", - " df = pd.read_csv(file_path)\n", - " # Drop rows where VertLevel is 1 or 2 (because RF shimming was not performed at these levels)\n", - " df = df[df['VertLevel'].isin(['1', '2']) == False]\n", - " wa_data = df['WA()']\n", - " all_data.extend(wa_data)\n", - "\n", - " if all_data:\n", - " mean_data = np.mean(all_data)\n", - " sd_data = np.std(all_data)\n", - " data_summary.append([subject, shim_mode, mean_data, sd_data])\n", + "# Perform statistics\n", "\n", "# Convert to DataFrame and save to CSV\n", - "df_summary = pd.DataFrame(data_summary, columns=['Subject', 'Shim_Mode', 'Average', 'Standard_Deviation'])\n", - "df_summary.to_csv(os.path.join(path_results, 'subject_shim_mode_summary.csv'), index=False)\n", + "df_stats = pd.DataFrame(data_stats, columns=['Subject', 'Shim_Mode', 'Average', 'Standard_Deviation'])\n", + "df_stats.to_csv(os.path.join(path_results, 'stats_b1plus.csv'), index=False)\n", "\n", "# Compute statistics across subjects\n", - "grouped_means = df_summary.groupby('Shim_Mode').agg({'Average': ['mean', 'std'], 'Standard_Deviation': ['mean', 'std']})\n", + "grouped_means = df_stats.groupby('Shim_Mode').agg({'Average': ['mean', 'std'], 'Standard_Deviation': ['mean', 'std']})\n", "\n", "# Format mean ± standard deviation\n", "grouped_means['Average_formatted'] = grouped_means['Average']['mean'].map(\"{:.2f}\".format) + \" ± \" + grouped_means['Average']['std'].map(\"{:.2f}\".format)\n", @@ -789,15 +719,40 @@ "# Drop multi-level index and only keep formatted columns\n", "grouped_means = grouped_means.drop(columns=['Average', 'Standard_Deviation'])\n", "grouped_means.columns = ['Average', 'Standard_Deviation'] # Rename columns for clarity\n", - "grouped_means.reset_index().to_csv(os.path.join(path_results, 'average_across_subjects.csv'), index=False)\n", + "grouped_means.reset_index().to_csv(os.path.join(path_results, 'stats_b1plus_average_across_subjects.csv'), index=False)\n", "\n", - "# Perform ANOVA and Posthoc Tests\n", - "anova_result = f_oneway(*[group[\"Average\"].values for name, group in df_summary.groupby(\"Shim_Mode\")])\n", - "print(\"ANOVA Result:\", anova_result)\n", + "# Perform Repeated Measures ANOVA\n", + "aovrm = AnovaRM(df_stats, 'Average', 'Subject', within=['Shim_Mode'])\n", + "res = aovrm.fit()\n", + "print(res.summary())\n", + "\n", + "# Perform Post Hoc paired t-tests\n", + "\n", + "# Filter out subjects that don't have all shim modes\n", + "shim_modes = df_stats['Shim_Mode'].unique()\n", + "valid_subjects = df_stats.groupby('Subject').filter(lambda x: all(mode in x['Shim_Mode'].values for mode in shim_modes))\n", "\n", - "if anova_result.pvalue < 0.05:\n", - " posthoc_result = pairwise_tukeyhsd(df_summary['Average'], df_summary['Shim_Mode'])\n", - " print(\"Posthoc Tukey HSD Result:\\n\", posthoc_result)\n" + "# Pairwise comparisons\n", + "pairs = list(itertools.combinations(shim_modes, 2))\n", + "p_values = []\n", + "\n", + "for pair in pairs:\n", + " data1 = valid_subjects[valid_subjects['Shim_Mode'] == pair[0]]['Average']\n", + " data2 = valid_subjects[valid_subjects['Shim_Mode'] == pair[1]]['Average']\n", + " _, p_value = ttest_rel(data1, data2)\n", + " p_values.append(p_value)\n", + "\n", + "# Adjusting for multiple comparisons (Bonferroni correction)\n", + "adjusted_p_values = [p * len(pairs) for p in p_values] # Bonferroni correction\n", + "adjusted_p_values = [min(p, 1) for p in adjusted_p_values] # p-values cannot be greater than 1\n", + "\n", + "# Creating a summary DataFrame\n", + "comparison_results = pd.DataFrame({'Pair': pairs, 'P-Value': p_values, 'Adjusted P-Value': adjusted_p_values})\n", + "\n", + "# Save to CSV\n", + "comparison_results.to_csv(os.path.join(path_results, 'stats_b1plus_paired_ttest.csv'), index=False)\n", + "\n", + "print(f\"Paired t-tests:\\n{comparison_results}\")" ] }, {