Skip to content

Commit

Permalink
Summed differential matrix
Browse files: browse the repository at this point in the history
  • Loading branch information
ryanjameskennedy committed Jul 12, 2024
1 parent 5595655 commit 098bfd6
Showing 1 changed file with 19 additions and 3 deletions.
22 changes: 19 additions & 3 deletions jasentool/matrix.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -102,7 +102,7 @@ def plot_barplot(self, count_dict, output_plot_fpath):

print(f"The number of alleles that aren't null for more than 1000 samples is {len(categories)}")

plt.figure(figsize=(12, 6))
plt.figure(figsize=(10, 8))
bars = plt.bar(categories, counts, color='skyblue')

# Add titles and labels
Expand All @@ -121,9 +121,19 @@ def plot_barplot(self, count_dict, output_plot_fpath):
plt.tight_layout()
plt.savefig(output_plot_fpath, dpi=600)

def plot_matrix_barplot(self, df, output_plot_fpath):
    """Save a bar plot of the ``'sum'`` column of *df* to *output_plot_fpath*.

    Args:
        df: DataFrame whose index supplies the x positions (labelled
            "Sample" on the axis) and whose ``'sum'`` column supplies the
            bar heights.
        output_plot_fpath: Destination path of the image, saved at 600 dpi.
    """
    fig = plt.figure(figsize=(10, 8))
    try:
        plt.bar(df.index, df['sum'], color='skyblue')
        plt.xlabel('Sample')
        plt.ylabel('Sum of sample allele differences')
        plt.title("Summed differential matrix of distances between pipelines' cgMLST results")
        # Index labels are drawn vertically so neighbouring ticks do not overlap.
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.savefig(output_plot_fpath, dpi=600)
    finally:
        # pyplot keeps every figure it creates alive until it is closed
        # explicitly; release it even if drawing or saving failed.
        plt.close(fig)

def plot_boxplot(self, count_dict, output_plot_fpath):
counts = list(count_dict.values())
plt.figure(figsize=(10, 6)) # Optional: set the figure size
plt.figure(figsize=(10, 8)) # Optional: set the figure size
plt.boxplot(counts, vert=True, patch_artist=True) # `vert=True` for vertical boxplot, `patch_artist=True` for filled boxes

# Add title and labels
Expand All @@ -141,17 +151,23 @@ def plot_boxplot(self, count_dict, output_plot_fpath):

def run(self, input_files, output_fpaths, generate_matrix):
# Plots null-allele statistics for input_files and, when a distance CSV is
# present on disk, a bar plot of per-row summed differences. Every output
# path is built from the directory of output_fpaths[0].
# NOTE(review): this view has lost its indentation, so the nesting of the
# statements below is not visible here -- confirm against the real file.
# heatmap_fpath = os.path.join(os.path.dirname(output_fpaths[0]), "cgviz_vs_jasen_heatmap.png")
output_csv_fpath = os.path.join(os.path.dirname(output_fpaths[0]), "cgviz_vs_jasen.csv")
barplot_matrix_fpath = os.path.join(os.path.dirname(output_fpaths[0]), "summed_differential_matrix_barplot.png")
barplot_fpath = os.path.join(os.path.dirname(output_fpaths[0]), "null_alleles_barplot.png")
boxplot_fpath = os.path.join(os.path.dirname(output_fpaths[0]), "sample_null_boxplot.png")
# The null-allele counts are computed and plotted before any generate_matrix check.
null_alleles_count, sample_null_count = self.get_null_allele_counts(input_files)
self.plot_boxplot(sample_null_count, boxplot_fpath)
self.plot_barplot(null_alleles_count, barplot_fpath)
# When requested, build both cgMLST matrices and write their difference to the CSV.
if generate_matrix:
# NOTE(review): same expression as the output_csv_fpath assignment above;
# presumably one of the two is the pre-commit line -- verify and drop the duplicate.
output_csv_fpath = os.path.join(os.path.dirname(output_fpaths[0]), "cgviz_vs_jasen.csv")
# Sample id is the input file's basename with "_result.json" removed.
sample_ids = [os.path.basename(input_file).replace("_result.json", "") for input_file in input_files]
cgviz_matrix_df = self.generate_matrix(sample_ids, self.get_cgviz_cgmlst_data)
jasen_matrix_df = self.generate_matrix(sample_ids, self.get_jasen_cgmlst_data)
# Element-wise difference (jasen minus cgviz), cast to float before it is written out.
distance_df = jasen_matrix_df - cgviz_matrix_df
distance_df = distance_df.astype(float)
distance_df.to_csv(output_csv_fpath, index=True, header=True)
# self.plot_heatmap(distance_df, output_plot_fpath)
# Plot from whatever CSV exists on disk, using its first column as the index.
if os.path.exists(output_csv_fpath):
distance_df = pd.read_csv(output_csv_fpath, index_col=0)
# Row-wise sum of the signed differences, stored in a new 'sum' column.
distance_df['sum'] = distance_df.sum(axis=1)
# Only rows whose sum is at least 100 are passed on to the bar plot.
filtered_df = distance_df[distance_df['sum'] >= 100]
self.plot_matrix_barplot(filtered_df, barplot_matrix_fpath)

0 comments on commit 098bfd6

Please sign in to comment.