Skip to content

Commit

Permalink
make imports explicit and improve style
Browse files Browse the repository at this point in the history
  • Loading branch information
martinvoegele committed Oct 4, 2023
1 parent bbe1b07 commit 73c4702
Show file tree
Hide file tree
Showing 19 changed files with 391 additions and 122 deletions.
7 changes: 0 additions & 7 deletions pensa/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +0,0 @@
from .preprocessing import *
from .features import *
from .statesinfo import *
from .clusters import *
from .comparison import *
from .dimensionality import *

15 changes: 12 additions & 3 deletions pensa/clusters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
from .clustering import *
from .trajectory import *
from .wss import *
from .clustering import \
obtain_clusters, \
obtain_combined_clusters, \
obtain_mult_combined_clusters, \
find_closest_frames

from .trajectory import \
write_cluster_traj

from .wss import \
wss_over_number_of_clusters, \
wss_over_number_of_combined_clusters
35 changes: 29 additions & 6 deletions pensa/comparison/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,29 @@
from .statistics import *
from .relative_entropy import *
from .statespecific import *
from .visualization import *
from .metrics import *
from .uncertainty_analysis import *
from .statistics import \
kolmogorov_smirnov_analysis, \
mean_difference_analysis, \
feature_correlation

from .relative_entropy import \
relative_entropy_analysis

from .statespecific import \
ssi_feature_analysis, \
ssi_ensemble_analysis, \
cossi_featens_analysis

from .visualization import \
residue_visualization, \
distances_visualization, \
pair_features_heatmap, \
resnum_heatmap

from .metrics import \
pca_sampling_efficiency, \
average_jsd, average_kld, average_ksp, average_kss, average_ssi, \
max_jsd, max_kld, max_ksp, max_kss, max_ssi, min_ksp

from .uncertainty_analysis import \
relen_block_analysis, \
relen_sem_analysis, \
ssi_block_analysis, \
ssi_sem_analysis
120 changes: 92 additions & 28 deletions pensa/comparison/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@


"""
Calculates the average and maximum Jensen-Shannon distance and the Kullback-Leibler divergences for each feature from two ensembles. Each of four functions uses the relative_entropy_analysis function with the same parameters.
Calculates the average and maximum Jensen-Shannon distance and the Kullback-Leibler divergences
for each feature from two ensembles.
Each of the four functions uses the relative_entropy_analysis function with the same parameters.
Parameters
----------
Expand All @@ -20,16 +22,19 @@
Can be obtained from features object via .describe().
Must be the same as features_a. Provided as a sanity check.
all_data_a : float array
Trajectory data from the first ensemble. Format: [frames, frame_data].
Trajectory data from the first ensemble.
Format: [frames, frame_data].
all_data_b : float array
Trajectory data from the second ensemble.
For kld functions, the second ensemble should be the reference ensemble.
Format: [frames, frame_data].
bin_width : float, default=None
Bin width for the axis to compare the distributions on.
If bin_width is None, bin_num (see below) bins are used and the width is determined from the common histogram.
If bin_width is None, bin_num (see below) bins are used
and the width is determined from the common histogram.
bin_num : int, default=10
Number of bins for the axis to compare the distributions on (only if bin_width=None).
Number of bins for the axis to compare the distributions on
(only if bin_width=None).
verbose : bool, default=True
Print intermediate results.
override_name_check : bool, default=False
Expand All @@ -50,28 +55,52 @@
"""


def average_jsd(features_a, features_b, all_data_a, all_data_b, bin_width=None, bin_num=10, verbose=True, override_name_check=False):
_, data_jsdist, _, _ = relative_entropy_analysis(features_a, features_b, all_data_a, all_data_b, bin_width=bin_width, bin_num=bin_num, verbose=verbose, override_name_check=override_name_check)
def average_jsd(features_a, features_b, all_data_a, all_data_b,
                bin_width=None, bin_num=10, verbose=True,
                override_name_check=False):
    """
    Average the per-feature Jensen-Shannon distances between two ensembles.

    All arguments are passed on unchanged to relative_entropy_analysis.

    Returns
    -------
        Result of np.mean over the second of the four values
        returned by relative_entropy_analysis.

    """
    binning_and_flags = dict(
        bin_width=bin_width,
        bin_num=bin_num,
        verbose=verbose,
        override_name_check=override_name_check,
    )
    _, js_distances, _, _ = relative_entropy_analysis(
        features_a, features_b, all_data_a, all_data_b,
        **binning_and_flags
    )
    return np.mean(js_distances)


def max_jsd(features_a, features_b, all_data_a, all_data_b, bin_width=None, bin_num=10, verbose=True, override_name_check=False):
_, data_jsdist, _, _ = relative_entropy_analysis(features_a, features_b, all_data_a, all_data_b, bin_width=bin_width, bin_num=bin_num, verbose=verbose, override_name_check=override_name_check)
def max_jsd(features_a, features_b, all_data_a, all_data_b,
            bin_width=None, bin_num=10, verbose=True,
            override_name_check=False):
    """
    Find the largest per-feature Jensen-Shannon distance between two ensembles.

    All arguments are passed on unchanged to relative_entropy_analysis.

    Returns
    -------
        Result of np.max over the second of the four values
        returned by relative_entropy_analysis.

    """
    binning_and_flags = dict(
        bin_width=bin_width,
        bin_num=bin_num,
        verbose=verbose,
        override_name_check=override_name_check,
    )
    _, js_distances, _, _ = relative_entropy_analysis(
        features_a, features_b, all_data_a, all_data_b,
        **binning_and_flags
    )
    return np.max(js_distances)


def average_kld(features_a, features_b, all_data_a, all_data_b, bin_width=None, bin_num=10, verbose=True, override_name_check=False):
_, _, data_kld_ab, _ = relative_entropy_analysis(features_a, features_b, all_data_a, all_data_b, bin_width=bin_width, bin_num=bin_num, verbose=verbose, override_name_check=override_name_check)
def average_kld(features_a, features_b, all_data_a, all_data_b,
                bin_width=None, bin_num=10, verbose=True,
                override_name_check=False):
    """
    Average the per-feature Kullback-Leibler divergences between two ensembles.

    All arguments are passed on unchanged to relative_entropy_analysis.
    As stated in the parameter description of this module, the second
    ensemble should be the reference ensemble for the kld functions.

    Returns
    -------
        Result of np.mean over the third of the four values
        returned by relative_entropy_analysis.

    """
    binning_and_flags = dict(
        bin_width=bin_width,
        bin_num=bin_num,
        verbose=verbose,
        override_name_check=override_name_check,
    )
    _, _, kl_divergences, _ = relative_entropy_analysis(
        features_a, features_b, all_data_a, all_data_b,
        **binning_and_flags
    )
    return np.mean(kl_divergences)


def max_kld(features_a, features_b, all_data_a, all_data_b, bin_width=None, bin_num=10, verbose=True, override_name_check=False):
_, _, data_kld_ab, _ = relative_entropy_analysis(features_a, features_b, all_data_a, all_data_b, bin_width=bin_width, bin_num=bin_num, verbose=verbose, override_name_check=override_name_check)
def max_kld(features_a, features_b, all_data_a, all_data_b,
            bin_width=None, bin_num=10, verbose=True,
            override_name_check=False):
    """
    Find the largest per-feature Kullback-Leibler divergence between two ensembles.

    All arguments are passed on unchanged to relative_entropy_analysis.
    As stated in the parameter description of this module, the second
    ensemble should be the reference ensemble for the kld functions.

    Returns
    -------
        Result of np.max over the third of the four values
        returned by relative_entropy_analysis.

    """
    binning_and_flags = dict(
        bin_width=bin_width,
        bin_num=bin_num,
        verbose=verbose,
        override_name_check=override_name_check,
    )
    _, _, kl_divergences, _ = relative_entropy_analysis(
        features_a, features_b, all_data_a, all_data_b,
        **binning_and_flags
    )
    return np.max(kl_divergences)


"""
Calculates the average and maximum Kolmogorov-Smirnov statistic for two distributions. Each of five functions uses the kolmogorov_smirnov_analysis function with the same parameters.
Calculates the average and maximum Kolmogorov-Smirnov statistic for two distributions.
Each of the five functions uses the kolmogorov_smirnov_analysis function with the same parameters.
Parameters
----------
Expand Down Expand Up @@ -108,33 +137,54 @@ def max_kld(features_a, features_b, all_data_a, all_data_b, bin_width=None, bin_
"""


def average_kss(features_a, features_b, all_data_a, all_data_b, verbose=True, override_name_check=False):
_, data_kss, _ = kolmogorov_smirnov_analysis(features_a, features_b, all_data_a, all_data_b, verbose=verbose, override_name_check=override_name_check)
def average_kss(features_a, features_b, all_data_a, all_data_b,
                verbose=True, override_name_check=False):
    """
    Average the Kolmogorov-Smirnov statistics obtained for two ensembles.

    All arguments are passed on unchanged to kolmogorov_smirnov_analysis.

    Returns
    -------
        Result of np.mean over the second of the three values
        returned by kolmogorov_smirnov_analysis.

    """
    ks_results = kolmogorov_smirnov_analysis(
        features_a, features_b, all_data_a, all_data_b,
        verbose=verbose, override_name_check=override_name_check,
    )
    _, ks_statistics, _ = ks_results
    return np.mean(ks_statistics)


def max_kss(features_a, features_b, all_data_a, all_data_b, verbose=True, override_name_check=False):
_, data_kss, _ = kolmogorov_smirnov_analysis(features_a, features_b, all_data_a, all_data_b, verbose=verbose, override_name_check=override_name_check)
def max_kss(features_a, features_b, all_data_a, all_data_b,
            verbose=True, override_name_check=False):
    """
    Find the largest Kolmogorov-Smirnov statistic obtained for two ensembles.

    All arguments are passed on unchanged to kolmogorov_smirnov_analysis.

    Returns
    -------
        Result of np.max over the second of the three values
        returned by kolmogorov_smirnov_analysis.

    """
    ks_results = kolmogorov_smirnov_analysis(
        features_a, features_b, all_data_a, all_data_b,
        verbose=verbose, override_name_check=override_name_check,
    )
    _, ks_statistics, _ = ks_results
    return np.max(ks_statistics)


def average_ksp(features_a, features_b, all_data_a, all_data_b, verbose=True, override_name_check=False):
_, _, data_ksp = kolmogorov_smirnov_analysis(features_a, features_b, all_data_a, all_data_b, verbose=verbose, override_name_check=override_name_check)
def average_ksp(features_a, features_b, all_data_a, all_data_b,
                verbose=True, override_name_check=False):
    """
    Average the third output of the Kolmogorov-Smirnov analysis of two ensembles.

    All arguments are passed on unchanged to kolmogorov_smirnov_analysis.
    NOTE(review): the "ksp" name suggests these are the p-values of the
    test -- confirm against kolmogorov_smirnov_analysis.

    Returns
    -------
        Result of np.mean over the third of the three values
        returned by kolmogorov_smirnov_analysis.

    """
    ks_results = kolmogorov_smirnov_analysis(
        features_a, features_b, all_data_a, all_data_b,
        verbose=verbose, override_name_check=override_name_check,
    )
    _, _, ksp_values = ks_results
    return np.mean(ksp_values)


def max_ksp(features_a, features_b, all_data_a, all_data_b, verbose=True, override_name_check=False):
_, _, data_ksp = kolmogorov_smirnov_analysis(features_a, features_b, all_data_a, all_data_b, verbose=verbose, override_name_check=override_name_check)
def max_ksp(features_a, features_b, all_data_a, all_data_b,
            verbose=True, override_name_check=False):
    """
    Find the maximum of the third output of the Kolmogorov-Smirnov analysis.

    All arguments are passed on unchanged to kolmogorov_smirnov_analysis.
    NOTE(review): the "ksp" name suggests these are the p-values of the
    test -- confirm against kolmogorov_smirnov_analysis.

    Returns
    -------
        Result of np.max over the third of the three values
        returned by kolmogorov_smirnov_analysis.

    """
    ks_results = kolmogorov_smirnov_analysis(
        features_a, features_b, all_data_a, all_data_b,
        verbose=verbose, override_name_check=override_name_check,
    )
    _, _, ksp_values = ks_results
    return np.max(ksp_values)


def min_ksp(features_a, features_b, all_data_a, all_data_b, verbose=True, override_name_check=False):
_, _, data_ksp = kolmogorov_smirnov_analysis(features_a, features_b, all_data_a, all_data_b, verbose=verbose, override_name_check=override_name_check)
def min_ksp(features_a, features_b, all_data_a, all_data_b,
            verbose=True, override_name_check=False):
    """
    Find the minimum of the third output of the Kolmogorov-Smirnov analysis.

    All arguments are passed on unchanged to kolmogorov_smirnov_analysis.
    NOTE(review): the "ksp" name suggests these are the p-values of the
    test -- confirm against kolmogorov_smirnov_analysis.

    Returns
    -------
        Result of np.min over the third of the three values
        returned by kolmogorov_smirnov_analysis.

    """
    ks_results = kolmogorov_smirnov_analysis(
        features_a, features_b, all_data_a, all_data_b,
        verbose=verbose, override_name_check=override_name_check,
    )
    _, _, ksp_values = ks_results
    return np.min(ksp_values)


"""
Calculates average and maximum State Specific Information statistic for a feature across two ensembles. Each of two functions uses the ssi_ensemble_analysis function with the same parameters.
Calculates the average and maximum State Specific Information statistic for a feature across two ensembles.
Each of the two functions uses the ssi_ensemble_analysis function with the same parameters.
Parameters
----------
Expand Down Expand Up @@ -174,13 +224,27 @@ def min_ksp(features_a, features_b, all_data_a, all_data_b, verbose=True, overri
"""


def average_ssi(features_a, features_b, all_data_a, all_data_b, torsions=None, pocket_occupancy=None, pbc=True, verbose=True, write_plots=None, override_name_check=False):
_, data_ssi = ssi_ensemble_analysis(features_a, features_b, all_data_a, all_data_b, torsions=torsions, pocket_occupancy=pocket_occupancy, pbc=pbc, verbose=verbose, write_plots=write_plots, override_name_check=override_name_check)
def average_ssi(features_a, features_b, all_data_a, all_data_b,
                torsions=None, pocket_occupancy=None, pbc=True,
                verbose=True, write_plots=None, override_name_check=False):
    """
    Average the State Specific Information values obtained for two ensembles.

    All arguments are passed on unchanged to ssi_ensemble_analysis.

    Returns
    -------
        Result of np.mean over the second of the two values
        returned by ssi_ensemble_analysis.

    """
    ssi_options = dict(
        torsions=torsions,
        pocket_occupancy=pocket_occupancy,
        pbc=pbc,
        verbose=verbose,
        write_plots=write_plots,
        override_name_check=override_name_check,
    )
    _, ssi_values = ssi_ensemble_analysis(
        features_a, features_b, all_data_a, all_data_b,
        **ssi_options
    )
    return np.mean(ssi_values)


def max_ssi(features_a, features_b, all_data_a, all_data_b, torsions=None, pocket_occupancy=None, pbc=True, verbose=True, write_plots=None, override_name_check=False):
_, data_ssi = ssi_ensemble_analysis(features_a, features_b, all_data_a, all_data_b, torsions=torsions, pocket_occupancy=pocket_occupancy, pbc=pbc, verbose=verbose, write_plots=write_plots, override_name_check=override_name_check)
def max_ssi(features_a, features_b, all_data_a, all_data_b,
            torsions=None, pocket_occupancy=None, pbc=True,
            verbose=True, write_plots=None, override_name_check=False):
    """
    Find the largest State Specific Information value obtained for two ensembles.

    All arguments are passed on unchanged to ssi_ensemble_analysis.

    Returns
    -------
        Result of np.max over the second of the two values
        returned by ssi_ensemble_analysis.

    """
    ssi_options = dict(
        torsions=torsions,
        pocket_occupancy=pocket_occupancy,
        pbc=pbc,
        verbose=verbose,
        write_plots=write_plots,
        override_name_check=override_name_check,
    )
    _, ssi_values = ssi_ensemble_analysis(
        features_a, features_b, all_data_a, all_data_b,
        **ssi_options
    )
    return np.max(ssi_values)


Expand Down
3 changes: 2 additions & 1 deletion pensa/comparison/relative_entropy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import scipy.spatial.distance


def relative_entropy_analysis(features_a, features_b, all_data_a, all_data_b, bin_width=None, bin_num=10, verbose=True, override_name_check=False):
def relative_entropy_analysis(features_a, features_b, all_data_a, all_data_b,
bin_width=None, bin_num=10, verbose=True, override_name_check=False):
"""
Calculates the Jensen-Shannon distance and the Kullback-Leibler divergences for each feature from two ensembles.
Expand Down
Loading

0 comments on commit 73c4702

Please sign in to comment.