Skip to content

Commit

Permalink
Updated and tested tutorials. Unified scripts.
Browse files Browse the repository at this point in the history
  • Loading branch information
NJ-Thomson committed Nov 26, 2024
1 parent f70175c commit a1c37e8
Show file tree
Hide file tree
Showing 13 changed files with 78 additions and 56 deletions.
2 changes: 1 addition & 1 deletion pensa/features/hbond_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ def read_water_site_h_bonds_quickly(structure_input, xtc_input, atomgroups, site
else:
g = Grid(grid_input)
elif grid_input is None:
g = generate_grid(u, atomgroups)
g = generate_grid(u, atomgroups[0])
else:
g = Grid(grid_input)

Expand Down
2 changes: 1 addition & 1 deletion pensa/features/water_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def read_water_features(structure_input, xtc_input, atomgroup, top_waters=10,
water_information.append([water_ID, list(atom_location), water_pocket_occupation_frequency])

# Write data out and visualize water sites in pdb
if write is True:
if out_name is not None:
data_out(out_name + '_' + water_ID + '.txt', water_out)
data_out(out_name + '_WaterSummary.txt', water_information)
write_atom_to_pdb(pdb_outname, atom_location, water_ID, atomgroup)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,45 +22,43 @@
"""

struc = "mor-data/11426_dyn_151.pdb"
xtc = "mor-data/11423_trj_151.xtc"
water_feat, water_data = read_water_features(
structure_input=struc,
xtc_input=xtc,
top_waters=1,
atomgroup="OH2",
write=True,
write_grid_as="TIP3P",
out_name="11426_dyn_151"
)
# struc = "mor-data/11426_dyn_151.pdb"
# xtc = "mor-data/11423_trj_151.xtc"
# water_feat, water_data = read_water_features(
# structure_input=struc,
# xtc_input=xtc,
# top_waters=1,
# atomgroup="OH2",
# write_grid_as="TIP3P",
# out_name="11426_dyn_151"
# )


# We can use the get_atom_features, which provides the same
# functionality but ignores orientations as atoms are considered spherically symmetric.
# # We can use the get_atom_features, which provides the same
# # functionality but ignores orientations as atoms are considered spherically symmetric.

struc = "mor-data/11426_dyn_151.pdb"
# struc = "mor-data/11426_dyn_151.pdb"

xtc = "mor-data/11423_trj_151.xtc"
# xtc = "mor-data/11423_trj_151.xtc"

# Here we locate the sodium site which has the highest probability
# The density grid is written (write=True) using the default density conversion "Angstrom^{-3}" in MDAnalysis
# # Here we locate the sodium site which has the highest probability
# # The density grid is written (write=True) using the default density conversion "Angstrom^{-3}" in MDAnalysis

atom_feat, atom_data = read_atom_features(
structure_input=struc,
xtc_input=xtc,
top_atoms=1,
atomgroup="SOD",
element="Na",
write=True,
out_name="11426_dyn_151"
)
# atom_feat, atom_data = read_atom_features(
# structure_input=struc,
# xtc_input=xtc,
# top_atoms=1,
# atomgroup="SOD",
# element="Na",
# out_name="11426_dyn_151"
# )


# If we have already obtained the grid, we can speed up featurization by reading it in.

struc = "mor-data/11426_dyn_151.pdb"
xtc = "mor-data/11423_trj_151.xtc"
grid = "dens/11426_dyn_151OH2_density.dx"
grid = "11426_dyn_151_OH2_density.dx"
water_feat, water_data = read_water_features(
structure_input=struc,
xtc_input=xtc,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pensa.features import read_cavity_bonds, read_h_bonds
from pensa.features import hbond_features, atom_features

# Featurize water cavity hydrogen bonds

Expand All @@ -8,26 +8,24 @@
pdb_file_a = root_dir + '/11426_dyn_151.pdb'
trj_file_a = root_dir + '/11423_trj_151.xtc'

names, data = read_cavity_bonds(
names, data = hbond_features.read_water_site_h_bonds_quickly(
ref_file_a, trj_file_a,
atomgroups=['OH2', 'H1', 'H2'],
site_IDs=[1, 2],
grid_input=None,
write=True,
write_grid_as='TIP3P',
out_name='11423_trj_169'
out_name='11423_trj_151'
)

# Faturize ligand-protein hydrogen bonds
# Featurize ligand-protein hydrogen bonds

ref_file_a = root_dir + '/11580_dyn_169.psf'
pdb_file_a = root_dir + '/11579_dyn_169.pdb'
trj_file_a = root_dir + '/11578_trj_169.xtc'

names, data = read_h_bonds(
names, data = hbond_features.read_h_bonds(
ref_file_a, trj_file_a,
fixed_group='resname 4VO',
dyn_group='protein',
write=True,
out_name='4VO_hbonds'
)
38 changes: 18 additions & 20 deletions tutorial/PENSA_Tutorial_SSI.py → scripts/state_specific_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
if not os.path.exists(subdir):
os.makedirs(subdir)

# Extract the coordinates of the receptor from the trajectory
# # # Extract the coordinates of the receptor from the trajectory
extract_coordinates(
ref_file_a, pdb_file_a, trj_file_a,
out_name_a + "_receptor", sel_base_a
Expand All @@ -62,7 +62,7 @@
out_name_b + "_receptor", sel_base_b
)

# Extract the features from the beginning (start_frame) of the trajectory
# # Extract the features from the beginning (start_frame) of the trajectory
start_frame = 0
a_rec = read_structure_features(
out_name_a + "_receptor.gro",
Expand All @@ -81,9 +81,9 @@
out_name_a = "condition-a"
out_name_b = "condition-b"

# Extract the multivariate torsion coordinates of each residue as a
# timeseries from the trajectory and write into subdirectory
# output = [[torsion 1 timeseries], [torsion 2 timeseries], ..., [torsion n timeseries]]
# # Extract the multivariate torsion coordinates of each residue as a
# # timeseries from the trajectory and write into subdirectory
# # output = [[torsion 1 timeseries], [torsion 2 timeseries], ..., [torsion n timeseries]]
sc_multivar_res_feat_a, sc_multivar_res_data_a = get_multivar_res_timeseries(
a_rec_feat, a_rec_data, 'sc-torsions', write=True, out_name=out_name_a
)
Expand All @@ -96,16 +96,16 @@
discretize='gaussian', pbc=True
)

# We can calculate the State Specific Information (SSI) shared between the
# ensemble switch and the combined ensemble residue conformations. As the ensemble
# is a binary change, SSI can exist within the range [0, 1] units=bits.
# 0 bits = no information, 1 bits = maximum information, i.e. you can predict the state of the ensemble
# with certainty from the state of the residue.
# Set write_plots = True to generate a folder with all the clustered states for each residue.
# # We can calculate the State Specific Information (SSI) shared between the
# # ensemble switch and the combined ensemble residue conformations. As the ensemble
# # is a binary change, SSI can exist within the range [0, 1] units=bits.
# # 0 bits = no information, 1 bits = maximum information, i.e. you can predict the state of the ensemble
# # with certainty from the state of the residue.
# # Set write_plots = True to generate a folder with all the clustered states for each residue.
data_names, data_ssi = ssi_ensemble_analysis(
sc_multivar_res_feat_a['sc-torsions'], sc_multivar_res_feat_b['sc-torsions'],
sc_multivar_res_data_a['sc-torsions'], sc_multivar_res_data_b['sc-torsions'],
discrete_states_ab, verbose=True, write_plots=False
discrete_states_ab, verbose=True, write_plots=True
)

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
Expand Down Expand Up @@ -149,26 +149,25 @@

# Extract the combined density of the waters in both ensembles a and b
extract_combined_grid(
out_name_a + ".gro", "dens/cond-a_wateraligned.xtc",
out_name_a + ".gro", "traj/cond-a_water_aligned.xtc",
out_name_b + ".gro", out_name_b + ".xtc",
atomgroup="OH2",
write_grid_as="TIP3P",
out_name="ab_grid_",
use_memmap=True
)

grid_combined = "dens/ab_grid_OH2_density.dx"
grid_combined = "ab_grid_OH2_density.dx"

# Then we featurize the waters common to both simulations
# We can do the same analysis for ions using the get_atom_features featurizer.

water_feat_a, water_data_a = read_water_features(
structure_input=out_name_a + ".gro",
xtc_input="dens/cond-a_wateraligned.xtc",
xtc_input="traj/cond-a_water_aligned.xtc",
top_waters=2,
atomgroup="OH2",
grid_input=grid_combined,
write=True,
out_name="cond_a"
)

Expand All @@ -178,15 +177,14 @@
top_waters=2,
atomgroup="OH2",
grid_input=grid_combined,
write=True,
out_name="cond_b"
)

# Calculating SSI is then exactly the same as for residues
# Calculating SSI is then exactly the same as for residues, with the h2o argument set to True.
discrete_states_ab1 = get_discrete_states(
water_data_a['WaterPocket_Distr'],
water_data_b['WaterPocket_Distr'],
discretize='gaussian', pbc=True
discretize='gaussian', pbc=True, h2o=True, write_plots=True
)

# SSI shared between waters and the switch between ensemble conditions
Expand All @@ -198,7 +196,7 @@

# Alternatively we can see if the pocket occupancy (the presence/absence of water at the site) shares SSI
# Currently this is only enabled with ssi_ensemble_analysis. We need to turn off the periodic boundary conditions
# as the distributions are no longer periodic.
# as the distributions are no longer periodic.

discrete_states_ab2 = get_discrete_states(
water_data_a['WaterPocket_OccupDistr'],
Expand Down
Empty file modified tutorial/0-download.sh
100644 → 100755
Empty file.
Empty file modified tutorial/1-preprocessing-gpcrmd.sh
100644 → 100755
Empty file.
Empty file modified tutorial/2-comparison-of-feature-distributions.sh
100644 → 100755
Empty file.
Empty file modified tutorial/3-principal-component-analysis.sh
100644 → 100755
Empty file.
Empty file modified tutorial/4-clustering.sh
100644 → 100755
Empty file.
8 changes: 8 additions & 0 deletions tutorial/5-density-hbond-featurization.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash



#python ../scripts/hbond_featurizer.py

python ../scripts/density_featurizer.py

4 changes: 4 additions & 0 deletions tutorial/6-state-specific-information.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash

python ../scripts/state_specific_info.py

16 changes: 16 additions & 0 deletions tutorial/runsh.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash



./1-preprocessing-gpcrmd.sh

./2-comparison-of-feature-distributions.sh

./3-principal-component-analysis.sh

./4-clustering.sh

./5-density-hbond-featurization.sh

./6-state-specific-information.sh
-

0 comments on commit a1c37e8

Please sign in to comment.