-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds an example of how to compute saturation mutagenesis using RaSP
- Loading branch information
1 parent
b3e8694
commit 5c25427
Showing
3 changed files
with
4,777 additions
and
0 deletions.
There are no files selected for viewing
77 changes: 77 additions & 0 deletions
77
...ation_mutagenesis_for_two_proteins_using_rasp/saturation_mutagenesis_at_first_position.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
""" | ||
In this example, we use RaSP to predict the effect of a | ||
saturation mutagenesis at the first position of two proteins. | ||
This example relies on pandas, so remember to | ||
install it in your current environment: | ||
pip install pandas | ||
""" | ||
from pathlib import Path | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
||
from poli.core.util.proteins.defaults import AMINO_ACIDS | ||
from poli import objective_factory | ||
|
||
THIS_DIR = Path(__file__).parent.resolve() | ||
|
||
|
||
def saturation_mutagenesis_for_protein_at_position(
    x: np.ndarray, position: int
) -> np.ndarray:
    """
    Build every single-site variant of a sequence at one position.

    Parameters
    ----------
    x : np.ndarray
        One-dimensional array holding one residue (a string) per
        entry.
    position : int
        Index of the residue to replace.

    Returns
    -------
    np.ndarray
        Array of shape (len(AMINO_ACIDS), len(x)). Row i is a copy
        of x whose entry at `position` is AMINO_ACIDS[i]; x itself
        is left untouched. Note that these are the mutated
        sequences, not their predicted effects.

    Raises
    ------
    ValueError
        If x[position] is the empty string.
    IndexError
        If `position` is out of bounds for x.
    """
    # An explicit raise (instead of an assert) keeps this check alive
    # under `python -O`. Presumably an empty string marks padding
    # rather than a real residue -- TODO confirm against the objective.
    if x[position] == "":
        raise ValueError(
            f"Position {position} holds an empty string, "
            "so there is no residue to mutate."
        )

    mutations = []
    for amino_acid in AMINO_ACIDS:
        # Copy first so that the input sequence is never modified.
        mutation = x.copy()
        mutation[position] = amino_acid
        mutations.append(mutation)

    return np.array(mutations)
|
||
|
||
if __name__ == "__main__":
    # Path.glob yields files in an arbitrary order, so we sort the
    # paths to make "first" and "second" protein below reproducible.
    wildtype_pdb_paths_for_rasp = sorted((THIS_DIR / "two_proteins").glob("*.pdb"))

    # y0 is unpacked for completeness, but is not used below.
    _, f_rasp, x0, y0, _ = objective_factory.create(
        name="rasp",
        wildtype_pdb_path=wildtype_pdb_paths_for_rasp,
    )

    # At this point, x0 contains the string
    # representations of the wildtype sequences.
    # Let's construct the saturation mutagenesis
    # of each of these:
    mutations_for_first_protein = saturation_mutagenesis_for_protein_at_position(
        x0[0], 0
    )
    mutations_for_second_protein = saturation_mutagenesis_for_protein_at_position(
        x0[1], 0
    )

    # Now, we can predict the effect of each of these
    # mutations using RaSP. We stack all the variants
    # so that they are scored in a single call:
    x = np.vstack([mutations_for_first_protein, mutations_for_second_protein])

    y = f_rasp(x)

    # Collecting the results in a DataFrame, with one row
    # per variant. Nothing is written to disk here; call
    # df.to_csv(...) if you want to keep the results.
    df = pd.DataFrame(
        [
            {
                "mutation": "".join(x_i),
                "score": y_i,
            }
            for x_i, y_i in zip(x, y.flatten())
        ]
    )

    print(df.head())
Oops, something went wrong.