Skip to content

Commit

Permalink
Overhaul the modification matching for the peptide_list.txt
Browse files Browse the repository at this point in the history
  • Loading branch information
fcyu committed Oct 11, 2024
1 parent 56ca8c3 commit ae56090
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ private static void runSkyline(String skylinePath, Path wd, String skylineVersio

Path peptideListPath = skylineOutputDir.resolve("peptide_list.txt").toAbsolutePath();
WritePeptideList pepWriter = new WritePeptideList();
Map<String, Set<String>> addedMods = pepWriter.writePeptideList(psmTsvFiles, peptideListPath);
Map<Float, Set<String>> addedMods = pepWriter.writePeptideList(psmTsvFiles, peptideListPath);

Path modXmlPath = wd.resolve("mod.xml");
WriteSkyMods writeSkyMods = new WriteSkyMods(modXmlPath, pf, modsMode, matchUnimod, !useSpeclib, addedMods);
Expand Down Expand Up @@ -256,6 +256,8 @@ private static void runSkyline(String skylinePath, Path wd, String skylineVersio
}

for (String s : lcmsFiles) {
// todo: Skyline automatically looks for the LC-MS files in specific directories and imports them even though they should not be imported for quant because they are just for library building
// todo: if there are LC-MS files in the directories where Skyline looks for the files, the same files will be imported twice
writer.write("--import-file=" + s + " ");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,21 @@
import java.util.regex.Pattern;

public class WritePeptideList {

private static Map<String, Integer> columns;
private static final Pattern sitePattern = Pattern.compile("(\\d+)\\w\\(");
private static final Pattern massPattern = Pattern.compile("(\\([\\d.]+\\))");
private static final Pattern AApattern = Pattern.compile("\\d?([\\w-]+)\\(");
private static final Pattern varModPattern = Pattern.compile("([0-9]+)([A-Z])\\(([\\d.-]+)\\)");
private static final Pattern nTermModPattern = Pattern.compile("N-term\\(([\\d.-]+)\\)");
private static final Pattern cTermModPattern = Pattern.compile("C-term\\(([\\d.-]+)\\)");

public static final String COL_ASSIGNED_MODS = "Assigned Modifications";
public static final String COL_PEPTIDE = "Peptide";
public static final String COL_CHARGE = "Charge";
public static final String COL_PROTEIN = "Protein";


public Map<String, Set<String>> writePeptideList(Set<Path> psmtsvFiles, Path outputPath) throws IOException {
public Map<Float, Set<String>> writePeptideList(Set<Path> psmtsvFiles, Path outputPath) throws IOException {
Map<String, Set<String>> proteinMap = new HashMap<>();
Map<String, Set<String>> additiveMods = new HashMap<>();
Map<Float, Set<String>> additiveMods = new HashMap<>();

for (Path psmtsv: psmtsvFiles) {
BufferedReader reader = new BufferedReader(new FileReader(psmtsv.toFile()));
Expand Down Expand Up @@ -62,31 +63,48 @@ public Map<String, Set<String>> writePeptideList(Set<Path> psmtsvFiles, Path out
* of "+" characters.
* @return
*/
public static String generateModifiedPeptide(String[] psmSplits, Map<String, Integer> columns, boolean addCharge, Map<String, Set<String>> additiveMods) {
public static String generateModifiedPeptide(String[] psmSplits, Map<String, Integer> columns, boolean addCharge, Map<Float, Set<String>> additiveMods) {
String peptide = psmSplits[columns.get(COL_PEPTIDE)];
String mods = psmSplits[columns.get(COL_ASSIGNED_MODS)].trim();
Map<Integer, Float> modMap = new HashMap<>();

Matcher m = nTermModPattern.matcher(mods);
while (m.find()) {
Float f = modMap.get(1);
if (f != null) {
f += Float.parseFloat(m.group(1));
modMap.put(1, f);
additiveMods.computeIfAbsent(f, k -> new HashSet<>()).add("n^");
} else {
modMap.put(1, Float.parseFloat(m.group(1)));
}
}

String[] mods = psmSplits[columns.get(COL_ASSIGNED_MODS)].split(",");
Map<Integer, String> modMap = new TreeMap<>();
for (String mod : mods) {
Matcher siteMatch = sitePattern.matcher(mod);
int site;
if (siteMatch.find()) {
site = Integer.parseInt(siteMatch.group(1));
Matcher massMatch = massPattern.matcher(mod);
if (massMatch.find()) {
if (modMap.containsKey(site)) {
// handle multiple mods (e.g., 5C(57.0215),5C(100.00)) by adding masses together into a single mod
double mass = Double.parseDouble(massMatch.group(1).replace("(", "").replace(")", ""));
mass += Double.parseDouble(modMap.get(site).replace("[", "").replace("]", ""));
modMap.put(site, String.format("[%.5f]", mass));
// add mod to list for appending to mod.xml
additiveMods.computeIfAbsent(String.format("%.4f", mass), k -> new HashSet<>()).add(getSite(mod));
} else {
modMap.put(site, massMatch.group(1).replace("(", "[").replace(")", "]"));
}
}
m = cTermModPattern.matcher(mods);
while (m.find()) {
Float f = modMap.get(peptide.length());
if (f != null) {
f += Float.parseFloat(m.group(1));
modMap.put(peptide.length(), f);
additiveMods.computeIfAbsent(f, k -> new HashSet<>()).add("c^");
} else {
modMap.put(peptide.length(), Float.parseFloat(m.group(1)));
}
}

m = varModPattern.matcher(mods);
while (m.find()) {
int site = Integer.parseInt(m.group(1));
Float f = modMap.get(site);
if (f != null) {
f += Float.parseFloat(m.group(3));
modMap.put(site, f);
additiveMods.computeIfAbsent(f, k -> new HashSet<>()).add(m.group(2));
} else {
modMap.put(site, Float.parseFloat(m.group(3)));
}
}

String modPep = insertMods(peptide, modMap);
int charge = Integer.parseInt(psmSplits[columns.get(COL_CHARGE)]);
String chargeStr = addCharge ? "+".repeat(charge) : "";
Expand All @@ -96,34 +114,21 @@ public static String generateModifiedPeptide(String[] psmSplits, Map<String, Int
/**
* Generate a modified peptide String with all Assigned modifications placed within it
*/
private static String insertMods(String peptide, Map<Integer, String> modMap) {
StringBuilder modifiedPeptide = new StringBuilder(peptide);

// Offset to account for insertions
int offset = 0;

// Iterate through the sorted entries and insert the mods
for (Map.Entry<Integer, String> entry : modMap.entrySet()) {
int position = entry.getKey() + offset;
String mod = entry.getValue();

if (position >= 0 && position <= modifiedPeptide.length()) {
modifiedPeptide.insert(position, mod);
offset += mod.length();
/**
 * Builds the modified-peptide string by writing a bracketed mass directly after every residue
 * that has an entry in {@code modMap}.
 *
 * @param peptide unmodified peptide sequence
 * @param modMap  1-based residue position mapped to the mass to print at that position;
 *                positions outside the peptide are ignored
 * @return the sequence with {@code [mass]} (one decimal digit) after each mapped residue
 */
private static String insertMods(String peptide, Map<Integer, Float> modMap) {
  // Presize for the residues plus a rough allowance for each "[mass]" annotation.
  StringBuilder modifiedPeptide = new StringBuilder(peptide.length() + 8 * modMap.size());
  for (int i = 0; i < peptide.length(); ++i) {
    modifiedPeptide.append(peptide.charAt(i));
    Float mass = modMap.get(i + 1);
    if (mass != null) {
      // With more decimal digits, there will be mismatches between the floating point values from FragPipe and Skyline.
      // Locale.ROOT pins '.' as the decimal separator; the default-locale overload would emit
      // ',' on JVMs configured for e.g. German or French and change the written mass text.
      modifiedPeptide.append(String.format(java.util.Locale.ROOT, "[%.1f]", mass));
    }
  }
  return modifiedPeptide.toString();
}

// Returns the text captured by the first group of AApattern for the given Assigned Mod entry
// (the AA or terminus label in front of the opening parenthesis), or "" when the pattern does not match.
private static String getSite(String mod) {
  Matcher siteMatcher = AApattern.matcher(mod);
  return siteMatcher.find() ? siteMatcher.group(1) : "";
}

private String initHeader(String header) {
columns = new HashMap<>();
String[] splits = header.split("\t");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public void writeSSL(Set<Path> psmtsvFiles, Path outputPath, boolean isPercolato
sslLine.append(scoreType).append("\t");
sslLine.append(splits[columns.get(COL_SCORE)]).append("\t");
double rt = Double.parseDouble(splits[columns.get(COL_RT)]);
sslLine.append(String.format("%.4f", rt / 60.0)).append("\t"); // RT in minutes
sslLine.append(rt / 60f).append("\t"); // RT in minutes

// add IM if present
if (checkIM) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ public class WriteSkyMods {
}
}

public WriteSkyMods(Path path, PropsFile pf, int modsMode, boolean matchUnimod, boolean isSSL, Map<String, Set<String>> addedMods) throws Exception {
public WriteSkyMods(Path path, PropsFile pf, int modsMode, boolean matchUnimod, boolean isSSL, Map<Float, Set<String>> addedMods) throws Exception {
List<Mod> mods = new ArrayList<>(4);

String fixModStr = pf.getProperty("msfragger.table.fix-mods");
Expand Down Expand Up @@ -113,8 +113,8 @@ public WriteSkyMods(Path path, PropsFile pf, int modsMode, boolean matchUnimod,
}

// add any combined mods (multiple at one site) found during peptide list generation
for (Map.Entry<String, Set<String>> entry : addedMods.entrySet()) {
mass = Float.parseFloat(entry.getKey());
for (Map.Entry<Float, Set<String>> entry : addedMods.entrySet()) {
mass = entry.getKey();
mods.addAll(convertMods(String.join("", entry.getValue()), true, mass, mass, new ArrayList<>(), new ArrayList<>(), false));
}

Expand Down

0 comments on commit ae56090

Please sign in to comment.