Adapt modifications when using the all-PTM model

wilhelm-lab · Nov 4, 2024 · 0d7ab1b · 0d7ab1b
1 parent 58b8375
commit 0d7ab1b
Show file tree

Hide file tree

Showing 7 changed files with 28 additions and 4 deletions.
diff --git a/spectrum_io/search_result/mascot.py b/spectrum_io/search_result/mascot.py
@@ -25,6 +25,7 @@ def read_result(
         custom_mods: Optional[Dict[str, int]] = None,
         ptm_unimod_id: Optional[int] = 0,
         ptm_sites: Optional[list[str]] = None,
+        ptm_model: bool = False
     ) -> pd.DataFrame:
         """
         Function to read a mascot msf file and perform some basic formatting.

diff --git a/spectrum_io/search_result/maxquant.py b/spectrum_io/search_result/maxquant.py
@@ -74,6 +74,7 @@ def read_result(
         custom_mods: dict[str, int] | None = None,
         ptm_unimod_id: int | None = 0,
         ptm_sites: list[str] | None = None,
+        ptm_model: bool = False
     ) -> pd.DataFrame:
         """
         Function to read a msms txt and perform some basic formatting.
@@ -87,6 +88,8 @@ def read_result(
         :return: pd.DataFrame with the formatted data
         """
         parsed_mods = parse_mods(self.standard_mods | (custom_mods or {}))
+        if ptm_model:
+            parsed_mods = c.MAXQUANT_VAR_MODS
         if tmt_label:
             unimod_tag = c.TMT_MODS[tmt_label]
             parsed_mods["K"] = f"K{unimod_tag}"

diff --git a/spectrum_io/search_result/msamanda.py b/spectrum_io/search_result/msamanda.py
@@ -25,6 +25,7 @@ def read_result(
         custom_mods: dict[str, int] | None = None,
         ptm_unimod_id: int | None = 0,
         ptm_sites: list[str] | None = None,
+        ptm_model: bool = False,
         suffix: str = "output.csv",
     ) -> pd.DataFrame:
         """

diff --git a/spectrum_io/search_result/msfragger.py b/spectrum_io/search_result/msfragger.py
@@ -6,7 +6,6 @@
 import pandas as pd
 import spectrum_fundamentals.constants as c
 from pyteomics import pepxml
-from spectrum_fundamentals.constants import MSFRAGGER_VAR_MODS
 from spectrum_fundamentals.mod_string import add_permutations, internal_without_mods
 from tqdm import tqdm
 
@@ -21,13 +20,25 @@ class MSFragger(SearchResults):
     @property
     def standard_mods(self):
         """Standard modifications that are always applied if not otherwise specified."""
-        return {"C[160]": 4, "M[147]": 35, "R[157]": 7, "Q[129]": 7, "N[115]": 7}
+        return {"C[160]": 4, "M[147]": 35, "R[157]": 7, "Q[129]": 7, "N[115]": 7,
+                }
+
+    @staticmethod
+    def fix_similar_mz(seq_modifications):
+        """
+        Rewrite the ``K[170]`` tag of a modified peptide to a two-decimal tag.
+
+        If the peptide contains ``K[170]``, every occurrence becomes ``K[170.10]`` when the text
+        ``170.10`` is found in the modifications entry, and ``K[170.14]`` otherwise. Peptides
+        without ``K[170]`` are returned unchanged.
+        NOTE(review): presumably separates two lysine modifications whose masses both round to
+        170 - confirm against the search engine output.
+
+        :param seq_modifications: record providing the 'modified_peptide' and 'modifications' entries
+        :return: the (possibly rewritten) modified peptide
+        """
+        peptide = seq_modifications["modified_peptide"]
+        reported_mods = seq_modifications["modifications"]
+        if "K[170]" not in peptide:
+            return peptide
+        # the reported modifications decide which of the two tags replaces the rounded one
+        tag = "K[170.10]" if "170.10" in reported_mods else "K[170.14]"
+        return peptide.replace("K[170]", tag)
 
     def filter_valid_prosit_sequences(self):
         """Filter valid Prosit sequences."""
         logger.info(f"#sequences before filtering for valid prosit sequences: {len(self.results.index)}")
         # retain only peptides that fall within [7, 30] length supported by Prosit
-        self.results = self.results[(self.results["PEPTIDE_LENGTH"] <= 30) & (self.results["PEPTIDE_LENGTH"] >= 7)]
+        self.results = self.results[(self.results["PEPTIDE_LENGTH"] <= 30) & (self.results["PEPTIDE_LENGTH"] >= 6)]
         # remove unsupported mods to exclude
         self.results = self.results[~self.results["MODIFIED_SEQUENCE"].str.contains(r"\[\d+\]", regex=True)]
         # remove precursor charges greater than 6
@@ -42,6 +53,7 @@ def read_result(
         custom_mods: dict[str, int] | None = None,
         ptm_unimod_id: int | None = 0,
         ptm_sites: list[str] | None = None,
+        ptm_model: bool = False
     ) -> pd.DataFrame:
         """
         Function to read a msms txt and perform some basic formatting.
@@ -56,6 +68,9 @@ def read_result(
         :return: pd.DataFrame with the formatted data
         """
         parsed_mods = parse_mods(self.standard_mods | (custom_mods or {}))
+        #TODO: fix model parsing for PTM model
+        if ptm_model:
+            parsed_mods = c.MSFRAGGER_VAR_MODS
         if tmt_label:
             unimod_tag = c.TMT_MODS[tmt_label]
             parsed_mods["K"] = f"K{unimod_tag}"
@@ -72,6 +87,7 @@ def read_result(
             ms_frag_results.append(pepxml.DataFrame(str(pep_xml_file)))
 
         self.results = pd.concat(ms_frag_results)
+        self.results['modified_peptide'] = self.results[['modified_peptide','modifications']].apply(MSFragger.fix_similar_mz,axis=1)
 
         self.convert_to_internal(mods=parsed_mods, ptm_unimod_id=ptm_unimod_id, ptm_sites=ptm_sites)
         return self.filter_valid_prosit_sequences()

diff --git a/spectrum_io/search_result/sage.py b/spectrum_io/search_result/sage.py
@@ -26,6 +26,7 @@ def read_result(
         custom_mods: dict[str, int] | None = None,
         ptm_unimod_id: int | None = 0,
         ptm_sites: list[str] | None = None,
+        ptm_model: bool = False
     ) -> pd.DataFrame:
         """
         Function to read a msms tsv and perform some basic formatting.

diff --git a/spectrum_io/search_result/search_results.py b/spectrum_io/search_result/search_results.py
@@ -118,6 +118,7 @@ def generate_internal(
         custom_mods: dict[str, int] | None = None,
         ptm_unimod_id: int | None = 0,
         ptm_sites: list[str] | None = None,
+        ptm_model: bool = False
     ) -> pd.DataFrame:
         """
         Generate df and save to out_path if provided.
@@ -145,7 +146,7 @@ def generate_internal(
             return csv.read_file(out_path)
 
         # convert, save and return
-        df = self.read_result(tmt_label, custom_mods=custom_mods, ptm_unimod_id=ptm_unimod_id, ptm_sites=ptm_sites)[
+        df = self.read_result(tmt_label, custom_mods=custom_mods, ptm_unimod_id=ptm_unimod_id, ptm_sites=ptm_sites,ptm_model=ptm_model)[
             COLUMNS
         ]
         csv.write_file(df, out_path)

diff --git a/spectrum_io/search_result/xisearch.py b/spectrum_io/search_result/xisearch.py
@@ -18,6 +18,7 @@ def read_result(
         custom_mods: Optional[Dict[str, int]] = None,
         ptm_unimod_id: Optional[int] = 0,
         ptm_sites: Optional[list[str]] = None,
+        ptm_model: bool = False
     ) -> pd.DataFrame:
         """
         Function to read a csv of CSMs and perform some basic formatting.