Skip to content

Commit

Permalink
adapt mods when using all PTM model
Browse files Browse the repository at this point in the history
  • Loading branch information
WassimG committed Nov 4, 2024
1 parent 58b8375 commit 0d7ab1b
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 4 deletions.
1 change: 1 addition & 0 deletions spectrum_io/search_result/mascot.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def read_result(
custom_mods: Optional[Dict[str, int]] = None,
ptm_unimod_id: Optional[int] = 0,
ptm_sites: Optional[list[str]] = None,
ptm_model: bool = False
) -> pd.DataFrame:
"""
Function to read a mascot msf file and perform some basic formatting.
Expand Down
3 changes: 3 additions & 0 deletions spectrum_io/search_result/maxquant.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def read_result(
custom_mods: dict[str, int] | None = None,
ptm_unimod_id: int | None = 0,
ptm_sites: list[str] | None = None,
ptm_model: bool = False
) -> pd.DataFrame:
"""
Function to read a msms txt and perform some basic formatting.
Expand All @@ -87,6 +88,8 @@ def read_result(
:return: pd.DataFrame with the formatted data
"""
parsed_mods = parse_mods(self.standard_mods | (custom_mods or {}))
if ptm_model:
parsed_mods = c.MAXQUANT_VAR_MODS
if tmt_label:
unimod_tag = c.TMT_MODS[tmt_label]
parsed_mods["K"] = f"K{unimod_tag}"
Expand Down
1 change: 1 addition & 0 deletions spectrum_io/search_result/msamanda.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def read_result(
custom_mods: dict[str, int] | None = None,
ptm_unimod_id: int | None = 0,
ptm_sites: list[str] | None = None,
ptm_model: bool = False,
suffix: str = "output.csv",
) -> pd.DataFrame:
"""
Expand Down
22 changes: 19 additions & 3 deletions spectrum_io/search_result/msfragger.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import pandas as pd
import spectrum_fundamentals.constants as c
from pyteomics import pepxml
from spectrum_fundamentals.constants import MSFRAGGER_VAR_MODS
from spectrum_fundamentals.mod_string import add_permutations, internal_without_mods
from tqdm import tqdm

Expand All @@ -21,13 +20,25 @@ class MSFragger(SearchResults):
@property
def standard_mods(self):
"""Standard modifications that are always applied if not otherwise specified."""
return {"C[160]": 4, "M[147]": 35, "R[157]": 7, "Q[129]": 7, "N[115]": 7}
return {"C[160]": 4, "M[147]": 35, "R[157]": 7, "Q[129]": 7, "N[115]": 7,
}

@staticmethod
def fix_similar_mz(seq_modifications):
    """
    Disambiguate the rounded ``K[170]`` tag of a modified peptide string.

    :param seq_modifications: row-like mapping providing the entries 'modified_peptide'
        (the annotated sequence) and 'modifications' (anything supporting ``in`` with a
        string; presumably the modification string produced by the pepXML reader)
    :return: the modified peptide; if it contains 'K[170]', every occurrence is rewritten
        to 'K[170.10]' when '170.10' is found in the modifications entry and to
        'K[170.14]' otherwise. Sequences without 'K[170]' are returned unchanged.
    """
    # Both entries are looked up unconditionally, so a missing key fails early.
    peptide = seq_modifications["modified_peptide"]
    annotations = seq_modifications["modifications"]

    if "K[170]" not in peptide:
        return peptide

    # One tag is chosen per peptide and applied to all K[170] occurrences.
    precise_tag = "K[170.10]" if "170.10" in annotations else "K[170.14]"
    return peptide.replace("K[170]", precise_tag)

def filter_valid_prosit_sequences(self):
"""Filter valid Prosit sequences."""
logger.info(f"#sequences before filtering for valid prosit sequences: {len(self.results.index)}")
# retain only peptides whose length falls within [6, 30] (the lower bound in the filter below is 6, not 7; TODO confirm length 6 is supported by Prosit)
self.results = self.results[(self.results["PEPTIDE_LENGTH"] <= 30) & (self.results["PEPTIDE_LENGTH"] >= 7)]
self.results = self.results[(self.results["PEPTIDE_LENGTH"] <= 30) & (self.results["PEPTIDE_LENGTH"] >= 6)]
# remove unsupported mods to exclude
self.results = self.results[~self.results["MODIFIED_SEQUENCE"].str.contains(r"\[\d+\]", regex=True)]
# remove precursor charges greater than 6
Expand All @@ -42,6 +53,7 @@ def read_result(
custom_mods: dict[str, int] | None = None,
ptm_unimod_id: int | None = 0,
ptm_sites: list[str] | None = None,
ptm_model: bool = False
) -> pd.DataFrame:
"""
Function to read a msms txt and perform some basic formatting.
Expand All @@ -56,6 +68,9 @@ def read_result(
:return: pd.DataFrame with the formatted data
"""
parsed_mods = parse_mods(self.standard_mods | (custom_mods or {}))
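# TODO: fix modification parsing for the PTM model.
# NOTE(review): parsed_mods is bound to c.MSFRAGGER_VAR_MODS itself (no copy), so the
# parsed_mods["K"] assignment in the tmt_label branch below writes into that shared constant.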
if ptm_model:
parsed_mods = c.MSFRAGGER_VAR_MODS
if tmt_label:
unimod_tag = c.TMT_MODS[tmt_label]
parsed_mods["K"] = f"K{unimod_tag}"
Expand All @@ -72,6 +87,7 @@ def read_result(
ms_frag_results.append(pepxml.DataFrame(str(pep_xml_file)))

self.results = pd.concat(ms_frag_results)
self.results['modified_peptide'] = self.results[['modified_peptide','modifications']].apply(MSFragger.fix_similar_mz,axis=1)

self.convert_to_internal(mods=parsed_mods, ptm_unimod_id=ptm_unimod_id, ptm_sites=ptm_sites)
return self.filter_valid_prosit_sequences()
Expand Down
1 change: 1 addition & 0 deletions spectrum_io/search_result/sage.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def read_result(
custom_mods: dict[str, int] | None = None,
ptm_unimod_id: int | None = 0,
ptm_sites: list[str] | None = None,
ptm_model: bool = False
) -> pd.DataFrame:
"""
Function to read a msms tsv and perform some basic formatting.
Expand Down
3 changes: 2 additions & 1 deletion spectrum_io/search_result/search_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def generate_internal(
custom_mods: dict[str, int] | None = None,
ptm_unimod_id: int | None = 0,
ptm_sites: list[str] | None = None,
ptm_model: bool = False
) -> pd.DataFrame:
"""
Generate df and save to out_path if provided.
Expand Down Expand Up @@ -145,7 +146,7 @@ def generate_internal(
return csv.read_file(out_path)

# convert, save and return
df = self.read_result(tmt_label, custom_mods=custom_mods, ptm_unimod_id=ptm_unimod_id, ptm_sites=ptm_sites)[
df = self.read_result(tmt_label, custom_mods=custom_mods, ptm_unimod_id=ptm_unimod_id, ptm_sites=ptm_sites,ptm_model=ptm_model)[
COLUMNS
]
csv.write_file(df, out_path)
Expand Down
1 change: 1 addition & 0 deletions spectrum_io/search_result/xisearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def read_result(
custom_mods: Optional[Dict[str, int]] = None,
ptm_unimod_id: Optional[int] = 0,
ptm_sites: Optional[list[str]] = None,
ptm_model: bool = False
) -> pd.DataFrame:
"""
Function to read a csv of CSMs and perform some basic formatting.
Expand Down

0 comments on commit 0d7ab1b

Please sign in to comment.