-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path20220413-DA53-msAlign-QC-exporter.vbs
284 lines (249 loc) · 10.4 KB
/
20220413-DA53-msAlign-QC-exporter.vbs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
'***** Bruker DataAnalysis msAlign Exporter
' Created by Matt Willetts (Bruker), Kyle A. Brown (U-Wisconsin), and David L. Tabb (Institut Pasteur)
' This software makes it possible to conduct TopPIC searches on data from Bruker Q-TOF instruments.
' The SNAP / AutoMSn code at the very top handles "deconvolution."
' Everything else crafts an msalign file from the list of Compound objects.
'***** To Do:
' How can we detect the precursor charge better? The loop through MS1 peaks seems error-prone.
' How can we specify the settings for SNAP and AutoMSn directly rather than relying on users?
' How do we "rescue" MS/MS scans for which no precursor charge is reported?
' Do we need to write "FEATURE" files for better TopPIC compatibility?
' Localise for US, using '.' rather than ',' as a decimal separator.
' Every number this script concatenates into its output files is formatted under this locale.
SetLocale(1033)

' Proton mass; subtracted once per charge when an m/z value is converted to a neutral mass.
Const Proton = 1.00727647
' Scale factor applied to every MS/MS peak intensity before it is written to the msalign file.
Const IntensityMultiplier = 10
' Largest |m/z difference| at which an MS1 peak is accepted as the precursor peak.
Const MZTolerance = 0.001
' Highest bin index of the precursor-charge histogram (ZDistn).
Const ZCeiling = 100
' Highest bin index of the MS/MS peak-count histogram (PkCountDistn).
Const PkCountCeiling = 1000

' Histograms indexed by value: ZDistn(z) counts compounds with precursor charge z
' (bin 0 counts compounds whose charge could not be determined), and PkCountDistn(n)
' counts MS/MS spectra holding n peaks.
' Keep the literal bounds below in step with ZCeiling and PkCountCeiling.
Dim ZDistn(100)
Dim PkCountDistn(1000)

Dim PathAndFile, objFSO, msalignFile, qcFile, NoZCount
' Summary statistics reported in the QC file; -1 means "not available".
Dim ZMinimum, ZQuartile1, ZMedian, ZQuartile3, ZMaximum, ZMode
' PkCountMode is declared here alongside its siblings, matching ZMode above.
Dim PkCountMinimum, PkCountQuartile1, PkCountMedian, PkCountQuartile3, PkCountMaximum, PkCountMode
'***** Run deconvolution in SNAP / MaxEnt / AutoMSn
With Analysis
    '***** Configure for SNAP peak picking (detection algorithm code 2)
    .Method.MassListParameters.DetectionAlgorithm = 2

    '***** Just select everything for peak picking: drop any existing chromatogram
    '***** range selections, then add a single range from 0 to 1000.
    .ClearChromatogramRangeSelections
    .AddChromatogramRangeSelection 0, 1000

    '***** Build the compound list that the rest of this script exports.
    .FindAutoMSn

    ' Saving is best-effort: a failure here must not stop the export.
    ' NOTE(review): error suppression is never switched back off, so every runtime
    ' error in the remainder of the script is also skipped silently. Confirm whether
    ' that is intended before tightening it.
    On Error Resume Next
    .Save
End With
'***** Set up our output files for writing.
' Both files are named after Analysis.Path with its last two characters removed
' (presumably the ".d" suffix of the analysis folder - confirm), plus a new extension.
Set objFSO = CreateObject("Scripting.FileSystemObject")
With objFSO
    PathAndFile = Left(Analysis.Path, Len(Analysis.Path) - 2) & ".msalign"
    Set msalignFile = .CreateTextFile(PathAndFile)

    PathAndFile = Left(Analysis.Path, Len(Analysis.Path) - 2) & ".qc.tsv"
    Set qcFile = .CreateTextFile(PathAndFile)
End With

'***** Start both histograms from zero so that untouched bins are reported as 0.
For Looper = LBound(ZDistn) To UBound(ZDistn)
    ZDistn(Looper) = 0
Next

For Looper = LBound(PkCountDistn) To UBound(PkCountDistn)
    PkCountDistn(Looper) = 0
Next
'***** Convert every AutoMSn compound into one msalign record ("BEGIN IONS" ... "END IONS")
'***** and accumulate the precursor-charge and MS/MS peak-count histograms used for QC.
'
' NOTE(review): "On Error Resume Next" (issued before Analysis.Save earlier in this script)
' is still in effect here. A statement that fails is skipped silently and its target
' variable keeps the value left over from the previous compound. In particular, if the
' compound name holds no "(...)" section, the Mid() fallback below fails and PrecursorMZ
' is carried over from the previous compound.
Dim MS1Spec, MS2Spec, MSMSType, MS1ScanNumber, MS2ScanNumber
Dim PrecursorMass, PrecursorCharge, PrecursorMZ, PrecursorIntensity, PrecursorRT
Dim StartPos, StopPos, ThisPeak
Dim ChargeBin, PeakCountBin

For Each ThisCompound In Analysis.Compounds
    ' Item 1 of a compound is used as its MS spectrum and item 2 as its MS/MS spectrum.
    Set MS1Spec = ThisCompound(1)
    Set MS2Spec = ThisCompound(2)

    '***** Determine the dissociation type (based on whether or not the compound Name contains the string "ETD").
    If InStr(ThisCompound.Name, "ETD") > 0 Then
        MSMSType = "ETD"
    Else
        MSMSType = "CID"
    End If

    '***** Determine the precursor m/z, since we will need that to determine its charge
    If IsNumeric(ThisCompound.Precursor) Then
        PrecursorMZ = ThisCompound.Precursor
    Else
        '***** Otherwise grab the value between "(" and ")" in the compound name
        StartPos = InStr(ThisCompound.Name, "(") + 1
        StopPos = InStr(ThisCompound.Name, ")")
        PrecursorMZ = Mid(ThisCompound.Name, StartPos, StopPos - StartPos)
    End If

    ' Retention time and intensity fall back to 0 when the compound reports nothing numeric.
    If IsNumeric(ThisCompound.RetentionTime) Then
        PrecursorRT = ThisCompound.RetentionTime
    Else
        PrecursorRT = 0
    End If

    If IsNumeric(ThisCompound.Intensity) Then
        PrecursorIntensity = ThisCompound.Intensity
    Else
        PrecursorIntensity = 0
    End If

    '***** Determine the precursor charge state from the first MS1 peak lying within
    '***** MZTolerance of the precursor m/z.
    PrecursorCharge = -1
    For Each ThisPeak In MS1Spec.MSPeakList
        If Abs(ThisPeak.m_over_z - PrecursorMZ) < MZTolerance Then
            PrecursorCharge = ThisPeak.ChargeState
            Exit For
        End If
    Next

    If PrecursorCharge < 1 Then
        ' No matching MS1 peak, or the matching peak carries no usable charge (0 or negative).
        ' Count the compound in bin 0 and call it a +1 in the msalign file, so that a
        ' charge of 0 can never yield PRECURSOR_CHARGE=0 with PRECURSOR_MASS=0.
        ZDistn(0) = ZDistn(0) + 1
        PrecursorCharge = 1
        PrecursorMass = PrecursorMZ - Proton
    Else
        ' Record this precursor charge in the array of precursor charge frequencies.
        ' Charges above ZCeiling are counted in the top bin rather than indexing past
        ' the end of the array; the msalign record still carries the real charge.
        ChargeBin = PrecursorCharge
        If ChargeBin > ZCeiling Then ChargeBin = ZCeiling
        ZDistn(ChargeBin) = ZDistn(ChargeBin) + 1
        PrecursorMass = (PrecursorMZ * PrecursorCharge) - (PrecursorCharge * Proton)
    End If

    '***** Get the first MS scan number to report for this compound: the text after "#"
    '***** in the spectrum name, cut at the last "-" when the name contains one.
    StartPos = InStr(MS1Spec.Name, "#")
    StopPos = InStrRev(MS1Spec.Name, "-")
    If StopPos = 0 Then
        MS1ScanNumber = Mid(MS1Spec.Name, StartPos + 1)
    Else
        MS1ScanNumber = Mid(MS1Spec.Name, StartPos + 1, StopPos - (StartPos + 1))
    End If

    '***** Get the first MS/MS scan number to report for this compound (same rule)
    StartPos = InStr(MS2Spec.Name, "#")
    StopPos = InStrRev(MS2Spec.Name, "-")
    If StopPos = 0 Then
        MS2ScanNumber = Mid(MS2Spec.Name, StartPos + 1)
    Else
        MS2ScanNumber = Mid(MS2Spec.Name, StartPos + 1, StopPos - (StartPos + 1))
    End If

    ' Add the number of MS/MS peaks for this spectrum to the Peak Count distribution.
    ' Spectra with more than PkCountCeiling peaks are counted in the top bin. The -1
    ' sentinel keeps a failed Count lookup from re-using the previous compound's bin.
    PeakCountBin = -1
    PeakCountBin = MS2Spec.MSPeakList.Count
    If PeakCountBin > PkCountCeiling Then PeakCountBin = PkCountCeiling
    If PeakCountBin >= 0 Then
        PkCountDistn(PeakCountBin) = PkCountDistn(PeakCountBin) + 1
    End If

    '***** Now write this compound header to the msAlign file
    msalignFile.WriteLine("BEGIN IONS")
    msalignFile.WriteLine("ID=" & ThisCompound.CompoundNumber)
    msalignFile.WriteLine("FRACTION_ID=1")
    msalignFile.WriteLine("FILE_NAME=" & Analysis.Name)
    msalignFile.WriteLine("SCANS=" & MS2ScanNumber)
    msalignFile.WriteLine("RETENTION_TIME=" & PrecursorRT)
    ' Naturally the following will require update when something beyond MS/MS is being used.
    msalignFile.WriteLine("LEVEL=2")
    msalignFile.WriteLine("ACTIVATION=" & MSMSType)
    msalignFile.WriteLine("MS_ONE_ID=" & ThisCompound.CompoundNumber)
    msalignFile.WriteLine("MS_ONE_SCAN=" & MS1ScanNumber)
    msalignFile.WriteLine("PRECURSOR_MZ=" & PrecursorMZ)
    msalignFile.WriteLine("PRECURSOR_CHARGE=" & PrecursorCharge)
    msalignFile.WriteLine("PRECURSOR_MASS=" & PrecursorMass)
    ' TopFD writes floating point precursor intensities. Format with exactly two decimals
    ' (leading zero kept, no parentheses, no digit grouping): an integer intensity still
    ' prints as "1234.00", and a fractional one no longer becomes a malformed "12.5.00".
    msalignFile.WriteLine("PRECURSOR_INTENSITY=" & FormatNumber(PrecursorIntensity, 2, -1, 0, 0))

    '***** Now write the compound MS/MS peaks to the msAlign file, one per line:
    '***** neutral mass (4 decimals), scaled intensity (2 decimals), charge.
    For Each ThisPeak In MS2Spec.MSPeakList
        msalignFile.Write(Round(ThisPeak.ChargeState * (ThisPeak.m_over_z - Proton), 4))
        msalignFile.Write(" ")
        msalignFile.Write(Round(ThisPeak.Intensity * IntensityMultiplier, 2))
        msalignFile.Write(" ")
        msalignFile.WriteLine(ThisPeak.ChargeState)
    Next
    ' The trailing vbCrLf leaves a blank line between records.
    msalignFile.WriteLine("END IONS" & vbCrLf)
    '***** We're done with this Compound. Continue to the next one.
Next

'***** Now complete writing the msalign file
msalignFile.Close
'***** QC METRIC COMPUTATION

' Summarise one value-indexed histogram.
'   Distn             array where Distn(v) is the number of observations with value v
'   FirstBin, LastBin inclusive range of bins to take into account
'   OutMin .. OutMode receive the minimum, first quartile, median, third quartile,
'                     maximum and mode (most frequent value; the lowest one on ties)
' Each quartile is the smallest value whose cumulative count reaches 1/4, 1/2 or 3/4
' of the total. When the range holds no observations at all, every output is -1.
' (Without that guard all three targets would be 0 and the quartiles would be reported
' as FirstBin even though minimum, maximum and mode are -1.)
Sub SummarizeDistn(ByRef Distn, ByVal FirstBin, ByVal LastBin, _
                   ByRef OutMin, ByRef OutQ1, ByRef OutMedian, _
                   ByRef OutQ3, ByRef OutMax, ByRef OutMode)
    Dim Bin, Total, Running, TopFreq
    Dim Q1Target, Q2Target, Q3Target

    OutMin = -1
    OutQ1 = -1
    OutMedian = -1
    OutQ3 = -1
    OutMax = -1
    OutMode = -1

    ' Pass 1: total count, extremes and mode.
    Total = 0
    TopFreq = 0
    For Bin = FirstBin To LastBin
        If Distn(Bin) > 0 Then
            Total = Total + Distn(Bin)
            If OutMin = -1 Then OutMin = Bin
            If Distn(Bin) > TopFreq Then
                TopFreq = Distn(Bin)
                OutMode = Bin
            End If
            OutMax = Bin
        End If
    Next

    ' Nothing observed: leave every statistic at -1.
    If Total = 0 Then Exit Sub

    ' What target number of spectra must be taken into account to find the first,
    ' second, and third quartiles?
    Q1Target = Total / 4
    Q2Target = Total / 2
    Q3Target = Q1Target + Q2Target

    ' Pass 2: walk the cumulative count until each target is reached.
    Running = 0
    For Bin = FirstBin To LastBin
        Running = Running + Distn(Bin)
        If OutQ1 = -1 And Running >= Q1Target Then OutQ1 = Bin
        If OutMedian = -1 And Running >= Q2Target Then OutMedian = Bin
        If OutQ3 = -1 And Running >= Q3Target Then OutQ3 = Bin
    Next
End Sub

'***** Compute quartiles for Precursor Z, skipping unknown precursor charges (bin 0)
SummarizeDistn ZDistn, 1, ZCeiling, _
               ZMinimum, ZQuartile1, ZMedian, ZQuartile3, ZMaximum, ZMode

'***** Compute quartiles for MS/MS Peak Count (bin 0, spectra without peaks, is included)
SummarizeDistn PkCountDistn, 0, PkCountCeiling, _
               PkCountMinimum, PkCountQuartile1, PkCountMedian, PkCountQuartile3, PkCountMaximum, PkCountMode
'***** Write the QC report: tab-separated "name <TAB> value" lines grouped into sections
'***** separated by blank lines, followed by the two histograms.
With qcFile
    ' Overall counts.
    .WriteLine("AllSpectraCount" & vbTab & Analysis.SpectraCount)
    .WriteLine("AutoMSnCompoundCount" & vbTab & Analysis.Compounds.Count)
    .WriteLine("CompoundsLackingZ" & vbTab & ZDistn(0))
    .WriteLine

    ' Precursor charge summary.
    .WriteLine("ZMinimum" & vbTab & ZMinimum)
    .WriteLine("ZQuartile1" & vbTab & ZQuartile1)
    .WriteLine("ZMedian" & vbTab & ZMedian)
    .WriteLine("ZQuartile3" & vbTab & ZQuartile3)
    .WriteLine("ZMaximum" & vbTab & ZMaximum)
    .WriteLine("ZMode" & vbTab & ZMode)
    .WriteLine

    ' MS/MS peak count summary.
    .WriteLine("MS2PkCountMinimum" & vbTab & PkCountMinimum)
    .WriteLine("MS2PkCountQuartile1" & vbTab & PkCountQuartile1)
    .WriteLine("MS2PkCountMedian" & vbTab & PkCountMedian)
    .WriteLine("MS2PkCountQuartile3" & vbTab & PkCountQuartile3)
    .WriteLine("MS2PkCountMaximum" & vbTab & PkCountMaximum)
    .WriteLine("MS2PkCountMode" & vbTab & PkCountMode)
    .WriteLine

    ' Charge histogram, bins 0 through the largest observed charge.
    ' When ZMaximum is -1 the loop body never runs and only the header is written.
    .WriteLine("CompoundZ" & vbTab & "Frequency")
    For BinIndex = 0 To ZMaximum
        .WriteLine(BinIndex & vbTab & ZDistn(BinIndex))
    Next
    .WriteLine

    ' Peak-count histogram, bins 0 through the largest observed peak count.
    .WriteLine("MS2PkCount" & vbTab & "Frequency")
    For BinIndex = 0 To PkCountMaximum
        .WriteLine(BinIndex & vbTab & PkCountDistn(BinIndex))
    Next

    .Close
End With

' NOTE(review): this message is shown unconditionally. Runtime errors are suppressed
' earlier in the script (On Error Resume Next), so it does not prove that both files
' were actually written.
MsgBox "Wrote .msalign and .qc.tsv file successfully."
Form.Close