Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
InternalCalibration.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2015.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Alexandra Zerck $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 
35 
36 #ifndef OPENMS_FILTERING_CALIBRATION_INTERNALCALIBRATION_H
37 #define OPENMS_FILTERING_CALIBRATION_INTERNALCALIBRATION_H
44 // TODO remove dependency from file reader here!
46 
47 namespace OpenMS
48 {
49 
61  class OPENMS_DLLAPI InternalCalibration :
62  public DefaultParamHandler,
63  public ProgressLogger
64  {
65 public:
68 
71 
84  template <typename InputPeakType>
85  void calibrateMapSpectrumwise(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<double> & ref_masses);
86 
99  template <typename InputPeakType>
100  void calibrateMapGlobally(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<double> & ref_masses, String trafo_file_name = "");
101 
115  template <typename InputPeakType>
116  void calibrateMapGlobally(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<PeptideIdentification> & ref_ids, String trafo_file_name = "");
117 
126  void calibrateMapGlobally(const FeatureMap & feature_map, FeatureMap & calibrated_feature_map, String trafo_file_name = "");
127 
137  void calibrateMapGlobally(const FeatureMap & feature_map, FeatureMap & calibrated_feature_map, std::vector<PeptideIdentification> & ref_ids, String trafo_file_name = "");
138 
139 
140 
141  template <typename InputPeakType>
142  void calibrateMapList(std::vector<MSExperiment<InputPeakType> > & exp_list, std::vector<MSExperiment<InputPeakType> > & calibrated_exp_list, std::vector<double> & ref_masses, std::vector<double> & detected_background_masses);
143 
144 
145 protected:
146 
148  void makeLinearRegression_(std::vector<double> & observed_masses, std::vector<double> & theoretical_masses);
149 
151  void checkReferenceIds_(std::vector<PeptideIdentification> & pep_ids);
152 
154  void checkReferenceIds_(const FeatureMap & feature_map);
155 
157  void applyTransformation_(const FeatureMap & feature_map, FeatureMap & calibrated_feature_map);
158 
161  }; // class InternalCalibration
162 
163 
164  template <typename InputPeakType>
165  void InternalCalibration::calibrateMapSpectrumwise(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<double> & ref_masses)
166  {
167 #ifdef DEBUG_CALIBRATION
168  std::cout.precision(writtenDigits<double>(0.0));
169 #endif
170  if (exp.empty())
171  {
172  std::cout << "Input is empty." << std::endl;
173  return;
174  }
175 
176  if (exp[0].getType() != SpectrumSettings::PEAKS)
177  {
178  std::cout << "Attention: this function is assuming peak data." << std::endl;
179  }
180  calibrated_exp = exp;
181 
182  Size num_ref_peaks = ref_masses.size();
183  bool use_ppm = (param_.getValue("mz_tolerance_unit") == "ppm") ? true : false;
184  double mz_tol = param_.getValue("mz_tolerance");
185  startProgress(0, exp.size(), "calibrate spectra");
186  // for each spectrum
187  for (Size spec = 0; spec < exp.size(); ++spec)
188  {
189  // calibrate only MS1 spectra
190  if (exp[spec].getMSLevel() != 1)
191  {
192  continue;
193  }
194 
195 
196  std::vector<double> corr_masses, rel_errors, found_ref_masses;
197  UInt corr_peaks = 0;
198  for (Size peak = 0; peak < exp[spec].size(); ++peak)
199  {
200  for (Size ref_peak = 0; ref_peak < num_ref_peaks; ++ref_peak)
201  {
202  if (!use_ppm && fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) < mz_tol)
203  {
204  found_ref_masses.push_back(ref_masses[ref_peak]);
205  corr_masses.push_back(exp[spec][peak].getMZ());
206  ++corr_peaks;
207  break;
208  }
209  else if (use_ppm && fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) / ref_masses[ref_peak] * 1e6 < mz_tol)
210  {
211  found_ref_masses.push_back(ref_masses[ref_peak]);
212  corr_masses.push_back(exp[spec][peak].getMZ());
213  ++corr_peaks;
214  break;
215  }
216  }
217  }
218  if (corr_peaks < 2)
219  {
220  std::cout << "spec: " << spec
221  << " less than 2 reference masses were detected within a reasonable error range\n";
222  std::cout << "This spectrum cannot be calibrated!\n";
223  continue;
224  }
225 
226  // determine rel error in ppm for the two reference masses
227  for (Size ref_peak = 0; ref_peak < found_ref_masses.size(); ++ref_peak)
228  {
229  rel_errors.push_back((found_ref_masses[ref_peak] - corr_masses[ref_peak]) / corr_masses[ref_peak] * 1e6);
230  }
231 
232  makeLinearRegression_(corr_masses, found_ref_masses);
233 
234  // now calibrate the whole spectrum
235  for (unsigned int peak = 0; peak < calibrated_exp[spec].size(); ++peak)
236  {
237 #ifdef DEBUG_CALIBRATION
238  std::cout << calibrated_exp[spec][peak].getMZ() << "\t";
239 #endif
240  double mz = calibrated_exp[spec][peak].getMZ();
241  mz = trafo_.apply(mz);
242  calibrated_exp[spec][peak].setMZ(mz);
243 #ifdef DEBUG_CALIBRATION
244  std::cout << calibrated_exp[spec][peak].getMZ() << std::endl;
245 #endif
246 
247  }
248  setProgress(spec);
249  } // for(Size spec=0;spec < exp.size(); ++spec)
250  endProgress();
251  }
252 
253  template <typename InputPeakType>
255  MSExperiment<InputPeakType> & calibrated_exp,
256  std::vector<PeptideIdentification> & ref_ids, String trafo_file_name)
257  {
258  bool use_ppm = param_.getValue("mz_tolerance_unit") == "ppm" ? true : false;
259  double mz_tolerance = param_.getValue("mz_tolerance");
260  if (exp.empty())
261  {
262  std::cout << "Input is empty." << std::endl;
263  return;
264  }
265 
266  if (exp[0].getType() != SpectrumSettings::PEAKS)
267  {
268  std::cout << "Attention: this function is assuming peak data." << std::endl;
269  }
270  // check if the ids contain meta information about the peak positions
271  checkReferenceIds_(ref_ids);
272 
273  std::vector<double> theoretical_masses, observed_masses;
274  for (Size p_id = 0; p_id < ref_ids.size(); ++p_id)
275  {
276  for (Size p_h = 0; p_h < ref_ids[p_id].getHits().size(); ++p_h)
277  {
278  Int charge = ref_ids[p_id].getHits()[p_h].getCharge();
279  double theo_mass = ref_ids[p_id].getHits()[p_h].getSequence().getMonoWeight(Residue::Full, charge) / (double)charge;
280  // first find corresponding ms1-spectrum
281  typename MSExperiment<InputPeakType>::ConstIterator rt_iter = exp.RTBegin(ref_ids[p_id].getRT());
282  while (rt_iter != exp.begin() && rt_iter->getMSLevel() != 1)
283  {
284  --rt_iter;
285  }
286  // now find closest peak
287  typename MSSpectrum<InputPeakType>::ConstIterator mz_iter = rt_iter->MZBegin(ref_ids[p_id].getMZ());
288  //std::cout << mz_iter->getMZ() <<" "<<(double)ref_ids[p_id].getMZ()<<"\t";
289  double dist = ref_ids[p_id].getMZ() - mz_iter->getMZ();
290  //std::cout << dist << "\t";
291  if ((mz_iter + 1) != rt_iter->end()
292  && fabs((mz_iter + 1)->getMZ() - ref_ids[p_id].getMZ()) < fabs(dist)
293  && mz_iter != rt_iter->begin()
294  && fabs((mz_iter - 1)->getMZ() - ref_ids[p_id].getMZ()) < fabs((mz_iter + 1)->getMZ() - ref_ids[p_id].getMZ())) // if mz_iter +1 has smaller dist than mz_iter and mz_iter-1
295  {
296  if ((use_ppm &&
297  fabs((mz_iter + 1)->getMZ() - ref_ids[p_id].getMZ()) / ref_ids[p_id].getMZ() * 1e06 < mz_tolerance) ||
298  (!use_ppm && fabs((mz_iter + 1)->getMZ() - ref_ids[p_id].getMZ()) < mz_tolerance))
299  {
300  //std::cout <<(mz_iter +1)->getMZ() - ref_ids[p_id].getMZ()<<"\t";
301  observed_masses.push_back((mz_iter + 1)->getMZ());
302  theoretical_masses.push_back(theo_mass);
303  //std::cout << (mz_iter +1)->getMZ() << " ~ "<<theo_mass << " charge: "<<ref_ids[p_id].getHits()[p_h].getCharge()
304  //<< "\tplus 1"<< std::endl;
305  }
306  }
307  else if (mz_iter != rt_iter->begin()
308  && fabs((mz_iter - 1)->getMZ() - ref_ids[p_id].getMZ()) < fabs(dist)) // if mz_iter-1 has smaller dist than mz_iter
309  {
310  if ((use_ppm &&
311  fabs((mz_iter - 1)->getMZ() - ref_ids[p_id].getMZ()) / ref_ids[p_id].getMZ() * 1e06 < mz_tolerance) ||
312  (!use_ppm && fabs((mz_iter - 1)->getMZ() - ref_ids[p_id].getMZ()) < mz_tolerance))
313  {
314  //std::cout <<(mz_iter -1)->getMZ() - ref_ids[p_id].getMZ()<<"\t";
315  observed_masses.push_back((mz_iter - 1)->getMZ());
316  theoretical_masses.push_back(theo_mass);
317  //std::cout << (mz_iter -1)->getMZ() << " ~ "<<theo_mass << " charge: "<<ref_ids[p_id].getHits()[p_h].getCharge()
318  //<< "\tminus 1"<< std::endl;
319  }
320  }
321  else
322  {
323  if ((use_ppm &&
324  fabs((mz_iter)->getMZ() - ref_ids[p_id].getMZ()) / ref_ids[p_id].getMZ() * 1e06 < mz_tolerance) ||
325  (!use_ppm && fabs((mz_iter)->getMZ() - ref_ids[p_id].getMZ()) < mz_tolerance))
326  {
327 
328  observed_masses.push_back(mz_iter->getMZ());
329  theoretical_masses.push_back(theo_mass);
330 // std::cout <<"\t"<< mz_iter->getMZ() << " ~ "<<theo_mass<< " charge: "<<ref_ids[p_id].getHits()[p_h].getCharge()
331 // << "\tat mz_iter"<< std::endl;
332  }
333  }
334  }
335  }
336 
337  makeLinearRegression_(observed_masses, theoretical_masses);
338  static_cast<ExperimentalSettings &>(calibrated_exp) = exp;
339  calibrated_exp.resize(exp.size());
340 
341  // for each spectrum
342  for (Size spec = 0; spec < exp.size(); ++spec)
343  {
344  // calibrate only MS1 spectra
345  if (exp[spec].getMSLevel() != 1)
346  {
347  calibrated_exp[spec] = exp[spec];
348  continue;
349  }
350  // copy the spectrum meta data
351  calibrated_exp[spec] = exp[spec];
352 
353  for (unsigned int peak = 0; peak < exp[spec].size(); ++peak)
354  {
355 #ifdef DEBUG_CALIBRATION
356  std::cout << exp[spec][peak].getMZ() << "\t";
357 #endif
358  double mz = exp[spec][peak].getMZ();
359  mz = trafo_.apply(mz);
360  calibrated_exp[spec][peak].setMZ(mz);
361 #ifdef DEBUG_CALIBRATION
362  std::cout << calibrated_exp[spec][peak].getMZ() << std::endl;
363 #endif
364 
365  }
366  } // for(Size spec=0;spec < exp.size(); ++spec)
367  if (trafo_file_name != "")
368  {
369  TransformationXMLFile().store(trafo_file_name, trafo_);
370  }
371  }
372 
373  template <typename InputPeakType>
374  void InternalCalibration::calibrateMapGlobally(const MSExperiment<InputPeakType> & exp, MSExperiment<InputPeakType> & calibrated_exp, std::vector<double> & ref_masses, String trafo_file_name)
375  {
376  if (exp.empty())
377  {
378  std::cout << "Input is empty." << std::endl;
379  return;
380  }
381 
382  if (exp[0].getType() != SpectrumSettings::PEAKS)
383  {
384  std::cout << "Attention: this function is assuming peak data." << std::endl;
385  }
386 
387 
388  Size num_ref_peaks = ref_masses.size();
389  bool use_ppm = (param_.getValue("mz_tolerance_unit") == "ppm") ? true : false;
390  double mz_tol = param_.getValue("mz_tolerance");
391  startProgress(0, exp.size(), "calibrate spectra");
392  std::vector<double> corr_masses, rel_errors, found_ref_masses;
393  UInt corr_peaks = 0;
394  // for each spectrum
395  for (Size spec = 0; spec < exp.size(); ++spec)
396  {
397  // calibrate only MS1 spectra
398  if (exp[spec].getMSLevel() != 1)
399  continue;
400  for (Size peak = 0; peak < exp[spec].size(); ++peak)
401  {
402  for (Size ref_peak = 0; ref_peak < num_ref_peaks; ++ref_peak)
403  {
404  if (!use_ppm && fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) < mz_tol)
405  {
406  found_ref_masses.push_back(ref_masses[ref_peak]);
407  corr_masses.push_back(exp[spec][peak].getMZ());
408  ++corr_peaks;
409  break;
410  }
411  else if (use_ppm && fabs(exp[spec][peak].getMZ() - ref_masses[ref_peak]) / ref_masses[ref_peak] * 1e6 < mz_tol)
412  {
413  found_ref_masses.push_back(ref_masses[ref_peak]);
414  corr_masses.push_back(exp[spec][peak].getMZ());
415  ++corr_peaks;
416  break;
417  }
418  }
419  }
420  }
421  if (corr_peaks < 2)
422  {
423  std::cout << "Less than 2 reference masses were detected within a reasonable error range\n";
424  std::cout << "This spectrum cannot be calibrated!\n";
425  return;
426  }
427 
428  // calculate the (linear) calibration function
429  makeLinearRegression_(corr_masses, found_ref_masses);
430  static_cast<ExperimentalSettings &>(calibrated_exp) = exp;
431  calibrated_exp.resize(exp.size());
432 
433  // apply the calibration function to each peak
434  for (Size spec = 0; spec < exp.size(); ++spec)
435  {
436  // calibrate only MS1 spectra
437  if (exp[spec].getMSLevel() != 1)
438  {
439  calibrated_exp[spec] = exp[spec];
440  continue;
441  }
442 
443  // copy the spectrum data
444  calibrated_exp[spec] = exp[spec];
445 
446  for (unsigned int peak = 0; peak < exp[spec].size(); ++peak)
447  {
448 #ifdef DEBUG_CALIBRATION
449  std::cout << exp[spec][peak].getMZ() << "\t";
450 #endif
451  double mz = exp[spec][peak].getMZ();
452  mz = trafo_.apply(mz);
453  calibrated_exp[spec][peak].setMZ(mz);
454 
455 #ifdef DEBUG_CALIBRATION
456  std::cout << calibrated_exp[spec][peak].getMZ() << std::endl;
457 #endif
458 
459  }
460  setProgress(spec);
461  } // for(Size spec=0;spec < exp.size(); ++spec)
462  endProgress();
463  if (trafo_file_name != "")
464  {
465  TransformationXMLFile().store(trafo_file_name, trafo_);
466  }
467  }
468 
469 } // namespace OpenMS
470 
471 #endif // OPENMS_FILTERING_CALIBRATION_INTERNALCALIBRATION_H
A more convenient string class.
Definition: String.h:57
Size size() const
Definition: MSExperiment.h:117
Param param_
Container for current parameters.
Definition: DefaultParamHandler.h:150
A container for features.
Definition: FeatureMap.h:93
A simple calibration method using linear interpolation of given reference masses. ...
Definition: InternalCalibration.h:61
ContainerType::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSSpectrum.h:123
Iterator begin()
Definition: MSExperiment.h:147
void resize(Size s)
Definition: MSExperiment.h:122
void store(String filename, const TransformationDescription &transformation)
Stores the data in an TransformationXML file.
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
double apply(double value) const
Applies the transformation to value.
Iterator end()
Definition: MSExperiment.h:157
ConstIterator RTBegin(CoordinateType rt) const
Fast search for spectrum range begin.
Definition: MSExperiment.h:374
void calibrateMapGlobally(const MSExperiment< InputPeakType > &exp, MSExperiment< InputPeakType > &calibrated_exp, std::vector< double > &ref_masses, String trafo_file_name="")
Calibrate a peak map using given reference masses with one calibration function for the whole map...
Definition: InternalCalibration.h:374
Used to load and store TransformationXML files.
Definition: TransformationXMLFile.h:57
void endProgress() const
Ends the progress display.
~InternalCalibration()
Destructor.
Definition: InternalCalibration.h:70
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
Peak data (also called centroided data or stick data)
Definition: SpectrumSettings.h:74
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:69
void calibrateMapSpectrumwise(const MSExperiment< InputPeakType > &exp, MSExperiment< InputPeakType > &calibrated_exp, std::vector< double > &ref_masses)
Calibrate a peak map using given reference masses with a separate calibration function for each spect...
Definition: InternalCalibration.h:165
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:103
void makeLinearRegression_(std::vector< double > &observed_masses, std::vector< double > &theoretical_masses)
the actual calibration function
void checkReferenceIds_(std::vector< PeptideIdentification > &pep_ids)
check if reference ids contain RT and MZ information as meta values
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
bool empty() const
Definition: MSExperiment.h:127
void setProgress(SignedSize value) const
Sets the current progress.
Int writtenDigits< double >(const double &)
Number of digits commonly used for writing a double (a.k.a. precision).
Definition: Types.h:213
Definition: Residue.h:361
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:58
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
int Int
Signed integer type.
Definition: Types.h:96
Description of the experimental settings.
Definition: ExperimentalSettings.h:59
TransformationDescription trafo_
here the transformation is stored
Definition: InternalCalibration.h:160

OpenMS / TOPP release 2.0.0 Documentation generated on Tue Aug 25 2015 05:53:47 using doxygen 1.8.9.1