Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
MRMTransitionGroupPicker.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2015.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_ANALYSIS_OPENSWATH_MRMTRANSITIONGROUPPICKER_H
36 #define OPENMS_ANALYSIS_OPENSWATH_MRMTRANSITIONGROUPPICKER_H
37 
43 
45 
48 
51 
52 #include <numeric>
53 
54 // Cross-correlation
57 
58 //#define DEBUG_TRANSITIONGROUPPICKER
59 
60 namespace OpenMS
61 {
62 
81  class OPENMS_DLLAPI MRMTransitionGroupPicker :
82  public DefaultParamHandler
83  {
84 
85 public:
86 
87  // this is the type in which we store the chromatograms for this analysis
89 
93 
97 
114  template <typename SpectrumT, typename TransitionT>
116  {
117  std::vector<RichPeakChromatogram> picked_chroms_;
118 
119  PeakPickerMRM picker;
120  picker.setParameters(param_.copy("PeakPickerMRM:", true));
121 
122  // Pick chromatograms
123  for (Size k = 0; k < transition_group.getChromatograms().size(); k++)
124  {
125  RichPeakChromatogram& chromatogram = transition_group.getChromatograms()[k];
126  if (!chromatogram.isSorted())
127  {
128  chromatogram.sortByPosition();
129  }
130 
131  RichPeakChromatogram picked_chrom;
132  picker.pickChromatogram(chromatogram, picked_chrom);
133  picked_chrom.sortByIntensity(); // we could do without that
134  picked_chroms_.push_back(picked_chrom);
135  }
136 
137  // Find features (peak groups) in this group of transitions.
138  // While there are still peaks left, one will be picked and used to create
139  // a feature. Whenever we run out of peaks, we will get -1 back as index
140  // and terminate.
141  int chr_idx, peak_idx, cnt = 0;
142  std::vector<MRMFeature> features;
143  while (true)
144  {
145  chr_idx = -1; peak_idx = -1;
146  findLargestPeak(picked_chroms_, chr_idx, peak_idx);
147  if (chr_idx == -1 && peak_idx == -1) break;
148 
149  // Compute a feature from the individual chromatograms and add non-zero features
150  MRMFeature mrm_feature = createMRMFeature(transition_group, picked_chroms_, chr_idx, peak_idx);
151  if (mrm_feature.getIntensity() > 0)
152  {
153  features.push_back(mrm_feature);
154  }
155 
156  cnt++;
157  if ((stop_after_feature_ > 0 && cnt > stop_after_feature_) &&
158  mrm_feature.getIntensity() / (double)mrm_feature.getMetaValue("total_xic") < stop_after_intensity_ratio_)
159  {
160  break;
161  }
162  }
163 
164  // Check for completely overlapping features
165  for (Size i = 0; i < features.size(); i++)
166  {
167  MRMFeature& mrm_feature = features[i];
168  bool skip = false;
169  for (Size j = 0; j < i; j++)
170  {
171  if ((double)mrm_feature.getMetaValue("leftWidth") >= (double)features[j].getMetaValue("leftWidth") &&
172  (double)mrm_feature.getMetaValue("rightWidth") <= (double)features[j].getMetaValue("rightWidth") )
173  { skip = true; }
174  }
175  if (mrm_feature.getIntensity() > 0 && !skip)
176  {
177  transition_group.addFeature(mrm_feature);
178  }
179  }
180 
181  }
182 
184  template <typename SpectrumT, typename TransitionT>
186  std::vector<SpectrumT>& picked_chroms, int& chr_idx, int& peak_idx)
187  {
188  MRMFeature mrmFeature;
189  mrmFeature.setIntensity(0.0);
190  double best_left = picked_chroms[chr_idx].getFloatDataArrays()[1][peak_idx];
191  double best_right = picked_chroms[chr_idx].getFloatDataArrays()[2][peak_idx];
192  double peak_apex = picked_chroms[chr_idx][peak_idx].getRT();
193  LOG_DEBUG << "**** Creating MRMFeature for peak " << chr_idx << " " << peak_idx << " " <<
194  picked_chroms[chr_idx][peak_idx] << " with borders " << best_left << " " <<
195  best_right << " (" << best_right - best_left << ")" << std::endl;
196 
197  if (recalculate_peaks_)
198  {
199  // This may change best_left / best_right
200  recalculatePeakBorders_(picked_chroms, best_left, best_right, recalculate_peaks_max_z_);
201  if (peak_apex < best_left || peak_apex > best_right)
202  {
203  // apex fell out of range, lets correct it
204  peak_apex = (best_left + best_right) / 2.0;
205  }
206  }
207  picked_chroms[chr_idx][peak_idx].setIntensity(0.0);
208 
209  // Remove other, overlapping, picked peaks (in this and other
210  // chromatograms) and then ensure that at least one peak is set to zero
211  // (the currently best peak).
212  remove_overlapping_features(picked_chroms, best_left, best_right);
213 
214  // Check for minimal peak width -> return empty feature (Intensity zero)
215  if (min_peak_width_ > 0.0 && std::fabs(best_right - best_left) < min_peak_width_) {return mrmFeature;}
216 
217  if (compute_peak_quality_)
218  {
219  String outlier = "none";
220  double qual = computeQuality_(transition_group, picked_chroms, chr_idx, best_left, best_right, outlier);
221  if (qual < min_qual_) {return mrmFeature; }
222  mrmFeature.setMetaValue("potentialOutlier", outlier);
223  mrmFeature.setMetaValue("initialPeakQuality", qual);
224  mrmFeature.setOverallQuality(qual);
225  }
226 
227  // Prepare linear resampling of all the chromatograms, here creating the
228  // empty master_peak_container with the same RT (m/z) values as the reference
229  // chromatogram.
230  SpectrumT master_peak_container;
231  const SpectrumT& ref_chromatogram = transition_group.getChromatograms()[chr_idx];
232  prepareMasterContainer_(ref_chromatogram, master_peak_container, best_left, best_right);
233 
234  double total_intensity = 0; double total_peak_apices = 0; double total_xic = 0;
235  for (Size k = 0; k < transition_group.getChromatograms().size(); k++)
236  {
237  const SpectrumT& chromatogram = transition_group.getChromatograms()[k];
238  for (typename SpectrumT::const_iterator it = chromatogram.begin(); it != chromatogram.end(); it++)
239  {
240  total_xic += it->getIntensity();
241  }
242 
243  // resample the current chromatogram
244  const SpectrumT used_chromatogram = resampleChromatogram_(chromatogram, master_peak_container, best_left, best_right);
245  // const SpectrumT& used_chromatogram = chromatogram; // instead of resampling
246 
247  Feature f;
248  double quality = 0;
249  f.setQuality(0, quality);
250  f.setOverallQuality(quality);
251 
252  ConvexHull2D::PointArrayType hull_points;
253  double intensity_sum(0.0), rt_sum(0.0);
254  double peak_apex_int = -1;
255  double peak_apex_dist = std::fabs(used_chromatogram.begin()->getMZ() - peak_apex);
256  // FEATURE : use RTBegin / MZBegin -> for this we need to know whether the template param is a real chromatogram or a spectrum!
257  for (typename SpectrumT::const_iterator it = used_chromatogram.begin(); it != used_chromatogram.end(); it++)
258  {
259  if (it->getMZ() > best_left && it->getMZ() < best_right)
260  {
261  DPosition<2> p;
262  p[0] = it->getMZ();
263  p[1] = it->getIntensity();
264  hull_points.push_back(p);
265  if (std::fabs(it->getMZ() - peak_apex) <= peak_apex_dist)
266  {
267  peak_apex_int = p[1];
268  peak_apex_dist = std::fabs(it->getMZ() - peak_apex);
269  }
270  rt_sum += it->getMZ();
271  intensity_sum += it->getIntensity();
272  }
273  }
274 
275  if (background_subtraction_ != "none")
276  {
277  double background = 0;
278  if (background_subtraction_ == "smoothed")
279  {
280  throw Exception::NotImplemented(__FILE__, __LINE__, __PRETTY_FUNCTION__);
281  /*
282  * Currently we do not have access to the smoothed chromatogram any more
283  if (smoothed_chroms_.size() <= k)
284  {
285  std::cerr << "Tried to calculate background estimation without any smoothed chromatograms" << std::endl;
286  background = 0;
287  }
288  else
289  {
290  background = calculateBgEstimation_(smoothed_chroms_[k], best_left, best_right);
291  }
292  */
293  }
294  else if (background_subtraction_ == "original")
295  {
296  background = calculateBgEstimation_(used_chromatogram, best_left, best_right);
297  }
298  intensity_sum -= background;
299  if (intensity_sum < 0)
300  {
301  std::cerr << "Warning: Intensity was below 0 after background subtraction: " << intensity_sum << ". Setting it to 0." << std::endl;
302  intensity_sum = 0;
303  }
304  }
305 
306  f.setRT(picked_chroms[chr_idx][peak_idx].getMZ());
307  f.setMZ(chromatogram.getMetaValue("product_mz"));
308  f.setIntensity(intensity_sum);
309  ConvexHull2D hull;
310  hull.setHullPoints(hull_points);
311  f.getConvexHulls().push_back(hull);
312  f.setMetaValue("MZ", chromatogram.getMetaValue("product_mz"));
313  f.setMetaValue("native_id", chromatogram.getNativeID());
314  f.setMetaValue("peak_apex_int", peak_apex_int);
315  //f.setMetaValue("leftWidth", best_left);
316  //f.setMetaValue("rightWidth", best_right);
317 
318  total_intensity += intensity_sum;
319  total_peak_apices += peak_apex_int;
320  mrmFeature.addFeature(f, chromatogram.getNativeID()); //map index and feature
321  }
322 
323  // Also pick the precursor chromatogram (note total_xic is not extracted here, only for fragment traces)
324  if (transition_group.hasPrecursorChromatogram("Precursor_i0"))
325  {
326  const SpectrumT& chromatogram = transition_group.getPrecursorChromatogram("Precursor_i0");
327 
328  // resample the current chromatogram
329  const SpectrumT used_chromatogram = resampleChromatogram_(chromatogram, master_peak_container, best_left, best_right);
330 
331  Feature f;
332  double quality = 0;
333  f.setQuality(0, quality);
334  f.setOverallQuality(quality);
335 
336  ConvexHull2D::PointArrayType hull_points;
337  double intensity_sum(0.0), rt_sum(0.0);
338  double peak_apex_int = -1;
339  double peak_apex_dist = std::fabs(used_chromatogram.begin()->getMZ() - peak_apex);
340  // FEATURE : use RTBegin / MZBegin -> for this we need to know whether the template param is a real chromatogram or a spectrum!
341  for (typename SpectrumT::const_iterator it = used_chromatogram.begin(); it != used_chromatogram.end(); it++)
342  {
343  if (it->getMZ() > best_left && it->getMZ() < best_right)
344  {
345  DPosition<2> p;
346  p[0] = it->getMZ();
347  p[1] = it->getIntensity();
348  hull_points.push_back(p);
349  if (std::fabs(it->getMZ() - peak_apex) <= peak_apex_dist)
350  {
351  peak_apex_int = p[1];
352  peak_apex_dist = std::fabs(it->getMZ() - peak_apex);
353  }
354  rt_sum += it->getMZ();
355  intensity_sum += it->getIntensity();
356  }
357  }
358 
359  if (chromatogram.metaValueExists("precursor_mz"))
360  {
361  f.setMZ(chromatogram.getMetaValue("precursor_mz"));
362  }
363  f.setRT(picked_chroms[chr_idx][peak_idx].getMZ());
364  f.setIntensity(intensity_sum);
365  ConvexHull2D hull;
366  hull.setHullPoints(hull_points);
367  f.getConvexHulls().push_back(hull);
368  f.setMetaValue("native_id", chromatogram.getNativeID());
369  f.setMetaValue("peak_apex_int", peak_apex_int);
370 
371  mrmFeature.addPrecursorFeature(f, "Precursor_i0");
372  }
373 
374  mrmFeature.setRT(peak_apex);
375  mrmFeature.setIntensity(total_intensity);
376  mrmFeature.setMetaValue("PeptideRef", transition_group.getTransitionGroupID());
377  mrmFeature.setMetaValue("leftWidth", best_left);
378  mrmFeature.setMetaValue("rightWidth", best_right);
379  mrmFeature.setMetaValue("total_xic", total_xic);
380  mrmFeature.setMetaValue("peak_apices_sum", total_peak_apices);
381 
382  mrmFeature.ensureUniqueId();
383  return mrmFeature;
384  }
385 
386  // maybe private, but we have tests
387 
399  template <typename SpectrumT>
400  void remove_overlapping_features(std::vector<SpectrumT>& picked_chroms, double best_left, double best_right)
401  {
402  // delete all seeds that lie within the current seed
403  //std::cout << "Removing features for peak between " << best_left << " " << best_right << std::endl;
404  for (Size k = 0; k < picked_chroms.size(); k++)
405  {
406  for (Size i = 0; i < picked_chroms[k].size(); i++)
407  {
408  if (picked_chroms[k][i].getMZ() >= best_left && picked_chroms[k][i].getMZ() <= best_right)
409  {
410  //std::cout << "For Chrom " << k << " removing peak " << picked_chroms[k][i].getMZ() << " l/r : " << picked_chroms[k].getFloatDataArrays()[1][i] << " " <<
411  // picked_chroms[k].getFloatDataArrays()[2][i] << " with int " << picked_chroms[k][i].getIntensity() <<std::endl;
412  picked_chroms[k][i].setIntensity(0.0);
413  }
414  }
415  }
416 
417  // delete all seeds that overlap within the current seed
418  for (Size k = 0; k < picked_chroms.size(); k++)
419  {
420  for (Size i = 0; i < picked_chroms[k].size(); i++)
421  {
422  if (picked_chroms[k][i].getIntensity() <= 0.0) {continue; }
423 
424  double left = picked_chroms[k].getFloatDataArrays()[1][i];
425  double right = picked_chroms[k].getFloatDataArrays()[2][i];
426  if ((left > best_left && left < best_right)
427  || (right > best_left && right < best_right))
428  {
429  //std::cout << "= For Chrom " << k << " removing contained peak " << picked_chroms[k][i].getMZ() << " l/r : " << picked_chroms[k].getFloatDataArrays()[1][i] << " " <<
430  // picked_chroms[k].getFloatDataArrays()[2][i] << " with int " << picked_chroms[k][i].getIntensity() <<std::endl;
431  picked_chroms[k][i].setIntensity(0.0);
432  }
433  }
434  }
435  }
436 
438  void findLargestPeak(std::vector<RichPeakChromatogram>& picked_chroms, int& chr_idx, int& peak_idx);
439 
440 protected:
441 
443  void updateMembers_();
444 
446  MRMTransitionGroupPicker& operator=(const MRMTransitionGroupPicker& rhs);
447 
464  template <typename SpectrumT, typename TransitionT>
466  std::vector<SpectrumT>& picked_chroms, const int chr_idx,
467  const double best_left, const double best_right, String& outlier)
468  {
469 
470  // Resample all chromatograms around the current estimated peak and
471  // collect the raw intensities. For resampling, use a bit more on either
472  // side to correctly identify shoulders etc.
473  double resample_boundary = 15.0; // sample 15 seconds more on each side
474  SpectrumT master_peak_container;
475  const SpectrumT& ref_chromatogram = transition_group.getChromatograms()[chr_idx];
476  prepareMasterContainer_(ref_chromatogram, master_peak_container, best_left - resample_boundary, best_right + resample_boundary);
477  std::vector<std::vector<double> > all_ints;
478  for (Size k = 0; k < transition_group.getChromatograms().size(); k++)
479  {
480  const SpectrumT& chromatogram = transition_group.getChromatograms()[k];
481  const SpectrumT used_chromatogram = resampleChromatogram_(chromatogram,
482  master_peak_container, best_left - resample_boundary, best_right + resample_boundary);
483 
484  std::vector<double> int_here;
485  for (Size i = 0; i < used_chromatogram.size(); i++)
486  {
487  int_here.push_back(used_chromatogram[i].getIntensity());
488  }
489  all_ints.push_back(int_here);
490  }
491 
492  // Compute the cross-correlation for the collected intensities
493  std::vector<double> mean_shapes;
494  std::vector<double> mean_coel;
495  for (Size k = 0; k < all_ints.size(); k++)
496  {
497  std::vector<double> shapes;
498  std::vector<double> coel;
499  for (Size i = 0; i < all_ints.size(); i++)
500  {
501  if (i == k) {continue;}
502  std::map<int, double> res = OpenSwath::Scoring::normalizedCrossCorrelation(
503  all_ints[k], all_ints[i], boost::numeric_cast<int>(all_ints[i].size()), 1);
504 
505  // the first value is the x-axis (retention time) and should be an int -> it show the lag between the two
506  double res_coelution = std::abs(OpenSwath::Scoring::xcorrArrayGetMaxPeak(res)->first);
507  double res_shape = std::abs(OpenSwath::Scoring::xcorrArrayGetMaxPeak(res)->second);
508 
509  shapes.push_back(res_shape);
510  coel.push_back(res_coelution);
511  }
512 
513  // We have computed the cross-correlation of chromatogram k against
514  // all others. Use the mean of these computations as the value for k.
516  msc = std::for_each(shapes.begin(), shapes.end(), msc);
517  double shapes_mean = msc.mean();
518  msc = std::for_each(coel.begin(), coel.end(), msc);
519  double coel_mean = msc.mean();
520 
521  // mean shape scores below 0.5-0.6 should be a real sign of trouble ... !
522  // mean coel scores above 3.5 should be a real sign of trouble ... !
523  mean_shapes.push_back(shapes_mean);
524  mean_coel.push_back(coel_mean);
525  }
526 
527  // find the chromatogram with the minimal shape score and the maximal
528  // coelution score -> if it is the same chromatogram, the chance is
529  // pretty good that it is different from the others...
530  int min_index_shape = std::distance(mean_shapes.begin(), std::min_element(mean_shapes.begin(), mean_shapes.end()));
531  int max_index_coel = std::distance(mean_coel.begin(), std::max_element(mean_coel.begin(), mean_coel.end()));
532 
533  // Look at the picked peaks that are within the current left/right borders
534  int missing_peaks = 0;
535  int multiple_peaks = 0;
536 
537  // collect all seeds that lie within the current seed
538  std::vector<double> left_borders;
539  std::vector<double> right_borders;
540  for (Size k = 0; k < picked_chroms.size(); k++)
541  {
542  double l_tmp;
543  double r_tmp;
544  double max_int = -1;
545 
546  int pfound = 0;
547  l_tmp = -1;
548  r_tmp = -1;
549  for (Size i = 0; i < picked_chroms[k].size(); i++)
550  {
551  if (picked_chroms[k][i].getMZ() >= best_left && picked_chroms[k][i].getMZ() <= best_right)
552  {
553  pfound++;
554  if (picked_chroms[k][i].getIntensity() > max_int)
555  {
556  max_int = picked_chroms[k][i].getIntensity() > max_int;
557  l_tmp = picked_chroms[k].getFloatDataArrays()[1][i];
558  r_tmp = picked_chroms[k].getFloatDataArrays()[2][i];
559  }
560  }
561  }
562 
563  if (l_tmp > 0.0) left_borders.push_back(l_tmp);
564  if (r_tmp > 0.0) right_borders.push_back(r_tmp);
565 
566  if (pfound == 0) missing_peaks++;
567  if (pfound > 1) multiple_peaks++;
568  }
569 
570  // Check how many chromatograms had exactly one peak picked between our
571  // current left/right borders -> this would be a sign of consistency.
572  LOG_DEBUG << " Overall found missing : " << missing_peaks << " and multiple : " << multiple_peaks << std::endl;
573 
575 
576  // Is there one transitions that is very different from the rest (e.g.
577  // the same element has a bad shape and a bad coelution score) -> potential outlier
578  if (min_index_shape == max_index_coel)
579  {
580  LOG_DEBUG << " element " << min_index_shape << " is a candidate for removal ... " << std::endl;
581  outlier = String(transition_group.getTransitions()[min_index_shape].getNativeID());
582  }
583  else
584  {
585  outlier = "none";
586  }
587 
588  // For the final score (larger is better), consider these scores:
589  // - missing_peaks (the more peaks are missing, the worse)
590  // - multiple_peaks
591  // - mean of the shapes (1 is very good, 0 is bad)
592  // - mean of the co-elution scores (0 is good, 1 is ok, above 1 is pretty bad)
593  double shape_score = std::accumulate(mean_shapes.begin(), mean_shapes.end(), 0.0) / mean_shapes.size();
594  double coel_score = std::accumulate(mean_coel.begin(), mean_coel.end(), 0.0) / mean_coel.size();
595  coel_score = (coel_score - 1.0) / 2.0;
596 
597  double score = shape_score - coel_score - 1.0 * missing_peaks / picked_chroms.size();
598 
599  LOG_DEBUG << " computed score " << score << " (from " << shape_score <<
600  " - " << coel_score << " - " << 1.0 * missing_peaks / picked_chroms.size() << ")" << std::endl;
601 
602  return score;
603  }
604 
614  template <typename SpectrumT>
615  void recalculatePeakBorders_(std::vector<SpectrumT>& picked_chroms, double& best_left, double& best_right, double max_z)
616  {
617  // 1. Collect all seeds that lie within the current seed
618  // - Per chromatogram only the most intense one counts, otherwise very
619  // - low intense peaks can contribute disproportionally to the voting
620  // - procedure.
621  std::vector<double> left_borders;
622  std::vector<double> right_borders;
623  for (Size k = 0; k < picked_chroms.size(); k++)
624  {
625  double max_int = -1;
626  double left = -1;
627  double right = -1;
628  for (Size i = 0; i < picked_chroms[k].size(); i++)
629  {
630  if (picked_chroms[k][i].getMZ() >= best_left && picked_chroms[k][i].getMZ() <= best_right)
631  {
632  if (picked_chroms[k].getFloatDataArrays()[0][i] > max_int)
633  {
634  max_int = picked_chroms[k].getFloatDataArrays()[0][i];
635  left = picked_chroms[k].getFloatDataArrays()[1][i];
636  right = picked_chroms[k].getFloatDataArrays()[2][i];
637  }
638  }
639  }
640  if (max_int > -1 )
641  {
642  left_borders.push_back(left);
643  right_borders.push_back(right);
644  LOG_DEBUG << " * " << k << " left boundary " << left_borders.back() << " with int " << max_int << std::endl;
645  LOG_DEBUG << " * " << k << " right boundary " << right_borders.back() << " with int " << max_int << std::endl;
646  }
647  }
648 
649  // Return for empty peak list
650  if (right_borders.empty())
651  {
652  return;
653  }
654 
655  // FEATURE IDEA: instead of Z-score use modified Z-score for small data sets
656  // http://d-scholarship.pitt.edu/7948/1/Seo.pdf
657  // http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
658  // 1. calculate median
659  // 2. MAD = calculate difference to median for each value -> take median of that
660  // 3. Mi = 0.6745*(xi - median) / MAD
661 
662  // 2. Calculate mean and standard deviation
663  // If the coefficient of variation is too large for one border, we use a
664  // "pseudo-median" instead of the border of the most intense peak.
665  double mean, stdev;
666 
667  // Right borders
668  mean = std::accumulate(right_borders.begin(), right_borders.end(), 0.0) / (double) right_borders.size();
669  stdev = std::sqrt(std::inner_product(right_borders.begin(), right_borders.end(), right_borders.begin(), 0.0)
670  / right_borders.size() - mean * mean);
671  std::sort(right_borders.begin(), right_borders.end());
672 
673  LOG_DEBUG << " - Recalculating right peak boundaries " << mean << " mean / best "
674  << best_right << " std " << stdev << " : " << std::fabs(best_right - mean) / stdev
675  << " coefficient of variation" << std::endl;
676 
677  // Compare right borders of best transition with the mean
678  if (std::fabs(best_right - mean) / stdev > max_z)
679  {
680  best_right = right_borders[right_borders.size() / 2]; // pseudo median
681  LOG_DEBUG << " - Setting right boundary to " << best_right << std::endl;
682  }
683 
684  // Left borders
685  mean = std::accumulate(left_borders.begin(), left_borders.end(), 0.0) / (double) left_borders.size();
686  stdev = std::sqrt(std::inner_product(left_borders.begin(), left_borders.end(), left_borders.begin(), 0.0)
687  / left_borders.size() - mean * mean);
688  std::sort(left_borders.begin(), left_borders.end());
689 
690  LOG_DEBUG << " - Recalculating left peak boundaries " << mean << " mean / best "
691  << best_left << " std " << stdev << " : " << std::fabs(best_left - mean) / stdev
692  << " coefficient of variation" << std::endl;
693 
694  // Compare left borders of best transition with the mean
695  if (std::fabs(best_left - mean) / stdev > max_z)
696  {
697  best_left = left_borders[left_borders.size() / 2]; // pseudo median
698  LOG_DEBUG << " - Setting left boundary to " << best_left << std::endl;
699  }
700 
701  }
702 
704 
705  template <typename SpectrumT>
707  void prepareMasterContainer_(const SpectrumT& ref_chromatogram,
708  SpectrumT& master_peak_container, double best_left, double best_right)
709  {
710  // search for begin / end of the reference chromatogram (and add one more point)
711  typename SpectrumT::const_iterator begin = ref_chromatogram.begin();
712  while (begin != ref_chromatogram.end() && begin->getMZ() < best_left) {begin++; }
713  if (begin != ref_chromatogram.begin()) {begin--; }
714 
715  typename SpectrumT::const_iterator end = begin;
716  while (end != ref_chromatogram.end() && end->getMZ() < best_right) {end++; }
717  if (end != ref_chromatogram.end()) {end++; }
718 
719  // resize the master container and set the m/z values to the ones of the master container
720  master_peak_container.resize(distance(begin, end));
721  typename SpectrumT::iterator it = master_peak_container.begin();
722  for (typename SpectrumT::const_iterator chrom_it = begin; chrom_it != end; chrom_it++, it++)
723  {
724  it->setMZ(chrom_it->getMZ());
725  }
726  }
727 
729  template <typename SpectrumT>
730  SpectrumT resampleChromatogram_(const SpectrumT& chromatogram,
731  SpectrumT& master_peak_container, double best_left, double best_right)
732  {
733  // get the start / end point of this chromatogram => go one past
734  // best_left / best_right to make the resampling accurate also at the
735  // edge.
736  typename SpectrumT::const_iterator begin = chromatogram.begin();
737  while (begin != chromatogram.end() && begin->getMZ() < best_left) {begin++; }
738  if (begin != chromatogram.begin()) {begin--; }
739 
740  typename SpectrumT::const_iterator end = begin;
741  while (end != chromatogram.end() && end->getMZ() < best_right) {end++; }
742  if (end != chromatogram.end()) {end++; }
743 
744  SpectrumT resampled_peak_container = master_peak_container; // copy the master container, which contains the RT values
745  LinearResamplerAlign lresampler;
746  lresampler.raster(begin, end, resampled_peak_container.begin(), resampled_peak_container.end());
747 
748  return resampled_peak_container;
749  }
750 
752 
759  double calculateBgEstimation_(const RichPeakChromatogram& chromatogram, double best_left, double best_right);
760 
761  // Members
765  double min_qual_;
766 
771  };
772 }
773 
774 #endif // OPENMS_ANALYSIS_OPENSWATH_MRMTRANSITIONGROUPPICKER_H
775 
const double k
const DataValue & getMetaValue(const String &name) const
returns the value corresponding to a string
void raster(SpecT< PeakType > &spectrum)
Applies the resampling algorithm to an MSSpectrum.
Definition: LinearResamplerAlign.h:67
void setMetaValue(const String &name, const DataValue &value)
sets the DataValue corresponding to a name
double min_qual_
Definition: MRMTransitionGroupPicker.h:765
String background_subtraction_
Definition: MRMTransitionGroupPicker.h:762
const std::vector< TransitionType > & getTransitions() const
Definition: MRMTransitionGroup.h:125
double recalculate_peaks_max_z_
Definition: MRMTransitionGroupPicker.h:770
A more convenient string class.
Definition: String.h:57
bool compute_peak_quality_
Definition: MRMTransitionGroupPicker.h:764
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:197
void sortByPosition()
Lexicographically sorts the peaks by their position.
Definition: MSSpectrum.h:419
IntensityType getIntensity() const
Definition: Peak2D.h:161
OPENSWATHALGO_DLLAPI XCorrArrayType normalizedCrossCorrelation(std::vector< double > &data1, std::vector< double > &data2, int maxdelay, int lag)
const std::vector< SpectrumType > & getChromatograms() const
Definition: MRMTransitionGroup.h:151
SpectrumType & getPrecursorChromatogram(String key)
Definition: MRMTransitionGroup.h:193
std::vector< PointType > PointArrayType
Definition: ConvexHull2D.h:77
bool isSorted() const
Checks if all peaks are sorted with respect to ascending m/z.
Definition: MSSpectrum.h:481
void prepareMasterContainer_(const SpectrumT &ref_chromatogram, SpectrumT &master_peak_container, double best_left, double best_right)
create an empty master peak container that has the correct mz / RT values set
Definition: MRMTransitionGroupPicker.h:707
The PeakPickerMRM finds peaks a single chromatogram.
Definition: PeakPickerMRM.h:63
int stop_after_feature_
Definition: MRMTransitionGroupPicker.h:767
double mean() const
Definition: StatsHelpers.h:208
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
void setParameters(const Param &param)
Sets the parameters.
A 2-dimensional hull representation in [counter]clockwise direction - depending on axis labelling...
Definition: ConvexHull2D.h:73
#define LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:459
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:131
void setIntensity(IntensityType intensity)
Non-mutable access to the data point intensity (height)
Definition: Peak2D.h:167
void addFeature(Feature &feature, const String &key)
Adds an feature from a single chromatogram into the feature.
double min_peak_width_
Definition: MRMTransitionGroupPicker.h:769
The representation of a 1D spectrum.
Definition: MSSpectrum.h:66
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:209
The MRMTransitionGroupPicker finds peaks in chromatograms that belong to the same precursors...
Definition: MRMTransitionGroupPicker.h:81
bool recalculate_peaks_
Definition: MRMTransitionGroupPicker.h:763
const std::vector< ConvexHull2D > & getConvexHulls() const
Non-mutable access to the convex hulls.
void sortByIntensity(bool reverse=false)
Lexicographically sorts the peaks by their intensity.
Definition: MSSpectrum.h:342
void setQuality(Size index, QualityType q)
Set the quality in dimension c.
Linear Resampling of raw data with alignment.
Definition: LinearResamplerAlign.h:57
void pickChromatogram(const RichPeakChromatogram &chromatogram, RichPeakChromatogram &picked_chrom)
Finds peaks in a single chromatogram and annotates left/right borders.
void setHullPoints(const PointArrayType &points)
accessor for the outer(!) points (no checking is performed if this is actually a convex hull) ...
double computeQuality_(MRMTransitionGroup< SpectrumT, TransitionT > &transition_group, std::vector< SpectrumT > &picked_chroms, const int chr_idx, const double best_left, const double best_right, String &outlier)
Compute transition group quality (higher score is better)
Definition: MRMTransitionGroupPicker.h:465
An LC-MS feature.
Definition: Feature.h:70
functor to compute the mean and stddev of sequence using the std::foreach algorithm ...
Definition: StatsHelpers.h:170
void addFeature(MRMFeature &feature)
Definition: MRMTransitionGroup.h:213
void addPrecursorFeature(Feature &feature, const String &key)
Adds a precursor feature from a single chromatogram into the feature.
void setOverallQuality(QualityType q)
Set the overall quality.
bool hasPrecursorChromatogram(String key) const
Definition: MRMTransitionGroup.h:198
SpectrumT resampleChromatogram_(const SpectrumT &chromatogram, SpectrumT &master_peak_container, double best_left, double best_right)
use the master container from above to resample a chromatogram at those points stored in the master c...
Definition: MRMTransitionGroupPicker.h:730
void pickTransitionGroup(MRMTransitionGroup< SpectrumT, TransitionT > &transition_group)
Pick a group of chromatograms belonging to the same peptide.
Definition: MRMTransitionGroupPicker.h:115
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
MRMFeature createMRMFeature(MRMTransitionGroup< SpectrumT, TransitionT > &transition_group, std::vector< SpectrumT > &picked_chroms, int &chr_idx, int &peak_idx)
Create feature from a vector of chromatograms and a specified peak.
Definition: MRMTransitionGroupPicker.h:185
void remove_overlapping_features(std::vector< SpectrumT > &picked_chroms, double best_left, double best_right)
Remove overlapping features.
Definition: MRMTransitionGroupPicker.h:400
OPENSWATHALGO_DLLAPI XCorrArrayType::iterator xcorrArrayGetMaxPeak(XCorrArrayType &array)
Find best peak in an cross-correlation (highest apex)
double stop_after_intensity_ratio_
Definition: MRMTransitionGroupPicker.h:768
A multi-chromatogram MRM feature.
Definition: MRMFeature.h:50
void recalculatePeakBorders_(std::vector< SpectrumT > &picked_chroms, double &best_left, double &best_right, double max_z)
Recalculate the borders of the peak.
Definition: MRMTransitionGroupPicker.h:615
MSSpectrum< ChromatogramPeak > RichPeakChromatogram
Definition: MRMTransitionGroupPicker.h:88
const String & getTransitionGroupID() const
Definition: MRMTransitionGroup.h:115
Not implemented exception.
Definition: Exception.h:437
Size ensureUniqueId()
Assigns a valid unique id, but only if the present one is invalid. Returns 1 if the unique id was cha...
Definition: UniqueIdInterface.h:159

OpenMS / TOPP release 2.0.0 Documentation generated on Tue Aug 25 2015 05:53:49 using doxygen 1.8.9.1