Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
SwathFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2015.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_FORMAT_SWATHFILE_H
36 #define OPENMS_FORMAT_SWATHFILE_H
37 
39 #include <OpenMS/FORMAT/MzMLFile.h>
41 #ifdef OPENMS_FORMAT_SWATHFILE_MZXMLSUPPORT
43 #endif
44 
46 
47 namespace OpenMS
48 {
49 
62  class OPENMS_DLLAPI SwathFile :
63  public ProgressLogger
64  {
65 public:
66 
68  std::vector<OpenSwath::SwathMap> loadSplit(StringList file_list, String tmp,
69  boost::shared_ptr<ExperimentalSettings>& exp_meta, String readoptions = "normal")
70  {
71  int progress = 0;
72  startProgress(0, file_list.size(), "Loading data");
73 
74  std::vector<OpenSwath::SwathMap> swath_maps(file_list.size());
75 #ifdef _OPENMP
76 #pragma omp parallel for
77 #endif
78  for (SignedSize i = 0; i < boost::numeric_cast<SignedSize>(file_list.size()); ++i)
79  {
80 
81 #ifdef _OPENMP
82 #pragma omp critical (OPENMS_SwathFile_loadSplit)
83 #endif
84  {
85  std::cout << "Loading file " << i << " with name " << file_list[i] << " using readoptions " << readoptions << std::endl;
86  }
87 
88  String tmp_fname = "openswath_tmpfile_" + String(i) + ".mzML";
89 
90  boost::shared_ptr<MSExperiment<Peak1D> > exp(new MSExperiment<Peak1D>);
91  OpenSwath::SpectrumAccessPtr spectra_ptr;
92 
93  // Populate meta-data
94  if (i == 0)
95  {
96  exp_meta = populateMetaData_(file_list[i]);
97  }
98 
99  if (readoptions == "normal")
100  {
101  MzMLFile().load(file_list[i], *exp.get());
103  }
104  else if (readoptions == "cache")
105  {
106  // Cache and load the exp (metadata only) file again
107  spectra_ptr = doCacheFile_(file_list[i], tmp, tmp_fname, exp);
108  }
109  else
110  {
111  throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
112  "Unknown option " + readoptions);
113  }
114 
115  OpenSwath::SwathMap swath_map;
116 
117  bool ms1 = false;
118  double upper = -1, lower = -1;
119  if (exp->size() == 0)
120  {
121  std::cerr << "WARNING: File " << file_list[i] << "\n does not have any scans - I will skip it" << std::endl;
122  continue;
123  }
124  if (exp->getSpectra()[0].getPrecursors().size() == 0)
125  {
126  std::cout << "NOTE: File " << file_list[i] << "\n does not have any precursors - I will assume it is the MS1 scan." << std::endl;
127  ms1 = true;
128  }
129  else
130  {
131  // Checks that this is really a SWATH map and extracts upper/lower window
132  OpenSwathHelper::checkSwathMap(*exp.get(), lower, upper);
133  }
134 
135  swath_map.sptr = spectra_ptr;
136  swath_map.lower = lower;
137  swath_map.upper = upper;
138  swath_map.ms1 = ms1;
139 #ifdef _OPENMP
140 #pragma omp critical (OPENMS_SwathFile_loadSplit)
141 #endif
142  {
143  LOG_DEBUG << "Adding Swath file " << file_list[i] << " with " << swath_map.lower << " to " << swath_map.upper << std::endl;
144  swath_maps[i] = swath_map;
145  setProgress(progress++);
146  }
147  }
148  endProgress();
149  return swath_maps;
150  }
151 
153  std::vector<OpenSwath::SwathMap> loadMzML(String file, String tmp,
154  boost::shared_ptr<ExperimentalSettings>& exp_meta, String readoptions = "normal")
155  {
156  std::cout << "Loading mzML file " << file << " using readoptions " << readoptions << std::endl;
157  String tmp_fname = "openswath_tmpfile";
158 
159  startProgress(0, 1, "Loading metadata file " + file);
160  boost::shared_ptr<MSExperiment<Peak1D> > experiment_metadata = populateMetaData_(file);
161  exp_meta = experiment_metadata;
162 
163  // First pass through the file -> get the meta data
164  std::cout << "Will analyze the metadata first to determine the number of SWATH windows and the window sizes." << std::endl;
165  std::vector<int> swath_counter;
166  int nr_ms1_spectra;
167  std::vector<OpenSwath::SwathMap> known_window_boundaries;
168  countScansInSwath_(experiment_metadata->getSpectra(), swath_counter, nr_ms1_spectra, known_window_boundaries);
169  std::cout << "Determined there to be " << swath_counter.size() <<
170  " SWATH windows and in total " << nr_ms1_spectra << " MS1 spectra" << std::endl;
171  endProgress();
172 
173  FullSwathFileConsumer* dataConsumer;
174  boost::shared_ptr<MSExperiment<Peak1D> > exp(new MSExperiment<Peak1D>);
175  startProgress(0, 1, "Loading data file " + file);
176  if (readoptions == "normal")
177  {
178  dataConsumer = new RegularSwathFileConsumer(known_window_boundaries);
179  MzMLFile().transform(file, dataConsumer, *exp.get());
180  }
181  else if (readoptions == "cache")
182  {
183  dataConsumer = new CachedSwathFileConsumer(known_window_boundaries, tmp, tmp_fname, nr_ms1_spectra, swath_counter);
184  MzMLFile().transform(file, dataConsumer, *exp.get());
185  }
186  else
187  {
188  throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
189  "Unknown or unsupported option " + readoptions);
190  }
191  LOG_DEBUG << "Finished parsing Swath file " << std::endl;
192  std::vector<OpenSwath::SwathMap> swath_maps;
193  dataConsumer->retrieveSwathMaps(swath_maps);
194  delete dataConsumer;
195 
196  endProgress();
197  return swath_maps;
198  }
199 
201  std::vector<OpenSwath::SwathMap> loadMzXML(String file, String tmp,
202  boost::shared_ptr<ExperimentalSettings>& exp_meta, String readoptions = "normal")
203  {
204  std::cout << "Loading mzXML file " << file << " using readoptions " << readoptions << std::endl;
205  String tmp_fname = "openswath_tmpfile";
206 
207  startProgress(0, 1, "Loading metadata file " + file);
208  boost::shared_ptr<MSExperiment<Peak1D> > experiment_metadata(new MSExperiment<Peak1D>);
209  MzXMLFile f;
211  f.getOptions().setFillData(false);
212  f.load(file, *experiment_metadata);
213  exp_meta = experiment_metadata;
214 
215  // First pass through the file -> get the meta data
216  std::cout << "Will analyze the metadata first to determine the number of SWATH windows and the window sizes." << std::endl;
217  std::vector<int> swath_counter;
218  int nr_ms1_spectra;
219  std::vector<OpenSwath::SwathMap> known_window_boundaries;
220  countScansInSwath_(experiment_metadata->getSpectra(), swath_counter, nr_ms1_spectra, known_window_boundaries);
221  std::cout << "Determined there to be " << swath_counter.size() <<
222  " SWATH windows and in total " << nr_ms1_spectra << " MS1 spectra" << std::endl;
223  endProgress();
224 
225  FullSwathFileConsumer* dataConsumer;
226  boost::shared_ptr<MSExperiment<Peak1D> > exp(new MSExperiment<Peak1D>);
227  startProgress(0, 1, "Loading data file " + file);
228  if (readoptions == "normal")
229  {
230  dataConsumer = new RegularSwathFileConsumer(known_window_boundaries);
231  MzXMLFile().transform(file, dataConsumer, *exp.get());
232  }
233  else if (readoptions == "cache")
234  {
235  dataConsumer = new CachedSwathFileConsumer(known_window_boundaries, tmp, tmp_fname, nr_ms1_spectra, swath_counter);
236  MzXMLFile().transform(file, dataConsumer, *exp.get());
237  }
238  else
239  {
240  throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
241  "Unknown or unsupported option " + readoptions);
242  }
243  LOG_DEBUG << "Finished parsing Swath file " << std::endl;
244  std::vector<OpenSwath::SwathMap> swath_maps;
245  dataConsumer->retrieveSwathMaps(swath_maps);
246  delete dataConsumer;
247 
248  endProgress();
249  return swath_maps;
250  }
251 
252 protected:
253 
256  boost::shared_ptr<MSExperiment<Peak1D> > experiment_metadata)
257  {
258  String cached_file = tmp + tmp_fname + ".cached";
259  String meta_file = tmp + tmp_fname;
260 
261  // Create new consumer, transform infile, write out metadata
262  MSDataCachedConsumer* cachedConsumer = new MSDataCachedConsumer(cached_file, true);
263  MzMLFile().transform(in, cachedConsumer, *experiment_metadata.get());
264  CachedmzML().writeMetadata(*experiment_metadata.get(), meta_file, true);
265  delete cachedConsumer; // ensure that filestream gets closed
266 
267  boost::shared_ptr<MSExperiment<Peak1D> > exp(new MSExperiment<Peak1D>);
268  MzMLFile().load(meta_file, *exp.get());
270  }
271 
273  boost::shared_ptr< MSExperiment<Peak1D> > populateMetaData_(String file)
274  {
275  boost::shared_ptr<MSExperiment<Peak1D> > experiment_metadata(new MSExperiment<Peak1D>);
276  MzMLFile f;
278  f.getOptions().setFillData(false);
279  f.load(file, *experiment_metadata);
280  return experiment_metadata;
281  }
282 
284  void countScansInSwath_(const std::vector<MSSpectrum<> > exp,
285  std::vector<int>& swath_counter, int& nr_ms1_spectra,
286  std::vector<OpenSwath::SwathMap>& known_window_boundaries)
287  {
288  int ms1_counter = 0;
289  for (Size i = 0; i < exp.size(); i++)
290  {
291  const MSSpectrum<>& s = exp[i];
292  {
293  if (s.getMSLevel() == 1)
294  {
295  ms1_counter++;
296  }
297  else
298  {
299  if (s.getPrecursors().empty())
300  {
301  throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__,
302  "Found SWATH scan (MS level 2 scan) without a precursor. Cannot determine SWATH window.");
303  }
304  const std::vector<Precursor> prec = s.getPrecursors();
305  double center = prec[0].getMZ();
306  bool found = false;
307  for (Size j = 0; j < known_window_boundaries.size(); j++)
308  {
309  // We group by the precursor mz (center of the window) since this
310  // should be present
311  if (std::fabs(center - known_window_boundaries[j].center) < 1e-6)
312  {
313  found = true;
314  swath_counter[j]++;
315  }
316  }
317  if (!found)
318  {
319  // we found a new SWATH scan
320  swath_counter.push_back(1);
321  double lower = prec[0].getMZ() - prec[0].getIsolationWindowLowerOffset();
322  double upper = prec[0].getMZ() + prec[0].getIsolationWindowUpperOffset();
323  OpenSwath::SwathMap boundary;
324  boundary.lower = lower;
325  boundary.upper = upper;
326  boundary.center = center;
327  known_window_boundaries.push_back(boundary);
328 
329  LOG_DEBUG << "Adding Swath centered at " << center
330  << " m/z with an isolation window of " << lower << " to " << upper
331  << " m/z." << std::endl;
332  }
333  }
334  }
335  }
336  nr_ms1_spectra = ms1_counter;
337 
338  std::cout << "Determined there to be " << swath_counter.size() <<
339  " SWATH windows and in total " << nr_ms1_spectra << " MS1 spectra" << std::endl;
340  }
341 
342  };
343 }
344 
345 #endif
static OpenSwath::SpectrumAccessPtr getSpectrumAccessOpenMSPtr(boost::shared_ptr< OpenMS::MSExperiment< OpenMS::Peak1D > > exp)
Simple Factory method to get a SpectrumAccess Ptr from an MSExperiment.
A more convenient string class.
Definition: String.h:57
File adapter for MzXML 2.1 files.
Definition: MzXMLFile.h:53
UInt getMSLevel() const
Returns the MS level.
Definition: MSSpectrum.h:259
boost::shared_ptr< ISpectrumAccess > SpectrumAccessPtr
Definition: openswathalgo/include/OpenMS/ANALYSIS/OPENSWATH/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:90
void load(const String &filename, MapType &map)
Loads a map from a MzXML file.
Definition: MzXMLFile.h:81
std::vector< OpenSwath::SwathMap > loadMzML(String file, String tmp, boost::shared_ptr< ExperimentalSettings > &exp_meta, String readoptions="normal")
Loads a Swath run from a single mzML file.
Definition: SwathFile.h:153
void retrieveSwathMaps(std::vector< OpenSwath::SwathMap > &maps)
Populate the vector of swath maps after consuming all spectra.
Definition: SwathFileConsumer.h:150
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:128
Transforming and cached writing consumer of MS data.
Definition: MSDataCachedConsumer.h:51
std::vector< OpenSwath::SwathMap > loadMzXML(String file, String tmp, boost::shared_ptr< ExperimentalSettings > &exp_meta, String readoptions="normal")
Loads a Swath run from a single mzXML file.
Definition: SwathFile.h:201
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
#define LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:459
boost::shared_ptr< MSExperiment< Peak1D > > populateMetaData_(String file)
Only read the meta data from a file and use it to populate exp_meta.
Definition: SwathFile.h:273
Abstract base class which can consume spectra coming from SWATH experiment stored in a single file...
Definition: SwathFileConsumer.h:99
void transform(const String &filename_in, Interfaces::IMSDataConsumer< MapType > *consumer, bool skip_full_count=false)
Transforms a map while loading using the supplied MSDataConsumer.
Definition: MzXMLFile.h:127
File adapter for MzML files.
Definition: MzMLFile.h:55
The representation of a 1D spectrum.
Definition: MSSpectrum.h:66
A method or algorithm argument contains illegal values.
Definition: Exception.h:634
File adapter for Swath files.
Definition: SwathFile.h:62
On-disk cached implementation of FullSwathFileConsumer.
Definition: SwathFileConsumer.h:391
void load(const String &filename, MapType &map)
Loads a map from a MzML file. Spectra and chromatograms are sorted by default (this can be disabled u...
Definition: MzMLFile.h:83
Data structure to hold one SWATH map with information about upper / lower isolation window and whether the map is MS1 or MS2.
Definition: SwathMap.h:46
OpenSwath::SpectrumAccessPtr sptr
Definition: SwathMap.h:48
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:348
OpenSwath::SpectrumAccessPtr doCacheFile_(String in, String tmp, String tmp_fname, boost::shared_ptr< MSExperiment< Peak1D > > experiment_metadata)
Cache a file to disk.
Definition: SwathFile.h:255
void countScansInSwath_(const std::vector< MSSpectrum<> > exp, std::vector< int > &swath_counter, int &nr_ms1_spectra, std::vector< OpenSwath::SwathMap > &known_window_boundaries)
Counts the number of scans in a full Swath file (e.g. concatenated non-split file) ...
Definition: SwathFile.h:284
In-memory implementation of FullSwathFileConsumer.
Definition: SwathFileConsumer.h:333
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:74
void transform(const String &filename_in, Interfaces::IMSDataConsumer< MapType > *consumer, bool skip_full_count=false, bool skip_first_pass=false)
Transforms a map while loading using the supplied MSDataConsumer.
Definition: MzMLFile.h:135
std::vector< OpenSwath::SwathMap > loadSplit(StringList file_list, String tmp, boost::shared_ptr< ExperimentalSettings > &exp_meta, String readoptions="normal")
Loads a Swath run from a list of split mzML files.
Definition: SwathFile.h:68
A class that uses on-disk caching to read and write spectra and chromatograms.
Definition: CachedMzML.h:62
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
PeakFileOptions & getOptions()
Mutable access to the options for loading/storing.
double center
Definition: SwathMap.h:51
void writeMetadata(MapType exp, String out_meta, bool addCacheMetaValue=false)
Write only the meta data of an MSExperiment.
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
void setAlwaysAppendData(bool only)
sets whether or not to always append the data to the given map (even if a consumer is given) ...
PeakFileOptions & getOptions()
Mutable access to the options for loading/storing.
double upper
Definition: SwathMap.h:50
void setFillData(bool only)
sets whether to fill the actual data into the container (spectrum/chromatogram)
bool ms1
Definition: SwathMap.h:52
double lower
Definition: SwathMap.h:49
static void checkSwathMap(const OpenMS::MSExperiment< Peak1D > &swath_map, double &lower, double &upper)
Get the lower / upper offset for this SWATH map and do some sanity checks.

OpenMS / TOPP release 2.0.0 Documentation generated on Tue Aug 25 2015 05:53:53 using doxygen 1.8.9.1