pappsomspp
Library for mass spectrometry
msrundatasettree.cpp
Go to the documentation of this file.
1 // GPL 3+
2 // Filippo Rusconi
3 
4 #include <map>
5 #include <limits>
6 #include <iostream>
7 #include <iomanip>
8 
9 #include "msrundatasettree.h"
10 
11 #include "../pappsoexception.h"
12 #include "../exception/exceptionnotpossible.h"
13 
14 
15 namespace pappso
16 {
17 
18 
20  : mcsp_msRunId(ms_run_id_csp)
21 {
22 }
23 
24 
26 {
27  // qDebug();
28 
29  for(auto &&node : m_rootNodes)
30  {
31  // Each node is responsible for freeing its children nodes!
32 
33  delete node;
34  }
35 
36  m_rootNodes.clear();
37 
38  // Beware not to delete the node member of the map, as we have already
39  // destroyed them above!
40  //
41  // for(auto iterator = m_indexNodeMap.begin(); iterator !=
42  // m_indexNodeMap.end();
43  //++iterator)
44  //{
45  // delete(iterator->second);
46  //}
47 
48  // qDebug();
49 }
50 
51 
54  QualifiedMassSpectrumCstSPtr mass_spectrum_csp)
55 {
56  // qDebug();
57 
58  if(mass_spectrum_csp == nullptr)
59  qFatal("Cannot be nullptr");
60 
61  if(mass_spectrum_csp.get() == nullptr)
62  qFatal("Cannot be nullptr");
63 
64  // We need to get the precursor spectrum index, in case this spectrum is a
65  // fragmentation index.
66 
67  MsRunDataSetTreeNode *new_node_p = nullptr;
68 
69  std::size_t precursor_spectrum_index =
70  mass_spectrum_csp->getPrecursorSpectrumIndex();
71 
72  // qDebug() << "The precursor_spectrum_index:" << precursor_spectrum_index;
73 
74  if(precursor_spectrum_index == std::numeric_limits<std::size_t>::max())
75  {
76  // This spectrum is a full scan spectrum, not a fragmentation spectrum.
77  // Create a new node with no parent and push it back to the root nodes
78  // vector.
79 
80  new_node_p = new MsRunDataSetTreeNode(mass_spectrum_csp, nullptr);
81 
82  // Since there is no parent in this overload, it is assumed that the node
83  // to be populated with the new node is the root node.
84 
85  m_rootNodes.push_back(new_node_p);
86 
87  // qDebug() << "to the roots node vector.";
88  }
89  else
90  {
91  // This spectrum is a fragmentation spectrum.
92 
93  // Sanity check
94 
95  if(mass_spectrum_csp->getMsLevel() <= 1)
96  {
98  "The MS level needs to be > 1 in a fragmentation spectrum."));
99  }
100 
101  // Get the node that contains the precursor ion mass spectrum.
102  MsRunDataSetTreeNode *parent_node_p = findNode(precursor_spectrum_index);
103 
104  if(parent_node_p == nullptr)
105  {
106  throw(ExceptionNotPossible(
107  "Could not find a a tree node matching the index."));
108  }
109 
110  // qDebug() << "Fragmentation spectrum"
111  //<< "Found parent node:" << parent_node_p
112  //<< "for precursor index:" << precursor_spectrum_index;
113 
114  // At this point, create a new node with the right parent.
115 
116  new_node_p = new MsRunDataSetTreeNode(mass_spectrum_csp, parent_node_p);
117 
118  parent_node_p->m_children.push_back(new_node_p);
119  }
120 
121  // And now document that addition in the node index map.
122  m_indexNodeMap.insert(std::pair<std::size_t, MsRunDataSetTreeNode *>(
123  mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex(), new_node_p));
124 
125  // We also want to document the new node relating to the
126  // retention time.
127 
129  mass_spectrum_csp->getRtInMinutes(), new_node_p, DataKind::rt);
130 
131  // Likewise for the drift time.
132 
134  mass_spectrum_csp->getDtInMilliSeconds(), new_node_p, DataKind::dt);
135 
136  ++m_spectrumCount;
137 
138  // qDebug() << "New index/node map:"
139  //<< mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex() << "/"
140  //<< new_node_p;
141 
142  return new_node_p;
143 }
144 
145 
146 const std::map<std::size_t, MsRunDataSetTreeNode *> &
148 {
149  return m_indexNodeMap;
150 }
151 
152 
153 std::size_t
155 {
156  // We have a node and we want to get the matching mass spectrum index.
157 
158  if(node == nullptr)
159  throw("Cannot be that the node pointer is nullptr");
160 
161  std::map<std::size_t, MsRunDataSetTreeNode *>::const_iterator iterator =
162  std::find_if(
163  m_indexNodeMap.begin(),
164  m_indexNodeMap.end(),
165  [node](const std::pair<std::size_t, MsRunDataSetTreeNode *> pair) {
166  return pair.second == node;
167  });
168 
169  if(iterator != m_indexNodeMap.end())
170  return iterator->first;
171 
172  return std::numeric_limits<std::size_t>::max();
173 }
174 
175 
176 std::size_t
178  QualifiedMassSpectrumCstSPtr qualified_mass_spectrum_csp) const
179 {
180  MsRunDataSetTreeNode *node_p = findNode(qualified_mass_spectrum_csp);
181 
182  return massSpectrumIndex(node_p);
183 }
184 
185 
186 const std::vector<MsRunDataSetTreeNode *> &
188 {
189  return m_rootNodes;
190 }
191 
192 
193 void
195 {
196  // qDebug() << "Going to call node->accept(visitor) for each root node.";
197 
198  for(auto &&node : m_rootNodes)
199  {
200  // qDebug() << "Calling accept for root node:" << node;
201 
202  if(visitor.shouldStop())
203  break;
204 
205  node->accept(visitor);
206  }
207 }
208 
209 
210 void
213  std::vector<MsRunDataSetTreeNode *>::const_iterator nodes_begin_iterator,
214  std::vector<MsRunDataSetTreeNode *>::const_iterator nodes_end_iterator)
215 {
216  // qDebug() << "Visitor:" << &visitor << "The distance is between iterators
217  // is:"
218  //<< std::distance(nodes_begin_iterator, nodes_end_iterator);
219 
220  using Iterator = std::vector<MsRunDataSetTreeNode *>::const_iterator;
221 
222  Iterator iter = nodes_begin_iterator;
223 
224  // Inform the visitor of the number of nodes to work on.
225 
226  std::size_t node_count =
227  std::distance(nodes_begin_iterator, nodes_end_iterator);
228 
229  visitor.setNodesToProcessCount(node_count);
230 
231  while(iter != nodes_end_iterator)
232  {
233  // qDebug() << "Visitor:" << &visitor
234  //<< "The distance is between iterators is:"
235  //<< std::distance(nodes_begin_iterator, nodes_end_iterator);
236 
237  // qDebug() << "Node visited:" << (*iter)->toString();
238 
239  if(visitor.shouldStop())
240  break;
241 
242  (*iter)->accept(visitor);
243  ++iter;
244  }
245 }
246 
247 
250 {
251  // qDebug();
252 
253  for(auto &node : m_rootNodes)
254  {
255  // qDebug() << "In one node of the root nodes.";
256 
257  MsRunDataSetTreeNode *iterNode = node->findNode(mass_spectrum_csp);
258  if(iterNode != nullptr)
259  return iterNode;
260  }
261 
262  return nullptr;
263 }
264 
265 
267 MsRunDataSetTree::findNode(std::size_t spectrum_index) const
268 {
269  // qDebug();
270 
271  for(auto &node : m_rootNodes)
272  {
273  // qDebug() << "In one node of the root nodes.";
274 
275  MsRunDataSetTreeNode *iterNode = node->findNode(spectrum_index);
276  if(iterNode != nullptr)
277  return iterNode;
278  }
279 
280  return nullptr;
281 }
282 
283 
284 std::vector<MsRunDataSetTreeNode *>
286 {
287  // We want to push back all the nodes of the tree in a flat vector of nodes.
288 
289  std::vector<MsRunDataSetTreeNode *> nodes;
290 
291  for(auto &&node : m_rootNodes)
292  {
293  // The node will store itself and all of its children.
294  node->flattenedView(nodes, true /* with_descendants */);
295  }
296 
297  return nodes;
298 }
299 
300 
301 std::vector<MsRunDataSetTreeNode *>
303  bool with_descendants)
304 {
305  std::vector<MsRunDataSetTreeNode *> nodes;
306 
307  // Logically, ms_level cannot be 0.
308 
309  if(!ms_level)
310  {
311  qFatal(
312  "Fatal error at %s@%d -- %s(). "
313  "The MS level cannot be 0."
314  "Program aborted.",
315  __FILE__,
316  __LINE__,
317  __FUNCTION__);
318 
319  return nodes;
320  }
321 
322  // The depth of the tree at which we are right at this point is 0, we have not
323  // gone into the children yet.
324 
325  std::size_t depth = 0;
326 
327  // If ms_level is 1, then that means that we want the nodes starting right at
328  // the root nodes with or without the descendants.
329 
330  // std::cout << __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__ << " () "
331  //<< "ms_level: " << ms_level << " depth: " << depth << std::endl;
332 
333  if(ms_level == 1)
334  {
335  for(auto &&node : m_rootNodes)
336  {
337  // std::cout << __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__
338  //<< " () "
339  //<< "Handling one of the root nodes at ms_level = 1."
340  //<< std::endl;
341 
342  node->flattenedView(nodes, with_descendants);
343  }
344 
345  return nodes;
346  }
347 
348  // At this point, we know that we want the descendants of the root nodes since
349  // we want ms_level > 1, so we need go to to the children of the root nodes.
350 
351  // Let depth to 0, because if we go to the children of the root nodes we will
352  // still be at depth 0, that is MS level 1.
353 
354  for(auto &node : m_rootNodes)
355  {
356  // std::cout
357  //<< __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__ << " () "
358  //<< std::setprecision(15)
359  //<< "Requesting a flattened view of the root's child nodes with depth: "
360  //<< depth << std::endl;
361 
362  node->flattenedViewMsLevelNodes(ms_level, depth, nodes, with_descendants);
363  }
364 
365  return nodes;
366 }
367 
368 
371  std::size_t product_spectrum_index)
372 {
373 
374  // qDebug();
375 
376  // Find the node that holds the mass spectrum that was acquired as the
377  // precursor that when fragmented gave a spectrum at spectrum_index;
378 
379  // Get the node that contains the product_spectrum_index first.
380  MsRunDataSetTreeNode *node = nullptr;
381  node = findNode(product_spectrum_index);
382 
383  // Now get the node that contains the precursor_spectrum_index.
384 
385  return findNode(node->mcsp_massSpectrum->getPrecursorSpectrumIndex());
386 }
387 
388 
389 std::vector<MsRunDataSetTreeNode *>
391  std::size_t precursor_spectrum_index)
392 {
393  std::vector<MsRunDataSetTreeNode *> nodes;
394 
395  // First get the node of the precursor spectrum index.
396 
397  MsRunDataSetTreeNode *precursor_node = findNode(precursor_spectrum_index);
398 
399  if(precursor_node == nullptr)
400  return nodes;
401 
402  nodes.assign(precursor_node->m_children.begin(),
403  precursor_node->m_children.end());
404 
405  return nodes;
406 }
407 
408 
409 std::vector<MsRunDataSetTreeNode *>
411  PrecisionPtr precision_ptr)
412 {
413 
414  // Find all the precursor nodes holding a mass spectrum that contained a
415  // precursor mz-value.
416 
417  if(precision_ptr == nullptr)
418  qFatal(
419  "Fatal error at %s@%d -- %s(). "
420  "precision_ptr cannot be nullptr."
421  "Program aborted.",
422  __FILE__,
423  __LINE__,
424  __FUNCTION__);
425 
426  std::vector<MsRunDataSetTreeNode *> product_nodes;
427 
428  // As a first step, find all the nodes that hold a mass spectrum that was
429  // acquired as a fragmentation spectrum of an ion of mz, that is, search all
430  // the product ion nodes for which precursor was mz.
431 
432  for(auto &&node : m_rootNodes)
433  {
434  node->productNodesByPrecursorMz(mz, precision_ptr, product_nodes);
435  }
436 
437  // Now, for each node found get the precursor node
438 
439  std::vector<MsRunDataSetTreeNode *> precursor_nodes;
440 
441  for(auto &&node : product_nodes)
442  {
443  precursor_nodes.push_back(
444  findNode(node->mcsp_massSpectrum->getPrecursorSpectrumIndex()));
445  }
446 
447  return precursor_nodes;
448 }
449 
450 
451 bool
453  MsRunDataSetTreeNode *node_p,
454  DataKind data_kind)
455 {
456  // qDebug();
457 
458  using NodeVector = std::vector<MsRunDataSetTreeNode *>;
459  using DoubleNodeVectorMap = std::map<double, NodeVector>;
460  using MapPair = std::pair<double, NodeVector>;
461  using MapIterator = DoubleNodeVectorMap::iterator;
462 
463  DoubleNodeVectorMap *map_p;
464 
465  if(data_kind == DataKind::rt)
466  {
467  map_p = &m_rtDoubleNodeVectorMap;
468  }
469  else if(data_kind == DataKind::dt)
470  {
471  map_p = &m_dtDoubleNodeVectorMap;
472  }
473  else
474  qFatal("Programming error.");
475 
476  // There are two possibilities:
477  //
478  // 1. The time was never encountered yet. We won't find it. We need to
479  // allocate a vector of Node's and set it associated to time in the map.
480  //
481  // 2. The time was encountered already, we will find it in the maps, we'll
482  // just push_back the Node in the vector of nodes.
483 
484  MapIterator found_iterator = map_p->find(time);
485 
486  if(found_iterator != map_p->end())
487  {
488  // The time value was encountered already.
489 
490  found_iterator->second.push_back(node_p);
491 
492  // qDebug() << "Found iterator for time:" << time;
493  }
494  else
495  {
496  // We need to create a new vector with the node.
497 
498  NodeVector node_vector = {node_p};
499 
500  map_p->insert(MapPair(time, node_vector));
501 
502  // qDebug() << "Inserted new time:node_vector pair.";
503  }
504 
505  return true;
506 }
507 
508 
511  QualifiedMassSpectrumCstSPtr mass_spectrum_csp,
512  MsRunDataSetTreeNode *parent_p)
513 {
514  // qDebug();
515 
516  // We want to add a mass spectrum. Either the parent_p argument is nullptr or
517  // not. If it is nullptr, then we just append the mass spectrum to the vector
518  // of root nodes. If it is not nullptr, we need to append the mass spectrum to
519  // that node.
520 
521  MsRunDataSetTreeNode *new_node_p =
522  new MsRunDataSetTreeNode(mass_spectrum_csp, parent_p);
523 
524  if(parent_p == nullptr)
525  {
526  m_rootNodes.push_back(new_node_p);
527 
528  // qDebug() << "Pushed back" << new_node << "to root nodes:" <<
529  // &m_rootNodes;
530  }
531  else
532  {
533  parent_p->m_children.push_back(new_node_p);
534 
535  // qDebug() << "Pushed back" << new_node << "with parent:" << parent_p;
536  }
537 
538  ++m_spectrumCount;
539 
540  // And now document that addition in the node index map.
541  m_indexNodeMap.insert(std::pair<std::size_t, MsRunDataSetTreeNode *>(
542  mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex(), new_node_p));
543 
544  // We also want to document the new node relating to the
545  // retention time.
546 
548  mass_spectrum_csp->getRtInMinutes(), new_node_p, DataKind::rt);
549 
550  // Likewise for the drift time.
551 
553  mass_spectrum_csp->getDtInMilliSeconds(), new_node_p, DataKind::dt);
554 
555  // qDebug() << "New index/node map:"
556  //<< mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex() << "/"
557  //<< new_node;
558 
559  return new_node_p;
560 }
561 
562 
565  QualifiedMassSpectrumCstSPtr mass_spectrum_csp,
566  std::size_t precursor_spectrum_index)
567 {
568  // qDebug();
569 
570  // First get the node containing the mass spectrum that was acquired at index
571  // precursor_spectrum_index.
572 
573  // qDebug() << "Need to find the precursor's mass spectrum node for precursor
574  // "
575  //"spectrum index:"
576  //<< precursor_spectrum_index;
577 
578  MsRunDataSetTreeNode *mass_spec_data_node_p =
579  findNode(precursor_spectrum_index);
580 
581  // qDebug() << "Found node" << mass_spec_data_node_p
582  //<< "for precursor index:" << precursor_spectrum_index;
583 
584  if(mass_spec_data_node_p == nullptr)
585  {
586  throw(ExceptionNotPossible(
587  "Could not find a a tree node matching the index."));
588  }
589 
590  // qDebug() << "Calling addMassSpectrum with parent node:"
591  //<< mass_spec_data_node_p;
592 
593  return addMassSpectrum(mass_spectrum_csp, mass_spec_data_node_p);
594 }
595 
596 
597 std::size_t
599  double end,
600  NodeVector &nodes,
601  DataKind data_kind) const
602 {
603  using NodeVector = std::vector<MsRunDataSetTreeNode *>;
604  using DoubleNodeVectorMap = std::map<double, NodeVector>;
605  using MapIterator = DoubleNodeVectorMap::const_iterator;
606 
607  const DoubleNodeVectorMap *map_p;
608 
609  if(data_kind == DataKind::rt)
610  {
611  map_p = &m_rtDoubleNodeVectorMap;
612  }
613  else if(data_kind == DataKind::dt)
614  {
615  map_p = &m_dtDoubleNodeVectorMap;
616  }
617  else
618  qFatal("Programming error.");
619 
620  std::size_t added_nodes = 0;
621 
622  // Get the iterator to the map item that has the key greater or equal to
623  // start.
624 
625  MapIterator start_iterator = map_p->lower_bound(start);
626 
627  if(start_iterator == map_p->end())
628  return 0;
629 
630  // Now get the end of the map useful range of items.
631 
632  MapIterator end_iterator = map_p->upper_bound(end);
633 
634  // Now that we have the iterator range, iterate in it and get the mass spectra
635  // from each item's pair.second node vector.
636 
637  for(MapIterator iterator = start_iterator; iterator != end_iterator;
638  ++iterator)
639  {
640  // We are iterating in MapPair items.
641 
642  NodeVector node_vector = iterator->second;
643 
644  // All the nodes in the node vector need to be copied to the mass_spectra
645  // vector passed as parameter.
646 
647  for(auto &&node_p : node_vector)
648  {
649  nodes.push_back(node_p);
650 
651  ++added_nodes;
652  }
653  }
654 
655  return added_nodes;
656 }
657 
658 
659 std::size_t
661  double start, double end, NodeVector &nodes, DataKind data_kind) const
662 {
663  using NodeVector = std::vector<MsRunDataSetTreeNode *>;
664  using NodeVectorIterator = NodeVector::iterator;
665 
666  using DoubleNodeVectorMap = std::map<double, NodeVector>;
667  using MapIterator = DoubleNodeVectorMap::const_iterator;
668 
669  const DoubleNodeVectorMap *map_p;
670 
671  if(data_kind == DataKind::rt)
672  {
673  map_p = &m_rtDoubleNodeVectorMap;
674  }
675  else if(data_kind == DataKind::dt)
676  {
677  map_p = &m_dtDoubleNodeVectorMap;
678  }
679  else
680  qFatal("Programming error.");
681 
682  std::size_t removed_vector_items = 0;
683 
684  // We want to remove from the nodes vector all the nodes that contain a mass
685  // spectrum acquired at a time range outside of [ start-end ], that is, the
686  // time values [begin() - start [ and ]end -- end()[.
687 
688  // Get the iterator to the map item that has the key less to
689  // start (we want to keep the map item having key == start).
690 
691  MapIterator first_end_iterator = (*map_p).upper_bound(start);
692 
693  // Now that we have the first_end_iterator, we can iterate between [begin --
694  // first_end_iterator[
695 
696  for(MapIterator iterator = map_p->begin(); iterator != first_end_iterator;
697  ++iterator)
698  {
699  // Remove from the nodes vector the nodes.
700 
701  // We are iterating in MapPair items.
702 
703  NodeVector node_vector = iterator->second;
704 
705  // All the nodes in the node vector need to be removed from the
706  // mass_spectra vector passed as parameter if found.
707 
708  for(auto &&node_p : node_vector)
709  {
710  NodeVectorIterator iterator =
711  std::find(nodes.begin(), nodes.end(), node_p);
712 
713  if(iterator != nodes.end())
714  {
715  // We found the node: remove it.
716 
717  nodes.erase(iterator);
718 
719  ++removed_vector_items;
720  }
721  }
722  }
723 
724  // Now the second begin iterator, so that we can remove all the items
725  // contained in the second range, that is, ]end--end()[.
726 
727  // The second_first_iterator will point to the item having its time value less
728  // or equal to end. But we do not want to get items having their time equal to
729  // end, only < end. So, if the iterator is not begin(), we just need to
730  // decrement it once.
731  MapIterator second_first_iterator = map_p->upper_bound(end);
732  if(second_first_iterator != map_p->begin())
733  --second_first_iterator;
734 
735  for(MapIterator iterator = second_first_iterator; iterator != map_p->end();
736  ++iterator)
737  {
738  // We are iterating in MapPair items.
739 
740  NodeVector node_vector = iterator->second;
741 
742  // All the nodes in the node vector need to be removed from the
743  // mass_spectra vector passed as parameter if found.
744 
745  for(auto &&node_p : node_vector)
746  {
747  NodeVectorIterator iterator =
748  std::find(nodes.begin(), nodes.end(), node_p);
749 
750  if(iterator != nodes.end())
751  {
752  // We found the node: remove it.
753 
754  nodes.erase(iterator);
755 
756  ++removed_vector_items;
757  }
758  }
759  }
760 
761  return removed_vector_items;
762 }
763 
764 
765 std::size_t
767  double start,
768  double end,
769  QualMassSpectraVector &mass_spectra,
770  DataKind data_kind) const
771 {
772  //qDebug();
773 
774  if(start == end)
775  qDebug() << "Special case, start and end are equal:" << start;
776 
777  using DoubleNodeVectorMap = std::map<double, NodeVector>;
778  using MapIterator = DoubleNodeVectorMap::const_iterator;
779 
780  const DoubleNodeVectorMap *map_p;
781 
782  if(data_kind == DataKind::rt)
783  {
784  map_p = &m_rtDoubleNodeVectorMap;
785  }
786  else if(data_kind == DataKind::dt)
787  {
788  map_p = &m_dtDoubleNodeVectorMap;
789  }
790  else
791  qFatal("Programming error.");
792 
793  //qDebug() << "The rt |dt / mass spectra map has size:" << map_p->size()
794  //<< "The start:" << start << "the end:" << end;
795 
796  std::size_t added_mass_spectra = 0;
797 
798  // Get the iterator to the map item that has the key greater or equal to
799  // start.
800 
801  MapIterator start_iterator = map_p->lower_bound(start);
802 
803  if(start_iterator == map_p->end())
804  {
805  qDebug() << "The start iterator is end()!";
806  return 0;
807  }
808 
809  //qDebug() << "The start_iterator points to:" << start_iterator->first
810  //<< "as a rt|dt time.";
811 
812  // Now get the end of the map useful range of items.
813 
814  MapIterator end_iterator = map_p->upper_bound(end);
815 
816  //if(end_iterator == map_p->end())
817  //{
818  //qDebug() << "The end_iterator points to the end of the map.";
819  //}
820  //else
821  //{
822  //qDebug() << "The end_iterator points to:" << end_iterator->first
823  //<< "as a rt|dt time.";
824  //}
825 
826  //qDebug() << "The number of time values to iterate through:"
827  //<< std::distance(start_iterator, end_iterator);
828 
829  // Now that we have the iterator range, iterate in it and get the mass
830  // spectra from each item's pair.second node vector.
831 
832  for(MapIterator iterator = start_iterator; iterator != end_iterator;
833  ++iterator)
834  {
835  // We are iterating in MapPair items.
836 
837  NodeVector node_vector = iterator->second;
838 
839  // All the nodes in the node vector need to be copied to the
840  // mass_spectra vector passed as parameter.
841 
842  for(auto &&node_p : node_vector)
843  {
844  mass_spectra.push_back(node_p->getQualifiedMassSpectrum());
845 
846  ++added_mass_spectra;
847  }
848  }
849 
850  //qDebug() << "Returning added_mass_spectra:" << added_mass_spectra;
851 
852  return added_mass_spectra;
853 }
854 
855 
856 std::size_t
858  double start,
859  double end,
860  QualMassSpectraVector &mass_spectra,
861  DataKind data_kind) const
862 {
863  using QualMassSpectraVectorIterator = QualMassSpectraVector::iterator;
864 
865  using DoubleNodeVectorMap = std::map<double, NodeVector>;
866  using MapIterator = DoubleNodeVectorMap::const_iterator;
867 
868  const DoubleNodeVectorMap *map_p;
869 
870  if(data_kind == DataKind::rt)
871  {
872  map_p = &m_rtDoubleNodeVectorMap;
873  }
874  else if(data_kind == DataKind::dt)
875  {
876  map_p = &m_dtDoubleNodeVectorMap;
877  }
878  else
879  qFatal("Programming error.");
880 
881  qDebug() << "The map has size:" << map_p->size() << "start:" << start
882  << "end:" << end;
883 
884  std::size_t removed_vector_items = 0;
885 
886  // We want to remove from the nodes vector all the nodes that contain a mass
887  // spectrum acquired at a time range outside of [ start-end ], that is, the
888  // time values [begin() - start [ and ]end -- end()[.
889 
890  // Looking for an iterator that points to an item having a time < start.
891 
892  // lower_bound returns an iterator pointing to the first element in the
893  // range [first, last) that is not less than (i.e. greater or equal to)
894  // value, or last if no such element is found.
895 
896  MapIterator first_end_iterator = (*map_p).lower_bound(start);
897 
898  // first_end_iterator points to the item that has the next time value with
899  // respect to start. This is fine because we'll not remove that point
900  // because the for loop below will stop one item short of
901  // first_end_iterator. That means that we effectively remove all the items
902  // [begin() -> start[ (start not include). Exactly what we want.
903 
904  qDebug() << "lower_bound for start:" << first_end_iterator->first;
905 
906  // Now that we have the first_end_iterator, we can iterate between [begin --
907  // first_end_iterator[
908 
909  for(MapIterator iterator = map_p->begin(); iterator != first_end_iterator;
910  ++iterator)
911  {
912  // Remove from the nodes vector the nodes.
913 
914  // We are iterating in MapPair items.
915 
916  NodeVector node_vector = iterator->second;
917 
918  // All the nodes in the node vector need to be removed from the
919  // mass_spectra vector passed as parameter if found.
920 
921  for(auto &&node_p : node_vector)
922  {
923  QualMassSpectraVectorIterator iterator =
924  std::find(mass_spectra.begin(),
925  mass_spectra.end(),
926  node_p->getQualifiedMassSpectrum());
927 
928  if(iterator != mass_spectra.end())
929  {
930  // We found the mass spectrum: remove it.
931 
932  mass_spectra.erase(iterator);
933 
934  ++removed_vector_items;
935  }
936  }
937  }
938 
939  // Now the second begin iterator, so that we can remove all the items
940  // contained in the second range, that is, ]end--end()[.
941 
942  // The second_first_iterator will point to the item having its time value
943  // less or equal to end. But we do not want to get items having their time
944  // equal to end, only < end. So, if the iterator is not begin(), we just
945  // need to decrement it once.
946 
947  MapIterator second_first_iterator = map_p->upper_bound(end);
948 
949  // second_first_iterator now points to the item after the one having time
950  // end. Which is exactly what we want: we want to remove ]end--end()[ and
951  // this is exactly what the loop starting a the point after end below.
952 
953  qDebug() << "second_first_iterator for end:" << second_first_iterator->first;
954 
955  for(MapIterator iterator = second_first_iterator; iterator != map_p->end();
956  ++iterator)
957  {
958  // We are iterating in MapPair items.
959 
960  NodeVector node_vector = iterator->second;
961 
962  // All the nodes in the node vector need to be removed from the
963  // mass_spectra vector passed as parameter if found.
964 
965  for(auto &&node_p : node_vector)
966  {
967  QualMassSpectraVectorIterator iterator =
968  std::find(mass_spectra.begin(),
969  mass_spectra.end(),
970  node_p->getQualifiedMassSpectrum());
971 
972  if(iterator != mass_spectra.end())
973  {
974  // We found the node: remove it.
975 
976  mass_spectra.erase(iterator);
977 
978  ++removed_vector_items;
979  }
980  }
981  }
982 
983  return removed_vector_items;
984 }
985 
986 
987 std::size_t
989 {
990  // We want to know what is the depth of the tree, that is the highest level
991  // of MSn, that is, n.
992 
993  if(!m_rootNodes.size())
994  return 0;
995 
996  // qDebug() << "There are" << m_rootNodes.size() << "root nodes";
997 
998  // By essence, we are at MS0: only if we have at least one root node do we
999  // know we have MS1 data. So we already know that we have at least one
1000  // child, so start with depth 1.
1001 
1002  std::size_t depth = 1;
1003  std::size_t tmp_depth = 0;
1004  std::size_t greatest_depth = 0;
1005 
1006  for(auto &node : m_rootNodes)
1007  {
1008  tmp_depth = node->depth(depth);
1009 
1010  // qDebug() << "Returned depth:" << tmp_depth;
1011 
1012  if(tmp_depth > greatest_depth)
1013  greatest_depth = tmp_depth;
1014  }
1015 
1016  return greatest_depth;
1017 }
1018 
1019 
1020 std::size_t
1022 {
1023 
1024  std::size_t cumulative_node_count = 0;
1025 
1026  for(auto &node : m_rootNodes)
1027  {
1028  node->size(cumulative_node_count);
1029 
1030  // qDebug() << "Returned node_count:" << node_count;
1031  }
1032 
1033  return cumulative_node_count;
1034 }
1035 
1036 
1037 std::size_t
1039 {
1040  return m_indexNodeMap.size();
1041 }
1042 
1043 
1044 std::size_t
1046 {
1047  return m_spectrumCount;
1048 }
1049 
1050 
1051 } // namespace pappso
pappso::MsRunDataSetTree::precursorNodesByPrecursorMz
std::vector< MsRunDataSetTreeNode * > precursorNodesByPrecursorMz(pappso_double mz, PrecisionPtr precision_ptr)
Definition: msrundatasettree.cpp:410
pappso::MsRunDataSetTree::findNode
MsRunDataSetTreeNode * findNode(QualifiedMassSpectrumCstSPtr mass_spectrum_csp) const
Definition: msrundatasettree.cpp:249
pappso::MsRunDataSetTree::flattenedViewMsLevel
std::vector< MsRunDataSetTreeNode * > flattenedViewMsLevel(std::size_t ms_level, bool with_descendants=false)
Definition: msrundatasettree.cpp:302
pappso::pappso_double
double pappso_double
A type definition for doubles.
Definition: types.h:67
pappso::MsRunDataSetTree::getIndexNodeMap
const std::map< std::size_t, MsRunDataSetTreeNode * > & getIndexNodeMap() const
Definition: msrundatasettree.cpp:147
pappso::MsRunDataSetTree::indexNodeMapSize
std::size_t indexNodeMapSize() const
Definition: msrundatasettree.cpp:1038
pappso::MsRunDataSetTreeNodeVisitorInterface::shouldStop
virtual bool shouldStop() const =0
pappso::MsRunDataSetTree::size
std::size_t size() const
Definition: msrundatasettree.cpp:1021
pappso
Definition: aa.cpp:38
pappso::MsRunDataSetTreeNode
Definition: msrundatasettreenode.h:32
pappso::MsRunDataSetTreeNode::m_children
std::vector< MsRunDataSetTreeNode * > m_children
Definition: msrundatasettreenode.h:95
pappso::DataKind
DataKind
Definition: types.h:189
pappso::MsRunDataSetTree::m_rootNodes
std::vector< MsRunDataSetTreeNode * > m_rootNodes
Definition: msrundatasettree.h:118
pappso::MsRunIdCstSPtr
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:63
pappso::MsRunDataSetTree::NodeVector
std::vector< MsRunDataSetTreeNode * > NodeVector
Definition: msrundatasettree.h:86
pappso::MsRunDataSetTreeNode::mcsp_massSpectrum
QualifiedMassSpectrumCstSPtr mcsp_massSpectrum
Definition: msrundatasettreenode.h:91
pappso::DataKind::rt
Retention time.
pappso::MsRunDataSetTree::getRootNodes
const std::vector< MsRunDataSetTreeNode * > & getRootNodes() const
Definition: msrundatasettree.cpp:187
pappso::MsRunDataSetTreeNodeVisitorInterface::setNodesToProcessCount
virtual void setNodesToProcessCount(std::size_t)=0
pappso::ExceptionNotPossible
Definition: exceptionnotpossible.h:50
pappso::MsRunDataSetTree::massSpectrumIndex
std::size_t massSpectrumIndex(const MsRunDataSetTreeNode *node) const
Definition: msrundatasettree.cpp:154
pappso::MsRunDataSetTree::QualMassSpectraVector
std::vector< QualifiedMassSpectrumCstSPtr > QualMassSpectraVector
Definition: msrundatasettree.h:87
pappso::MsRunDataSetTree::addDataSetQualMassSpectraInsideDtRtRange
std::size_t addDataSetQualMassSpectraInsideDtRtRange(double start, double end, QualMassSpectraVector &mass_spectra, DataKind data_kind) const
Definition: msrundatasettree.cpp:766
pappso::MsRunDataSetTree::DoubleNodeVectorMap
std::map< double, NodeVector > DoubleNodeVectorMap
Definition: msrundatasettree.h:89
pappso::MsRunDataSetTree::documentNodeInDtRtMap
bool documentNodeInDtRtMap(double time, MsRunDataSetTreeNode *node_p, DataKind data_kind)
Definition: msrundatasettree.cpp:452
pappso::MsRunDataSetTree::accept
void accept(MsRunDataSetTreeNodeVisitorInterface &visitor)
Definition: msrundatasettree.cpp:194
pappso::MsRunDataSetTree::productNodesByPrecursorSpectrumIndex
std::vector< MsRunDataSetTreeNode * > productNodesByPrecursorSpectrumIndex(std::size_t precursor_spectrum_index)
Definition: msrundatasettree.cpp:390
msrundatasettree.h
pappso::MsRunDataSetTree::getSpectrumCount
std::size_t getSpectrumCount() const
Definition: msrundatasettree.cpp:1045
pappso::MsRunDataSetTree::precursorNodeByProductSpectrumIndex
MsRunDataSetTreeNode * precursorNodeByProductSpectrumIndex(std::size_t product_spectrum_index)
Definition: msrundatasettree.cpp:370
pappso::MsRunDataSetTreeNode::findNode
MsRunDataSetTreeNode * findNode(std::size_t spectrum_index)
Definition: msrundatasettreenode.cpp:133
pappso::MsRunDataSetTree::MsRunDataSetTree
MsRunDataSetTree(MsRunIdCstSPtr ms_run_id_csp)
Definition: msrundatasettree.cpp:19
pappso::QualifiedMassSpectrumCstSPtr
std::shared_ptr< const QualifiedMassSpectrum > QualifiedMassSpectrumCstSPtr
Definition: qualifiedmassspectrum.h:61
pappso::PrecisionPtr
const typedef PrecisionBase * PrecisionPtr
Definition: precision.h:141
pappso::MsRunDataSetTree::addDataSetTreeNodesInsideDtRtRange
std::size_t addDataSetTreeNodesInsideDtRtRange(double start, double end, NodeVector &nodes, DataKind data_kind) const
Definition: msrundatasettree.cpp:598
pappso::MsRunDataSetTree::m_spectrumCount
std::size_t m_spectrumCount
Definition: msrundatasettree.h:116
pappso::MsRunDataSetTreeNodeVisitorInterface
Definition: msrundatasettreevisitor.h:23
pappso::MsRunDataSetTree::flattenedView
std::vector< MsRunDataSetTreeNode * > flattenedView()
Definition: msrundatasettree.cpp:285
pappso::MsRunDataSetTree::removeDataSetQualMassSpectraOutsideDtRtRange
std::size_t removeDataSetQualMassSpectraOutsideDtRtRange(double start, double end, QualMassSpectraVector &mass_spectra, DataKind data_kind) const
Definition: msrundatasettree.cpp:857
pappso::PrecisionUnit::mz
pappso::MsRunDataSetTree::m_dtDoubleNodeVectorMap
DoubleNodeVectorMap m_dtDoubleNodeVectorMap
Definition: msrundatasettree.h:124
pappso::MsRunDataSetTree::m_indexNodeMap
std::map< std::size_t, MsRunDataSetTreeNode * > m_indexNodeMap
Definition: msrundatasettree.h:119
pappso::DataKind::dt
Drift time.
pappso::MsRunDataSetTree::removeDataSetTreeNodesOutsideDtRtRange
std::size_t removeDataSetTreeNodesOutsideDtRtRange(double start, double end, NodeVector &nodes, DataKind data_kind) const
Definition: msrundatasettree.cpp:660
pappso::MsRunDataSetTree::m_rtDoubleNodeVectorMap
DoubleNodeVectorMap m_rtDoubleNodeVectorMap
Definition: msrundatasettree.h:125
pappso::MsRunDataSetTree::~MsRunDataSetTree
virtual ~MsRunDataSetTree()
Definition: msrundatasettree.cpp:25
pappso::MsRunDataSetTree::addMassSpectrum
MsRunDataSetTreeNode * addMassSpectrum(QualifiedMassSpectrumCstSPtr mass_spectrum)
Definition: msrundatasettree.cpp:53
pappso::MsRunDataSetTree::depth
std::size_t depth() const
Definition: msrundatasettree.cpp:988