libpappsomspp
Library for mass spectrometry
filterlowintensitysignalremoval.cpp
Go to the documentation of this file.
1/* BEGIN software license
2 *
3 * msXpertSuite - mass spectrometry software suite
4 * -----------------------------------------------
5 * Copyright(C) 2009,...,2021 Filippo Rusconi
6 *
7 * http://www.msxpertsuite.org
8 *
9 * This file is part of the msXpertSuite project.
10 *
11 * The msXpertSuite project is the successor of the massXpert project. This
12 * project now includes various independent modules:
13 *
14 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
15 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16 *
17 * This program is free software: you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation, either version 3 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program. If not, see <http://www.gnu.org/licenses/>.
29 *
30 * END software license
31 */
32
33
34#include <qmath.h>
35
36#include <limits>
37#include <iterator>
38
39#include <QVector>
40#include <QDebug>
41
42#include "../../exception/exceptionnotrecognized.h"
43
45#include "../../trace/maptrace.h"
46
47
48namespace pappso
49{
50
51
53 double mean, double std_dev, double threshold)
54{
55 m_noiseMean = mean;
56 m_noiseStdDev = std_dev;
57 m_threshold = threshold;
58}
59
60
62 const QString &parameters)
63{
64 buildFilterFromString(parameters);
65}
66
67
70{
74}
75
76
78{
79}
80
81
85{
86 if(&other == this)
87 return *this;
88
92
93 return *this;
94}
95
96
97void
99 const QString &parameters)
100{
101 // Typical string: "FloorAmplitudePercentage|15"
102 if(parameters.startsWith(QString("%1|").arg(name())))
103 {
104 QStringList params = parameters.split("|").back().split(";");
105
106 m_noiseMean = params.at(0).toDouble();
107 m_noiseStdDev = params.at(1).toDouble();
108 m_threshold = params.at(2).toDouble();
109 }
110 else
111 {
113 QString(
114 "Building of FilterLowIntensitySignalRemoval from string %1 failed")
115 .arg(parameters));
116 }
117}
118
119
120std::size_t
122{
123 // We want to iterate in the trace and check if we can get the apicess
124 // isolated one by one. An apex is nothing more than the top cusp of a
125 // triangle. We only store apices that have a value > m_threshold.
126
127 m_clusters.clear();
128
129 std::size_t trace_size = trace.size();
130 if(trace_size <= 2)
131 {
132 //qDebug() << "The original trace has less than 3 points. Returning it "
133 //"without modification.";
134 return m_clusters.size();
135 }
136 //else
137 //qDebug() << "The original trace has" << trace_size << "data points";
138
139 // Seed the system with the first point of the trace.
140 m_curIter = trace.cbegin();
141 m_prevIter = trace.cbegin();
142
143 //qDebug() << "First trace point: "
144 //<< "(" << m_prevIter->x << "," << m_prevIter->y << ");";
145
146 // We still have not found any apex!
147 m_prevApex = trace.cend();
148
149 // We will need to know if we were ascending to an apex.
150 bool was_ascending_to_apex = false;
151
152 // Prepare a first apex cluster.
153 ApicesSPtr cluster_apices = std::make_shared<ClusterApices>();
154
155 // Now that we have seeded the system with the first point of the original
156 // trace, go to the next one.
157 ++m_curIter;
158
159 // And now iterate in the original trace and make sure we detect all the
160 // apicess.
161
162 while(m_curIter != trace.cend())
163 {
164 //qDebug() << "Current trace point: "
165 //<< "(" << m_curIter->x << "," << m_curIter->y << ");";
166
167 // We monitor if we are going up or down a peak.
168
169 if(m_curIter->y > m_prevIter->y)
170 // We are ascending a peak. We do not know if we are at the apex.
171 {
172 //qDebug().noquote() << "We are ascending to an apex.\n";
173 was_ascending_to_apex = true;
174 }
175 else
176 // We are descending a peak.
177 {
178 //qDebug().noquote() << "Descending a peak. ";
179
180 // There are two situations:
181 //
182 // 1. Either we were ascending to an apex, and m_prev is that apex,
183 //
184 // 2. Or we were not ascending to an apex and in fact all we are doing
185 // is going down an apex that occurred more than one trace point ago.
186
187 if(!was_ascending_to_apex)
188 // We were not ascending to an apex.
189 {
190 // Do nothing here.
191
192 //qDebug().noquote()
193 //<< "But, we were not ascending to an apex, so do nothing.\n";
194 }
195 else
196 // We are effectively descending a peak right after the apex was
197 // reached at the previous iteration.
198 {
200
201 //qDebug().noquote()
202 //<< "And, we were ascending to an apex, so "
203 //"m_curApex has become m_prevIter: ("
204 //<< m_curApex->x << "," << m_curApex->y << ")\n";
205
206 // We might have two situations:
207
208 // 1. We had already encountered an apex.
209 // 2. We had not yet encountered an apex.
210
211 if(m_prevApex != trace.cend())
212 // We had already encountered an apex.
213 {
214 // Was that apex far on the left of the current apex ?
215
216 if(m_curApex->x - m_prevApex->x >
218 // The distance is not compatible with both apices to belong
219 // to the same apices cluster.
220 {
221 // We are creating a new isotopic apices.
222
223 // But, since another apex had been encountered already,
224 // that means that an isotopic apices was cooking
225 // already. We must store it.
226
227 if(!cluster_apices->size())
228 qFatal("Cannot be that the apices has no apex.");
229
230 // Store the crafted apices cluster.
231 m_clusters.push_back(cluster_apices);
232
233 //qDebug().noquote()
234 //<< "There was a previous apex already, BUT "
235 //"outside of apices range. "
236 //"Pushing the cooking apices that has size:"
237 //<< cluster_apices->size();
238
239 // Now create a brand new apices for later work.
240 cluster_apices = std::make_shared<ClusterApices>();
241
242 //qDebug() << "Created a brand new apices.";
243
244 // We only start the new apices with the current apex if
245 // that apex is above the threshold.
246
247 if(m_curApex->y > m_threshold)
248 {
249 // And start it with the current apex.
250 cluster_apices->push_back(m_curApex);
251
252 //qDebug()
253 //<< "Since the current apex is above the threshold, "
254 //"we PUSH it to the newly created apices: ("
255 //<< m_curApex->x << "," << m_curApex->y << ")";
256
258
259 //qDebug() << "Set prev apex to be cur apex.";
260 }
261 else
262 {
263 //qDebug()
264 //<< "Since the current apex is below the threshold, "
265 //"we DO NOT push it to the newly created "
266 //"apices: ("
267 //<< m_curApex->x << "," << m_curApex->y << ")";
268
269 // Since previous apex went to the closed apices, we
270 // need to reset it.
271
272 m_prevApex = trace.cend();
273
274 //qDebug() << "Since the previous apex went to the "
275 //"closed apices, and cur apex has too "
276 //"small an intensity, we reset prev apex "
277 //"to trace.cend().";
278 }
279 }
280 else
281 // The distance is compatible with both apices to belong to
282 // the same isotopic apices.
283 {
284
285 // But we only push back the current apex to the apices
286 // if its intensity is above the threshold.
287
288 if(m_curApex->y > m_threshold)
289 // The current apex was above the threshold
290 {
291 cluster_apices->push_back(m_curApex);
292
293 //qDebug().noquote()
294 //<< "There was an apex already inside of apices "
295 //"range. "
296 //"AND, since the current apex was above the "
297 //"threshold, we indeed push it to apices.\n";
298
299 //qDebug().noquote()
300 //<< "Current apex PUSHED: " << m_curApex->x << ", "
301 //<< m_curApex->y;
302
304
305 //qDebug() << "We set prev apex to be cur apex.";
306 }
307 else
308 {
309 //qDebug().noquote()
310 //<< "There was an apex already inside of apices "
311 //"range. "
312 //"BUT, since the current apex was below the "
313 //"threshold, we do not push it to apices.\n";
314
315 //qDebug().noquote()
316 //<< "Current apex NOT pushed: " << m_curApex->x
317 //<< ", " << m_curApex->y;
318 }
319 }
320 }
321 else
322 // No apex was previously found. We are fillin-up a new isotopic
323 // apices.
324 {
325 if(m_curApex->y > m_threshold)
326 // We can actually add that apex to a new isotopic apices.
327 {
328 if(cluster_apices->size())
329 qCritical(
330 "At this point, the apices should be new and "
331 "empty.");
332
333 cluster_apices->push_back(m_curApex);
334
335 //qDebug().noquote()
336 //<< "No previous apex was found. Since current apex' "
337 //"intensity is above threshold, we push it back to "
338 //"the "
339 //"apices.\n";
340
341 // Store current apex as previous apex for next rounds.
343
344 //qDebug().noquote() << "And thus we store the current "
345 //"apex as previous apex.\n";
346 }
347 else
348 {
349 //qDebug().noquote()
350 //<< "No previous apex was found. Since current apex' "
351 //"intensity is below threshold, we do nothing.\n";
352 }
353 }
354 }
355 // End of
356 // ! if(!was_ascending_to_apex)
357 // That is, we were ascending to an apex.
358
359 // Tell what it is: we were not ascending to an apex.
360 was_ascending_to_apex = false;
361 }
362 // End of
363 // ! if(m_curIter->y > m_prevIter->y)
364 // That is, we are descending a peak.
365
366 // At this point, prepare next round.
367
368 //qDebug().noquote()
369 //<< "Preparing next round, with m_prevIter = m_curIter and ++index.\n";
370
372
373 ++m_curIter;
374 }
375 // End of
376 // while(index < trace_size)
377
378 // At this point, if a apices had been cooking, add it.
379
380 if(cluster_apices->size())
381 m_clusters.push_back(cluster_apices);
382
383 return m_clusters.size();
384}
385
386
387Trace::const_iterator
389 Trace::const_iterator iter,
390 double distance_threshold)
391{
392 // We receive an iterator that points to an apex. We want iterate back in the
393 // trace working copy and look if there are apices that are distant less than
394 // 1.1~Th.
395
396 Trace::const_iterator init_iter = iter;
397
398 if(iter == trace.cbegin())
399 return iter;
400
401 // Seed the previous iter to iter, because we'll move from there right away.
402 Trace::const_iterator prev_iter = init_iter;
403
404 Trace::const_iterator last_apex_iter = init_iter;
405
406 // We will need to know if we were ascending to an apex.
407 bool was_ascending_to_apex = false;
408
409 // Now that we have seeded the system, we can move iter one data point:
410 --iter;
411
412 while(iter != trace.cbegin())
413 {
414 // If we are already outside of distance_threshold, return the last apex
415 // that was found (or initial iter if none was encountered)
416
417 if(abs(init_iter->x - iter->x) >= distance_threshold)
418 return last_apex_iter;
419
420 if(iter->y > prev_iter->y)
421 {
422 // New data point has greater intensity. Just record that fact.
423 was_ascending_to_apex = true;
424 }
425 else
426 {
427 // New data point has smaller intensity. We are descending a peak.
428
429 if(was_ascending_to_apex)
430 {
431 // We had an apex at previous iter. We are inside the distance
432 // threshold. That is good. But we still could find another apex
433 // while moving along the trace that is in the distance threshold.
434 // This is why we keep going, but we store the previous iter as
435 // the last encountered apex.
436
437 last_apex_iter = prev_iter;
438 }
439 }
440
441 prev_iter = iter;
442
443 // Move.
444 --iter;
445 }
446
447 //qDebug() << "Init m/z: " << init_iter->x
448 //<< "Returning m/z: " << last_apex_iter->x
449 //<< "at distance:" << std::distance(last_apex_iter, init_iter);
450
451 // At this point last_apex_iter is the same as the initial iter.
452 return last_apex_iter;
453}
454
455
456Trace::const_iterator
458 Trace::const_iterator iter,
459 double distance_threshold)
460{
461 // We receive an iterator that points to an apex. We want iterate back in the
462 // trace working copy and look if there are apices that are distant less than
463 // 1.1~Th.
464
465 Trace::const_iterator init_iter = iter;
466
467 if(iter == trace.cend())
468 return iter;
469
470 // Seed the previous iter to iter, because we'll move from there right away.
471 Trace::const_iterator prev_iter = init_iter;
472
473 Trace::const_iterator last_apex_iter = init_iter;
474
475 // We will need to know if we were ascending to an apex.
476 bool was_ascending_to_apex = false;
477
478 // Now that we have seeded the system, we can move iter one data point:
479 ++iter;
480
481 while(iter != trace.cend())
482 {
483 // If we are already outside of distance_threshold, return the last apex
484 // that was found (or initial iter if none was encountered)
485
486 // FIXME: maybe we should compare prev_iter->x with iter->x so that we
487 // continue moving if all the succcessive apices found are each one in the
488 // distance_threshold from the previous one ?
489
490 if(abs(init_iter->x - iter->x) >= distance_threshold)
491 return last_apex_iter;
492
493 if(iter->y > prev_iter->y)
494 {
495 // New data point has greater intensity. Just record that fact.
496 was_ascending_to_apex = true;
497 }
498 else
499 {
500 // New data point has smaller intensity. We are descending a peak.
501
502 if(was_ascending_to_apex)
503 {
504 // We had an apex at previous iter. We are inside the distance
505 // threshold. That is good. But we still could find another apex
506 // while moving along the trace that is in the distance threshold.
507 // This is why we keep going, but we store the previous iter as
508 // the last encountered apex.
509
510 last_apex_iter = prev_iter;
511 }
512 }
513
514 prev_iter = iter;
515
516 // Move.
517 ++iter;
518 }
519
520 // At this point last_apex_iter is the same as the initial iter.
521 return last_apex_iter;
522}
523
524
525Trace
527{
528 // We start from the vector of apices and try to remake a high resolution
529 // trace out of these apices.
530
531 MapTrace map_trace;
532
533 // The general idea is that apices were detected, and only apices having
534 // their intensity above the threshold were stored. That means that we need to
535 // add points to the trace to reconstruct a meaningful trace. Indeed, imagine
536 // a heavy peptide isotopic cluster: the first peak's apex is below threshold
537 // and was not stored. The second peak is also below. But the third isotopic
538 // cluster peak is above and was stored.
539 //
540 // How do we reconstruct the trace to have all these points that were
541 // preceding the first isotopic cluster apex that was detected.
542 //
543 // The same applies to the last peaks of the cluster that are below the
544 // threshold whil the preceeding ones were above!
545
546 // The general idea is to iterate in the vector of apices and for each apex
547 // that is encountered, ask if there were apices
548
549 // m_clusters is a vector that contains vectors of Trace::const_iter.
550 // Each vector in m_clusters should represent all the apices of a cluster.
551
552 //qDebug() << "Reconstructing trace with " << m_clusters.size() << "clusters.";
553
554 Trace::const_iterator left_begin_iter = trace.cend();
555 Trace::const_iterator right_end_iter = trace.cend();
556
557 for(auto &&cluster_apices : m_clusters)
558 {
559 // cluster_apices is a vector of Trace::const_iterator. If we want to
560 // reconstruct the Trace, we need to iterate through all the DataPoint
561 // objects in between cluster_apices.begin() and cluster_apices.end().
562
563 Trace::const_iterator begin_iter = *(cluster_apices->begin());
564 Trace::const_iterator end_iter = *(std::prev(cluster_apices->end()));
565
566 //qDebug() << "Iterating in a new cluster apices with boundaries:"
567 //<< begin_iter->x << "-" << end_iter->x;
568
569 left_begin_iter =
571
572 //qDebug() << "After backwardFindApex, left_begin_iter points to:"
573 //<< left_begin_iter->toString() << "with distance:"
574 //<< std::distance(left_begin_iter, begin_iter);
575
576 right_end_iter = forwardFindApex(
577 trace, end_iter, 1.5 * INTRA_CLUSTER_INTER_PEAK_DISTANCE);
578
579 for(Trace::const_iterator iter = left_begin_iter; iter != right_end_iter;
580 ++iter)
581 {
582 map_trace[iter->x] = iter->y;
583 }
584
585 // Now reset the left and right iters to intensity 0 to avoid having
586 // disgraceful oblique lines connnecting trace segments.
587
588 map_trace[left_begin_iter->x] = 0;
589 map_trace[std::prev(right_end_iter)->x] = 0;
590
591 }
592
593 return map_trace.toTrace();
594}
595
596
597Trace &
599{
600 //qDebug();
601
602 // Horrible hack to have a non const filtering process.
603 return const_cast<FilterLowIntensitySignalRemoval *>(this)->nonConstFilter(
604 trace);
605}
606
607
608Trace &
610{
611 //qDebug();
612
613 if(trace.size() <= 2)
614 {
615 //qDebug() << "The original trace has less than 3 points. Returning it "
616 //"without modification.";
617 return trace;
618 }
619 //else
620 //qDebug() << "The original trace had" << trace.size() << "data points";
621
622 std::size_t cluster_count = detectClusterApices(trace);
623
624 //qDebug() << "Number of detected cluster apices: " << cluster_count;
625
626 if(!cluster_count)
627 return trace;
628
629 // At this point we want to work on the apices and reconstruct a full
630 // trace.
631
632 Trace reconstructed_trace = reconstructTrace(trace);
633
634 trace = std::move(reconstructed_trace);
635
636 return trace;
637}
638
639
640double
642{
643 return m_threshold;
644}
645
646
647//! Return a string with the textual representation of the configuration data.
648QString
650{
651 return QString("%1|%2").arg(name()).arg(QString::number(m_threshold, 'f', 2));
652}
653
654
655QString
657{
658 return "FilterLowIntensitySignalRemoval";
659}
660
661} // namespace pappso
excetion to use when an item type is not recognized
Redefines the floor intensity of the Trace.
Trace::const_iterator backwardFindApex(const Trace &trace, Trace::const_iterator iter, double distance_threshold)
FilterLowIntensitySignalRemoval(double mean, double std_dev, double threshold)
Trace & filter(Trace &data_points) const override
Trace::const_iterator forwardFindApex(const Trace &trace, Trace::const_iterator iter, double distance_threshold)
FilterLowIntensitySignalRemoval & operator=(const FilterLowIntensitySignalRemoval &other)
void buildFilterFromString(const QString &strBuildParams) override
build this filter using a string
QString toString() const override
Return a string with the textual representation of the configuration data.
Trace toTrace() const
Definition: maptrace.cpp:219
A simple container of DataPoint instances.
Definition: trace.h:148
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39