libpappsomspp
Library for mass spectrometry
pappso::specpeptidoms::SemiGlobalAlignment Class Reference

#include <semiglobalalignment.h>

Public Member Functions

 SemiGlobalAlignment (ScoreValues &score_values, const pappso::PrecisionPtr precision_ptr, AaCode &aaCode)
 
 ~SemiGlobalAlignment ()
 
void fastAlign (const SpOMSSpectrumCsp &spectrum, const QString &protein_seq, const QString &protein_id)
 perform the first alignment search between a protein sequence and a spectrum. The member location heap is filled with the candidates locations. More...
 
void preciseAlign (const SpOMSSpectrumCsp &spectrum, const QString &protein_seq, const QString &protein_id, const std::size_t beginning, const std::size_t length)
 performs the second alignment search between a protein subsequence and a spectrum. IMPLEMENTATION MATRICE DES ORIGINES => ARBRE ? More...
 
void postProcessingAlign (const SpOMSSpectrumCsp &spectrum, const QString &protein_seq, const QString &protein_id, std::size_t beginning, std::size_t length, const std::vector< double > &shifts)
 performs the post-processing : generates corrected spectra and align them More...
 
LocationSaver getLocationSaver () const
 
Scenario getScenario () const
 
const AlignmentgetBestAlignment (const SpOMSSpectrumCsp &spectrum) const
 

Static Public Member Functions

static std::vector< double > getPotentialMassErrors (const Alignment &alignment, const QString &protein_seq)
 

Private Member Functions

void saveBestAlignment (const QString sequence, const SpOMSSpectrumCsp &spectrum, std::size_t offset)
 Stores the best alignment from m_scenario in m_best_alignment. More...
 
void correctAlign (const QString &protein_seq, const QString &protein_id, const SpOMSSpectrumCsp &spectrum, std::vector< std::size_t > peaks_to_remove, std::size_t offset)
 
void updateAlignmentMatrix (const QString &sequence, const std::size_t row_number, const std::vector< AaPosition > aa_positions, const SpOMSSpectrumCsp &spectrum, const bool fast_align, const QString &protein)
 updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenario. More...
 
bool perfectShiftPossible (const QString &sequence, const SpOMSSpectrumCsp &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
 indicates if a perfect shift is possible between the provided positions More...
 
std::size_t perfectShiftPossibleFrom0 (const QString &sequence, const SpOMSSpectrumCsp &spectrum, const std::size_t current_row, const std::size_t r_peak) const
 
std::size_t perfectShiftPossibleEnd (const QString &sequence, const SpOMSSpectrumCsp &spectrum, std::size_t end_row, std::size_t end_peak) const
 

Private Attributes

std::vector< KeyCellm_interest_cells
 
std::vector< std::pair< std::size_t, KeyCell > > m_updated_cells
 
ScoreValuesm_scorevalues
 
const int min_score = 15
 
pappso::PrecisionPtr m_precision_ptr
 
AaCodem_aaCode
 
LocationSaver m_location_saver
 
Scenario m_scenario
 
Alignment m_best_alignment
 
Alignment m_best_corrected_alignment
 
Alignment m_best_post_processed_alignment
 

Detailed Description

Definition at line 67 of file semiglobalalignment.h.

Constructor & Destructor Documentation

◆ SemiGlobalAlignment()

pappso::specpeptidoms::SemiGlobalAlignment::SemiGlobalAlignment ( ScoreValues score_values,
const pappso::PrecisionPtr  precision_ptr,
pappso::AaCode aaCode 
)

Default constructor

Definition at line 40 of file semiglobalalignment.cpp.

42 : m_scorevalues(score_values), m_aaCode(aaCode)
43{
44 m_precision_ptr = precision_ptr;
45 m_interest_cells.push_back({0, 0, 0, 0});
46}

References m_interest_cells, and m_precision_ptr.

◆ ~SemiGlobalAlignment()

pappso::specpeptidoms::SemiGlobalAlignment::~SemiGlobalAlignment ( )

Destructor

Definition at line 581 of file semiglobalalignment.cpp.

582{
583}

Member Function Documentation

◆ correctAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::correctAlign ( const QString &  protein_seq,
const QString &  protein_id,
const SpOMSSpectrumCsp spectrum,
std::vector< std::size_t >  peaks_to_remove,
std::size_t  offset 
)
private

Definition at line 180 of file semiglobalalignment.cpp.

185{
186 Aa aa('A');
187 std::vector<AaPosition> aa_positions;
188 CorrectionTree correction_tree;
189
190 m_interest_cells.at(0).n_row = 0;
191 m_interest_cells.at(0).score = 0;
192 m_interest_cells.at(0).beginning = 0;
193 m_interest_cells.at(0).tree_id = 0;
194 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
195 {
196 m_interest_cells.at(i).n_row = 0;
198 m_interest_cells.at(i).beginning = 0;
199 m_interest_cells.at(i).tree_id = 0;
200 }
201
203 for(qsizetype row_number = 1; row_number <= sequence.size(); row_number++)
204 {
205 if(sequence[row_number - 1] == 'U' or sequence[row_number - 1] == 'X')
206 {
207 continue;
208 }
209 aa = Aa(sequence[row_number - 1].unicode());
210 aa_positions = spectrum->getAaPositions(aa, peaks_to_remove);
211 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum, false, protein_id);
212 }
213
214 // Correction : if complementary peaks are used, corrected spectra without one of the two peaks
215 // are generated and aligned. The best alignment is kept.
217 MIN_ALIGNMENT_SCORE) // We only correct alignments with acceptable scores
218 {
219 saveBestAlignment(sequence, spectrum, offset);
220 for(std::size_t iter : m_best_alignment.peaks)
221 {
222 if(iter > spectrum->getComplementaryPeak(iter))
223 {
224 break;
225 }
226 else if(std::find(m_best_alignment.peaks.begin(),
228 spectrum->getComplementaryPeak(iter)) != m_best_alignment.peaks.end())
229 {
230 correction_tree.addPeaks(iter, spectrum->getComplementaryPeak(iter));
231 }
232 }
233 std::vector<std::vector<std::size_t>> corrections = correction_tree.getPeaks();
234 if(corrections.size() > 0)
235 {
236 for(auto peaks_to_remove : corrections)
237 {
238 correctAlign(sequence, protein_id, spectrum, peaks_to_remove, offset);
239 }
240 }
242 {
244 }
245 }
246}
void updateAlignmentMatrix(const QString &sequence, const std::size_t row_number, const std::vector< AaPosition > aa_positions, const SpOMSSpectrumCsp &spectrum, const bool fast_align, const QString &protein)
updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenar...
void saveBestAlignment(const QString sequence, const SpOMSSpectrumCsp &spectrum, std::size_t offset)
Stores the best alignment from m_scenario in m_best_alignment.
void correctAlign(const QString &protein_seq, const QString &protein_id, const SpOMSSpectrumCsp &spectrum, std::vector< std::size_t > peaks_to_remove, std::size_t offset)
@ aa
best possible : more than one direct MS2 fragmentation in same MSRUN
const int MIN_ALIGNMENT_SCORE(15)
std::vector< std::size_t > peaks

References pappso::specpeptidoms::CorrectionTree::addPeaks(), pappso::specpeptidoms::CorrectionTree::getPeaks(), pappso::specpeptidoms::init, and pappso::specpeptidoms::MIN_ALIGNMENT_SCORE().

◆ fastAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::fastAlign ( const SpOMSSpectrumCsp spectrum,
const QString &  protein_seq,
const QString &  protein_id 
)

perform the first alignment search between a protein sequence and a spectrum. The member location heap is filled with the candidates locations.

Parameters
proteinProtein to align
spectrumSpectrum to align

Definition at line 49 of file semiglobalalignment.cpp.

52{
53 std::size_t sequence_length = protein_seq_in.size();
54 QString protein_seq(protein_seq_in);
55 std::reverse(protein_seq.begin(), protein_seq.end());
56 pappso::Aa aa('A');
57 std::vector<AaPosition> aa_positions;
58 int score_found, score_shift, best_score, current_score;
59 std::size_t best_column;
60 m_interest_cells.at(0).n_row = 0;
61 m_interest_cells.at(0).score = 0;
62 m_interest_cells.at(0).beginning = 0;
63 m_interest_cells.at(0).tree_id = 0;
64
65 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
66 {
67 m_interest_cells.at(i).n_row = 0;
69 m_interest_cells.at(i).beginning = 0;
70 m_interest_cells.at(i).tree_id = 0;
71 }
72
73 for(std::size_t iter = m_interest_cells.size(); iter < spectrum->size(); iter++)
74 {
76 }
78 for(std::size_t row_number = 1; row_number <= sequence_length; row_number++)
79 {
80 if(protein_seq[row_number - 1] == 'U' or protein_seq[row_number - 1] == 'X')
81 {
82 continue;
83 }
84 aa = Aa(protein_seq[row_number - 1].unicode());
85 aa_positions = spectrum->getAaPositions(aa);
86 updateAlignmentMatrix(protein_seq, row_number, aa_positions, spectrum, true, protein_id);
87 }
88}
Definition: aa.h:45

References pappso::specpeptidoms::init.

◆ getBestAlignment()

const pappso::specpeptidoms::Alignment & pappso::specpeptidoms::SemiGlobalAlignment::getBestAlignment ( const SpOMSSpectrumCsp spectrum) const

Definition at line 751 of file semiglobalalignment.cpp.

752{
753 return m_best_alignment;
754}

◆ getLocationSaver()

pappso::specpeptidoms::LocationSaver pappso::specpeptidoms::SemiGlobalAlignment::getLocationSaver ( ) const

Definition at line 586 of file semiglobalalignment.cpp.

587{
588 return m_location_saver;
589}

◆ getPotentialMassErrors()

std::vector< double > pappso::specpeptidoms::SemiGlobalAlignment::getPotentialMassErrors ( const Alignment alignment,
const QString &  protein_seq 
)
static

Definition at line 757 of file semiglobalalignment.cpp.

759{
760 std::vector<double> potential_mass_errors(alignment.shifts);
761 double shift = alignment.end_shift;
762 std::size_t index = alignment.beginning - 1;
763 while(shift > 0 && index > 0)
764 {
765 potential_mass_errors.push_back(shift);
766 shift -= Aa(protein_seq.at(index).unicode()).getMass();
767 index--;
768 }
769 shift = alignment.begin_shift;
770 index = alignment.end + 1;
771 while(shift > 0 && index < (std::size_t)protein_seq.size())
772 {
773 potential_mass_errors.push_back(shift);
774 shift -= Aa(protein_seq.at(index).unicode()).getMass();
775 index++;
776 }
777 return potential_mass_errors;
778}

References pappso::specpeptidoms::Alignment::begin_shift, pappso::specpeptidoms::Alignment::beginning, pappso::specpeptidoms::Alignment::end, pappso::specpeptidoms::Alignment::end_shift, pappso::Aa::getMass(), pappso::specpeptidoms::shift, and pappso::specpeptidoms::Alignment::shifts.

◆ getScenario()

pappso::specpeptidoms::Scenario pappso::specpeptidoms::SemiGlobalAlignment::getScenario ( ) const

Definition at line 592 of file semiglobalalignment.cpp.

593{
594 return m_scenario;
595}

◆ perfectShiftPossible()

bool pappso::specpeptidoms::SemiGlobalAlignment::perfectShiftPossible ( const QString &  sequence,
const SpOMSSpectrumCsp spectrum,
const std::size_t  origin_row,
const std::size_t  current_row,
const std::size_t  l_peak,
const std::size_t  r_peak 
) const
private

indicates if a perfect shift is possible between the provided positions

Parameters
sequenceReversed sequence of the protein being aligned
spectrumSpectrum being aligned
origin_rowbeginning row of the aa gap to verify (== index of the first missing aa in sequence)
current_rowrow being processed (== index of the current AaPosition in sequence)
l_peakleft peak index of the mz gap to verify
r_peakright peak index of the mz gap to verify

Definition at line 510 of file semiglobalalignment.cpp.

516{
517 double missing_mass = 0;
518 for(QString::const_iterator iter = sequence.begin() + origin_row;
519 iter != sequence.begin() + current_row;
520 iter++)
521 {
522 missing_mass += Aa(iter->unicode()).getMass();
523 }
524 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
525 return mz_range.contains(spectrum->getMZShift(l_peak, r_peak));
526}

References pappso::MzRange::contains(), and pappso::Aa::getMass().

◆ perfectShiftPossibleEnd()

std::size_t pappso::specpeptidoms::SemiGlobalAlignment::perfectShiftPossibleEnd ( const QString &  sequence,
const SpOMSSpectrumCsp spectrum,
std::size_t  end_row,
std::size_t  end_peak 
) const
private

Definition at line 555 of file semiglobalalignment.cpp.

560{
561 std::size_t perfect_shift_end = end_row + 1;
562 double missing_mass = spectrum->getMissingMass(end_peak);
563 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
564 double aa_mass = 0;
565 while(aa_mass < missing_mass && perfect_shift_end < (std::size_t)sequence.size() &&
566 !mz_range.contains(aa_mass))
567 {
568 aa_mass += Aa(sequence.at(perfect_shift_end - 1).unicode()).getMass();
569 perfect_shift_end++;
570 }
571 if(mz_range.contains(aa_mass))
572 {
573 return perfect_shift_end - 1;
574 }
575 else
576 {
577 return end_row;
578 }
579}

References pappso::MzRange::contains(), and pappso::Aa::getMass().

◆ perfectShiftPossibleFrom0()

std::size_t pappso::specpeptidoms::SemiGlobalAlignment::perfectShiftPossibleFrom0 ( const QString &  sequence,
const SpOMSSpectrumCsp spectrum,
const std::size_t  current_row,
const std::size_t  r_peak 
) const
private

Definition at line 529 of file semiglobalalignment.cpp.

534{
535 std::size_t perfect_shift_origin = current_row;
536 double missing_mass = spectrum->getMZShift(0, r_peak);
537 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
538 double aa_mass = 0;
539 while(aa_mass < missing_mass && perfect_shift_origin > 0 && !mz_range.contains(aa_mass))
540 {
541 aa_mass += Aa(sequence.at(perfect_shift_origin - 1).unicode()).getMass();
542 perfect_shift_origin--;
543 }
544 if(mz_range.contains(aa_mass))
545 {
546 return perfect_shift_origin;
547 }
548 else
549 {
550 return current_row;
551 }
552}

References pappso::MzRange::contains(), and pappso::Aa::getMass().

◆ postProcessingAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::postProcessingAlign ( const SpOMSSpectrumCsp spectrum,
const QString &  protein_seq,
const QString &  protein_id,
std::size_t  beginning,
std::size_t  length,
const std::vector< double > &  shifts 
)

performs the post-processing : generates corrected spectra and align them

Parameters
shiftsList of potential precursor mass errors to test

Definition at line 249 of file semiglobalalignment.cpp.

255{
256 std::size_t current_SPC = m_best_alignment.SPC;
257 int current_best_score = m_best_alignment.score;
259 for(double precursor_mass_error : shifts)
260 {
261 SpOMSSpectrumCsp corrected_spectrum =
262 std::make_shared<SpOMSSpectrum>(SpOMSSpectrum(spectrum, precursor_mass_error));
263 preciseAlign(corrected_spectrum, protein_seq, protein_id, beginning, length);
265 {
267 }
268 }
269 if(m_best_post_processed_alignment.SPC > current_SPC &&
270 m_best_post_processed_alignment.score >= current_best_score)
271 {
273 }
274}
void preciseAlign(const SpOMSSpectrumCsp &spectrum, const QString &protein_seq, const QString &protein_id, const std::size_t beginning, const std::size_t length)
performs the second alignment search between a protein subsequence and a spectrum....
std::shared_ptr< const SpOMSSpectrum > SpOMSSpectrumCsp
Definition: spomsspectrum.h:65

◆ preciseAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::preciseAlign ( const SpOMSSpectrumCsp spectrum,
const QString &  protein_seq,
const QString &  protein_id,
const std::size_t  beginning,
const std::size_t  length 
)

performs the second alignment search between a protein subsequence and a spectrum. IMPLEMENTATION MATRICE DES ORIGINES => ARBRE ?

Definition at line 91 of file semiglobalalignment.cpp.

96{
97 std::size_t length2;
98 if((qsizetype)(beginning + length) <= protein_seq.size())
99 {
100 length2 = length;
101 }
102 else
103 {
104 length2 = protein_seq.size() - beginning;
105 }
106
107 QString sequence = protein_seq.sliced(protein_seq.size() - beginning - length2, length2);
108 std::reverse(sequence.begin(), sequence.end());
109 Aa aa('A');
110 std::vector<AaPosition> aa_positions;
111 CorrectionTree correction_tree;
112
113 m_scenario.reserve(length2 + 1, spectrum->size());
114 m_interest_cells.reserve(spectrum->size());
115 m_interest_cells.at(0).n_row = 0;
116 m_interest_cells.at(0).score = 0;
117 m_interest_cells.at(0).beginning = 0;
118 m_interest_cells.at(0).tree_id = 0;
119 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
120 {
121 m_interest_cells.at(i).n_row = 0;
123 m_interest_cells.at(i).beginning = 0;
124 m_interest_cells.at(i).tree_id = 0;
125 }
126 for(std::size_t iter = m_interest_cells.size(); iter < spectrum->size(); iter++)
127 {
128 m_interest_cells.push_back({0, m_scorevalues.get(ScoreType::init), 0, 0});
129 }
131 for(std::size_t row_number = 1; row_number <= length2; row_number++)
132 {
133 if(sequence[row_number - 1] == 'U' or sequence[row_number - 1] == 'X')
134 {
135 continue;
136 }
137 aa = Aa(sequence[row_number - 1].unicode());
138 aa_positions = spectrum->getAaPositions(aa);
139 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum, false, protein_id);
140 }
141 saveBestAlignment(sequence, spectrum, protein_seq.size() - beginning);
142
143 // Correction : if complementary peaks are used, corrected spectra without one of the two peaks
144 // are generated and aligned. The best alignment is kept.
146 MIN_ALIGNMENT_SCORE) // We only correct alignments with acceptable scores
147 {
149 for(std::size_t iter : m_best_alignment.peaks)
150 {
151 if(iter > spectrum->getComplementaryPeak(iter))
152 {
153 break;
154 }
155 else if(std::find(m_best_alignment.peaks.begin(),
157 spectrum->getComplementaryPeak(iter)) != m_best_alignment.peaks.end())
158 {
159 correction_tree.addPeaks(iter, spectrum->getComplementaryPeak(iter));
160 }
161 }
162 std::vector<std::vector<std::size_t>> corrections = correction_tree.getPeaks();
163 if(corrections.size() > 0)
164 {
165 std::cin.get();
166 m_best_alignment.score = 0; // Reset the best alignment score (we dont want to keep the
167 // original alignment if corrections are needed)
168 for(auto peaks_to_remove : corrections)
169 {
171 sequence, protein_id, spectrum, peaks_to_remove, protein_seq.size() - beginning);
172 }
173 std::cin.get();
175 }
176 }
177}
void reserve(std::size_t n_rows, std::size_t n_columns)
Allocate new storage to the backtracking matrix if needed.
Definition: scenario.cpp:65

References pappso::specpeptidoms::CorrectionTree::addPeaks(), pappso::specpeptidoms::CorrectionTree::getPeaks(), pappso::specpeptidoms::init, and pappso::specpeptidoms::MIN_ALIGNMENT_SCORE().

◆ saveBestAlignment()

void pappso::specpeptidoms::SemiGlobalAlignment::saveBestAlignment ( const QString  sequence,
const SpOMSSpectrumCsp spectrum,
std::size_t  offset 
)
private

Stores the best alignment from m_scenario in m_best_alignment.

Definition at line 598 of file semiglobalalignment.cpp.

601{
602 m_best_alignment.peaks.clear();
603 m_best_alignment.shifts.clear();
604 std::vector<QString> temp_interpretation;
605 std::size_t previous_row; // FIXME : may be used uninitialised
606 std::size_t previous_column = 0;
607 int score_found, score_shift;
608 std::size_t perfect_shift_end;
609 std::pair<std::vector<ScenarioCell>, int> best_alignment = m_scenario.getBestAlignment();
610 m_best_alignment.score = best_alignment.second;
611 QString skipped_aa;
612 double skipped_mass;
613 AlignType first_align_type;
614 // Retrieving beginning and end
615 m_best_alignment.beginning = offset - best_alignment.first.front().previous_row;
616 m_best_alignment.end = offset - best_alignment.first.back().previous_row;
617
618 // Filling temp_interpretation and peaks vectors
619 for(auto cell : best_alignment.first)
620 {
621 switch(cell.alignment_type)
622 {
623 case AlignType::found:
624 temp_interpretation.push_back(sequence.at(previous_row - 1));
625 if(previous_row > cell.previous_row + 1)
626 {
627 skipped_mass = Aa(sequence.at(previous_row - 1).unicode()).getMass();
628 skipped_aa =
629 sequence.sliced(cell.previous_row, previous_row - cell.previous_row - 1);
630 for(auto aa : skipped_aa)
631 {
632 temp_interpretation.push_back('[' + aa + ']');
633 skipped_mass += Aa(aa.unicode()).getMass();
634 }
635 temp_interpretation.push_back(
636 '[' +
637 QString::number(spectrum->getMZShift(cell.previous_column, previous_column) -
638 skipped_mass) +
639 ']');
640 }
641 m_best_alignment.peaks.push_back(cell.previous_column);
642 break;
644 temp_interpretation.push_back('[' + sequence.at(previous_row - 1) + ']');
645 break;
646 case AlignType::shift:
647 m_best_alignment.peaks.push_back(cell.previous_column);
648 temp_interpretation.push_back(sequence.at(previous_row - 1));
649 temp_interpretation.push_back(
650 '[' +
651 QString::number(spectrum->getMZShift(cell.previous_column, previous_column) -
652 Aa(sequence.at(previous_row - 1).unicode()).getMass()) +
653 ']');
654 m_best_alignment.shifts.push_back(
655 spectrum->getMZShift(cell.previous_column, previous_column) -
656 Aa(sequence.at(previous_row - 1).unicode()).getMass());
657 break;
659 m_best_alignment.peaks.push_back(cell.previous_column);
660 skipped_aa = sequence.sliced(cell.previous_row, previous_row - cell.previous_row);
661 std::reverse(skipped_aa.begin(), skipped_aa.end());
662 temp_interpretation.push_back(skipped_aa);
663 break;
664 case AlignType::init:
665 previous_row = cell.previous_row;
666 previous_column = cell.previous_column;
667 m_best_alignment.peaks.push_back(cell.previous_column);
668 break;
669 }
670 previous_row = cell.previous_row;
671 previous_column = cell.previous_column;
672 }
673 std::reverse(temp_interpretation.begin(), temp_interpretation.end());
674 std::reverse(m_best_alignment.peaks.begin(), m_best_alignment.peaks.end());
675
676 // Compute begin_shift and end_shift
677 MzRange zero(0, m_precision_ptr);
678 m_best_alignment.begin_shift = spectrum->getMZShift(0, m_best_alignment.peaks.front());
679 m_best_alignment.end_shift = spectrum->getMissingMass(m_best_alignment.peaks.back());
680 if(zero.contains(m_best_alignment.end_shift))
681 {
683 }
684
685 // Computing SPC
687 for(auto peak : m_best_alignment.peaks)
688 {
689 switch(spectrum->at(peak).type)
690 {
692 qDebug() << peak << "native";
694 break;
696 qDebug() << peak << "both";
698 break;
700 qDebug() << peak << "synthetic";
701 break;
703 qDebug() << peak << "symmetric";
705 break;
706 }
707 }
708
709 // Final check of the end shift
711 {
712 perfect_shift_end = perfectShiftPossibleEnd(sequence,
713 spectrum,
714 best_alignment.first.front().previous_row,
715 m_best_alignment.peaks.back());
716 if(perfect_shift_end != best_alignment.first.front().previous_row)
717 {
718 skipped_aa =
719 sequence.sliced(best_alignment.first.front().previous_row,
720 perfect_shift_end - best_alignment.first.front().previous_row);
721 for(auto aa = skipped_aa.begin(); aa != skipped_aa.end(); aa++)
722 {
723 temp_interpretation.push_back('[' + *aa + ']');
724 }
725 m_best_alignment.beginning = offset - perfect_shift_end;
727 }
728 else
729 {
731 }
732 }
733
734 // Writing final interpretation
737 {
738 m_best_alignment.interpretation += '[' + QString::number(m_best_alignment.end_shift) + ']';
739 }
740 for(auto str = temp_interpretation.rbegin(); str != temp_interpretation.rend(); str++)
741 {
743 }
745 {
746 m_best_alignment.interpretation += '[' + QString::number(m_best_alignment.begin_shift) + ']';
747 }
748}
std::pair< std::vector< ScenarioCell >, int > getBestAlignment() const
Returns the scenario cells corresponding to the best alignment and the best alignment's score.
Definition: scenario.cpp:72
std::size_t perfectShiftPossibleEnd(const QString &sequence, const SpOMSSpectrumCsp &spectrum, std::size_t end_row, std::size_t end_peak) const
@ synthetic
does not correspond to existing peak, for computational purpose
@ both
both, the ion and the complement exists in the original spectrum
@ symmetric
new peak : computed symmetric mass from a corresponding native peak

References pappso::specglob::both, pappso::MzRange::contains(), pappso::specpeptidoms::found, pappso::specpeptidoms::foundShift, pappso::Aa::getMass(), pappso::specpeptidoms::init, pappso::specglob::native, pappso::specpeptidoms::notFound, pappso::specpeptidoms::perfectShift, pappso::specpeptidoms::shift, pappso::specglob::symmetric, and pappso::specglob::synthetic.

◆ updateAlignmentMatrix()

void pappso::specpeptidoms::SemiGlobalAlignment::updateAlignmentMatrix ( const QString &  sequence,
const std::size_t  row_number,
const std::vector< AaPosition aa_positions,
const SpOMSSpectrumCsp spectrum,
const bool  fast_align,
const QString &  protein 
)
private

updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenario.

Parameters
sequenceReversed sequence of the protein being aligned
row_numbernumber of the row to update (== index in sequence of the amino acid being aligned)
aa_positionslist of the AaPositions of the current amino acid
spectrumSpectrum being aligned
fast_alignWhether to use the fast version of the algorithm (for 1st alignemnt step)

Definition at line 277 of file semiglobalalignment.cpp.

284{
285 int score_found, score_shift, best_score, current_score, alt_score, tree_id;
286 uint32_t condition; // FIXME : may be used uninitialised
287 std::size_t best_column, alt_column, shift, beginning, missing_aas, length, perfect_shift_origin;
288 KeyCell *current_cell_ptr, *tested_cell_ptr;
289 AlignType alignment_type, temp_align_type;
290
291 m_updated_cells.reserve(aa_positions.size());
292
293 // Computation of the threePeaks condition, see spomsspectrum.h for more details.
294 if(fast_align)
295 {
296 condition = 3;
297 if(row_number > 1)
298 {
299 condition += 2 << m_aaCode.getAaCode(sequence[row_number - 2].unicode());
300 qDebug() << "condition" << condition << sequence[row_number - 2]
301 << m_aaCode.getAaCode(sequence[row_number - 2].unicode());
302 }
303 }
304
305 for(std::vector<AaPosition>::const_iterator aa_position = aa_positions.begin();
306 aa_position != aa_positions.end();
307 aa_position++)
308 {
309 if(((condition & aa_position->condition) != 0) ||
310 !fast_align) // Verification of the threePeaks condition (only during first alignment).
311 {
312 current_cell_ptr = &m_interest_cells.at(aa_position->r_peak);
313 if(spectrum->peakType(aa_position->r_peak) ==
315 {
318 }
319 else
320 {
321 score_found = m_scorevalues.get(ScoreType::found);
323 }
324
325 // not found case (always computed)
326 best_column = aa_position->r_peak;
327 best_score = current_cell_ptr->score + (row_number - current_cell_ptr->n_row) *
329 beginning = current_cell_ptr->beginning;
330 tree_id = current_cell_ptr->tree_id;
331 alignment_type = AlignType::notFound;
332
333 // found case (Can only happen if the left peak is supported)
334 if(aa_position->l_support)
335 {
336 tested_cell_ptr = &m_interest_cells.at(aa_position->l_peak);
337 if(aa_position->l_peak == 0)
338 {
339 alt_score = tested_cell_ptr->score + score_found;
340 }
341 else
342 {
343 if(tested_cell_ptr->n_row == row_number - 1)
344 {
345 alt_score = tested_cell_ptr->score +
346 (row_number - tested_cell_ptr->n_row - 1) *
348 score_found;
349 }
350 else
351 {
352 alt_score = tested_cell_ptr->score +
353 (row_number - tested_cell_ptr->n_row - 1) *
355 score_shift;
356 }
357 }
358 if(alt_score >= best_score)
359 {
360 alignment_type = AlignType::found;
361 best_score = alt_score;
362 best_column = aa_position->l_peak;
363 if(best_column == 0)
364 {
365 if(row_number < ALIGNMENT_SURPLUS)
366 {
367 beginning = 0;
368 }
369 else
370 {
371 beginning =
372 std::max((std::size_t)(row_number - ALIGNMENT_SURPLUS), (std::size_t)0);
373 }
374 if(fast_align)
375 {
376 tree_id = m_location_saver.getNextTree();
377 }
378 }
379 else
380 {
381 beginning = tested_cell_ptr->beginning;
382 tree_id = tested_cell_ptr->tree_id;
383 }
384 }
385 }
386
387 // generic shift case (all shifts are tested)
388 shift = 0;
389 while(shift < aa_position->next_l_peak)
390 {
391 tested_cell_ptr = &m_interest_cells.at(aa_position->next_l_peak - shift);
392 // verification saut parfait
393 if(perfectShiftPossible(sequence,
394 spectrum,
395 tested_cell_ptr->n_row,
396 row_number,
397 aa_position->next_l_peak - shift,
398 aa_position->r_peak))
399 {
400 alt_score = tested_cell_ptr->score +
401 (row_number - tested_cell_ptr->n_row - 1) *
403 score_found;
404 temp_align_type = AlignType::perfectShift;
405 }
406 else
407 {
408 alt_score = tested_cell_ptr->score +
409 (row_number - tested_cell_ptr->n_row - 1) *
411 score_shift;
412 temp_align_type = AlignType::shift;
413 }
414 if(alt_score > best_score)
415 {
416 alignment_type = temp_align_type;
417 best_score = alt_score;
418 best_column = aa_position->next_l_peak - shift;
419 beginning = tested_cell_ptr->beginning;
420 tree_id = tested_cell_ptr->tree_id;
421 }
422 shift++;
423 }
424
425 // case shift from column 0 (no penalties if all precedent amino acids are missed)
426 tested_cell_ptr = &m_interest_cells.at(0);
427 // verification saut parfait
428 perfect_shift_origin =
429 perfectShiftPossibleFrom0(sequence, spectrum, row_number, aa_position->r_peak);
430 if(perfect_shift_origin != row_number)
431 {
432 alt_score = tested_cell_ptr->score + score_found;
433 temp_align_type = AlignType::perfectShift;
434 }
435 else
436 {
437 alt_score = tested_cell_ptr->score + score_shift;
438 temp_align_type = AlignType::shift;
439 }
440 if(alt_score > best_score)
441 {
442 alignment_type = temp_align_type;
443 best_score = alt_score;
444 best_column = 0;
445 missing_aas =
446 std::floor(spectrum->getMZShift(0, aa_position->l_peak) / m_aaCode.getMass('G'));
447 if(row_number < ALIGNMENT_SURPLUS + missing_aas)
448 {
449 beginning = 0;
450 }
451 else
452 {
453 beginning = std::max((std::size_t)(row_number - missing_aas - ALIGNMENT_SURPLUS),
454 (std::size_t)0);
455 }
456 if(fast_align)
457 {
458 tree_id = m_location_saver.getNextTree();
459 }
460 }
461 if(best_column != aa_position->r_peak)
462 {
463 m_updated_cells.push_back(
464 {aa_position->r_peak, {row_number, best_score, beginning, tree_id}});
465 }
466 if(best_score > m_location_saver.getMinScore(tree_id) && fast_align)
467 {
468 length =
469 row_number - beginning + 1 +
470 std::ceil(spectrum->getMissingMass(aa_position->r_peak) / Aa('G').getMass()) +
472 m_location_saver.addLocation(beginning, length, tree_id, best_score, protein);
473 }
474 else if(!fast_align)
475 {
476 if(alignment_type == AlignType::perfectShift && best_column == 0)
477 {
478 m_scenario.saveOrigin(row_number,
479 aa_position->r_peak,
480 perfect_shift_origin,
481 0,
482 best_score,
484 }
485 else
486 {
487 m_scenario.saveOrigin(row_number,
488 aa_position->r_peak,
489 m_interest_cells.at(best_column).n_row,
490 best_column,
491 best_score,
492 alignment_type);
493 }
494 }
495 }
496 }
497
498 // Update row number in column 0
499 m_updated_cells.push_back({0, {row_number, 0, 0, 0}});
500
501 // Save updated key cells in the matrix
502 while(m_updated_cells.size() > 0)
503 {
504 m_interest_cells.at(m_updated_cells.back().first) = m_updated_cells.back().second;
505 m_updated_cells.pop_back();
506 }
507}
uint8_t getAaCode(char aa_letter) const
get the integer code of an amino acid with the one letter code
Definition: aacode.cpp:81
double getMass(uint8_t aa_code) const
get the mass of the amino acid given its integer code the amino acid can bear some modification (if a...
Definition: aacode.cpp:179
int getMinScore(int tree_id) const
Returns the minimum score for a location with the provided tree_id to be saved in the heap.
void addLocation(std::size_t beginning, std::size_t length, int tree, int score, const QString &protein)
Adds a location to the locations heap. If a saved location has the same tree_id, it will replace it....
std::size_t getNextTree()
Creates a new alignment tree and returns its id.
void saveOrigin(std::size_t current_row, std::size_t current_column, std::size_t previous_row, std::size_t previous_column, int score, AlignType alignment_type)
Stores the origin (cell location and alignment type) of the provided cell in the backtracking matrix.
Definition: scenario.cpp:41
bool perfectShiftPossible(const QString &sequence, const SpOMSSpectrumCsp &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
indicates if a perfect shift is possible between the provided positions
std::vector< std::pair< std::size_t, KeyCell > > m_updated_cells
std::size_t perfectShiftPossibleFrom0(const QString &sequence, const SpOMSSpectrumCsp &spectrum, const std::size_t current_row, const std::size_t r_peak) const
const uint ALIGNMENT_SURPLUS(5)

References pappso::specpeptidoms::ALIGNMENT_SURPLUS(), pappso::specpeptidoms::KeyCell::beginning, pappso::specglob::both, pappso::specpeptidoms::found, pappso::specpeptidoms::foundDouble, pappso::specpeptidoms::foundShift, pappso::specpeptidoms::foundShiftDouble, pappso::specpeptidoms::KeyCell::n_row, pappso::specpeptidoms::notFound, pappso::specpeptidoms::perfectShift, pappso::specpeptidoms::KeyCell::score, pappso::specpeptidoms::shift, and pappso::specpeptidoms::KeyCell::tree_id.

Member Data Documentation

◆ m_aaCode

AaCode& pappso::specpeptidoms::SemiGlobalAlignment::m_aaCode
private

Definition at line 126 of file semiglobalalignment.h.

◆ m_best_alignment

Alignment pappso::specpeptidoms::SemiGlobalAlignment::m_best_alignment
private

Definition at line 129 of file semiglobalalignment.h.

◆ m_best_corrected_alignment

Alignment pappso::specpeptidoms::SemiGlobalAlignment::m_best_corrected_alignment
private

Definition at line 129 of file semiglobalalignment.h.

◆ m_best_post_processed_alignment

Alignment pappso::specpeptidoms::SemiGlobalAlignment::m_best_post_processed_alignment
private

Definition at line 129 of file semiglobalalignment.h.

◆ m_interest_cells

std::vector<KeyCell> pappso::specpeptidoms::SemiGlobalAlignment::m_interest_cells
private

Definition at line 121 of file semiglobalalignment.h.

Referenced by SemiGlobalAlignment().

◆ m_location_saver

LocationSaver pappso::specpeptidoms::SemiGlobalAlignment::m_location_saver
private

Definition at line 127 of file semiglobalalignment.h.

◆ m_precision_ptr

pappso::PrecisionPtr pappso::specpeptidoms::SemiGlobalAlignment::m_precision_ptr
private

Definition at line 125 of file semiglobalalignment.h.

Referenced by SemiGlobalAlignment().

◆ m_scenario

Scenario pappso::specpeptidoms::SemiGlobalAlignment::m_scenario
private

Definition at line 128 of file semiglobalalignment.h.

◆ m_scorevalues

ScoreValues& pappso::specpeptidoms::SemiGlobalAlignment::m_scorevalues
private

Definition at line 123 of file semiglobalalignment.h.

◆ m_updated_cells

std::vector<std::pair<std::size_t, KeyCell> > pappso::specpeptidoms::SemiGlobalAlignment::m_updated_cells
private

Definition at line 122 of file semiglobalalignment.h.

◆ min_score

const int pappso::specpeptidoms::SemiGlobalAlignment::min_score = 15
private

Definition at line 124 of file semiglobalalignment.h.


The documentation for this class was generated from the following files: