36#include "../../amino_acid/aacode.h"
37#include "../../protein/protein.h"
42 : m_scorevalues(score_values), m_aaCode(aaCode)
50 const QString &protein_seq_in,
51 const QString &protein_id)
53 std::size_t sequence_length = protein_seq_in.size();
54 QString protein_seq(protein_seq_in);
55 std::reverse(protein_seq.begin(), protein_seq.end());
57 std::vector<AaPosition> aa_positions;
58 int score_found, score_shift, best_score, current_score;
59 std::size_t best_column;
60 m_interest_cells.at(0).n_row = 0;
61 m_interest_cells.at(0).score = 0;
62 m_interest_cells.at(0).beginning = 0;
63 m_interest_cells.at(0).tree_id = 0;
65 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
67 m_interest_cells.at(i).n_row = 0;
69 m_interest_cells.at(i).beginning = 0;
70 m_interest_cells.at(i).tree_id = 0;
73 for(std::size_t iter = m_interest_cells.size(); iter < spectrum->size(); iter++)
75 m_interest_cells.push_back({0, m_scorevalues.get(
ScoreType::init), 0, 0});
77 m_location_saver.resetLocationSaver();
78 for(std::size_t row_number = 1; row_number <= sequence_length; row_number++)
80 if(protein_seq[row_number - 1] ==
'U' or protein_seq[row_number - 1] ==
'X')
84 aa =
Aa(protein_seq[row_number - 1].unicode());
85 aa_positions = spectrum->getAaPositions(aa);
86 updateAlignmentMatrix(protein_seq, row_number, aa_positions, spectrum,
true, protein_id);
92 const QString &protein_seq,
93 const QString &protein_id,
94 const std::size_t beginning,
95 const std::size_t length)
98 if((qsizetype)(beginning + length) <= protein_seq.size())
104 length2 = protein_seq.size() - beginning;
107 QString sequence = protein_seq.sliced(protein_seq.size() - beginning - length2, length2);
108 std::reverse(sequence.begin(), sequence.end());
110 std::vector<AaPosition> aa_positions;
113 m_scenario.reserve(length2 + 1, spectrum->size());
114 m_interest_cells.reserve(spectrum->size());
115 m_interest_cells.at(0).n_row = 0;
116 m_interest_cells.at(0).score = 0;
117 m_interest_cells.at(0).beginning = 0;
118 m_interest_cells.at(0).tree_id = 0;
119 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
121 m_interest_cells.at(i).n_row = 0;
123 m_interest_cells.at(i).beginning = 0;
124 m_interest_cells.at(i).tree_id = 0;
126 for(std::size_t iter = m_interest_cells.size(); iter < spectrum->size(); iter++)
128 m_interest_cells.push_back({0, m_scorevalues.get(
ScoreType::init), 0, 0});
130 m_scenario.resetScenario();
131 for(std::size_t row_number = 1; row_number <= length2; row_number++)
133 if(sequence[row_number - 1] ==
'U' or sequence[row_number - 1] ==
'X')
137 aa =
Aa(sequence[row_number - 1].unicode());
138 aa_positions = spectrum->getAaPositions(aa);
139 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum,
false, protein_id);
141 saveBestAlignment(sequence, spectrum, protein_seq.size() - beginning);
145 if(m_scenario.getBestScore() >
148 m_best_corrected_alignment.score = 0;
149 for(std::size_t iter : m_best_alignment.peaks)
151 if(iter > spectrum->getComplementaryPeak(iter))
155 else if(std::find(m_best_alignment.peaks.begin(),
156 m_best_alignment.peaks.end(),
157 spectrum->getComplementaryPeak(iter)) != m_best_alignment.peaks.end())
159 correction_tree.
addPeaks(iter, spectrum->getComplementaryPeak(iter));
162 std::vector<std::vector<std::size_t>> corrections = correction_tree.
getPeaks();
163 if(corrections.size() > 0)
166 m_best_alignment.score = 0;
168 for(
auto peaks_to_remove : corrections)
171 sequence, protein_id, spectrum, peaks_to_remove, protein_seq.size() - beginning);
174 m_best_alignment = m_best_corrected_alignment;
181 const QString &protein_id,
183 std::vector<std::size_t> peaks_to_remove,
187 std::vector<AaPosition> aa_positions;
190 m_interest_cells.at(0).n_row = 0;
191 m_interest_cells.at(0).score = 0;
192 m_interest_cells.at(0).beginning = 0;
193 m_interest_cells.at(0).tree_id = 0;
194 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
196 m_interest_cells.at(i).n_row = 0;
198 m_interest_cells.at(i).beginning = 0;
199 m_interest_cells.at(i).tree_id = 0;
202 m_scenario.resetScenario();
203 for(qsizetype row_number = 1; row_number <= sequence.size(); row_number++)
205 if(sequence[row_number - 1] ==
'U' or sequence[row_number - 1] ==
'X')
209 aa =
Aa(sequence[row_number - 1].unicode());
210 aa_positions = spectrum->getAaPositions(aa, peaks_to_remove);
211 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum,
false, protein_id);
216 if(m_scenario.getBestScore() >
219 saveBestAlignment(sequence, spectrum, offset);
220 for(std::size_t iter : m_best_alignment.peaks)
222 if(iter > spectrum->getComplementaryPeak(iter))
226 else if(std::find(m_best_alignment.peaks.begin(),
227 m_best_alignment.peaks.end(),
228 spectrum->getComplementaryPeak(iter)) != m_best_alignment.peaks.end())
230 correction_tree.
addPeaks(iter, spectrum->getComplementaryPeak(iter));
233 std::vector<std::vector<std::size_t>> corrections = correction_tree.
getPeaks();
234 if(corrections.size() > 0)
236 for(
auto peaks_to_remove : corrections)
238 correctAlign(sequence, protein_id, spectrum, peaks_to_remove, offset);
241 else if(m_scenario.getBestScore() > m_best_corrected_alignment.score)
243 m_best_corrected_alignment = m_best_alignment;
250 const QString &protein_seq,
251 const QString &protein_id,
252 std::size_t beginning,
254 const std::vector<double> &shifts)
256 std::size_t current_SPC = m_best_alignment.SPC;
257 int current_best_score = m_best_alignment.score;
258 m_best_post_processed_alignment.score = 0;
259 for(
double precursor_mass_error : shifts)
262 std::make_shared<SpOMSSpectrum>(
SpOMSSpectrum(spectrum, precursor_mass_error));
263 preciseAlign(corrected_spectrum, protein_seq, protein_id, beginning, length);
264 if(m_best_alignment.score >= m_best_post_processed_alignment.score)
266 m_best_post_processed_alignment = m_best_alignment;
269 if(m_best_post_processed_alignment.SPC > current_SPC &&
270 m_best_post_processed_alignment.score >= current_best_score)
272 m_best_alignment = m_best_post_processed_alignment;
278 const QString &sequence,
279 const std::size_t row_number,
280 const std::vector<AaPosition> aa_positions,
282 const bool fast_align,
283 const QString &protein)
285 int score_found, score_shift, best_score, current_score, alt_score, tree_id;
287 std::size_t best_column, alt_column,
shift, beginning, missing_aas, length, perfect_shift_origin;
288 KeyCell *current_cell_ptr, *tested_cell_ptr;
289 AlignType alignment_type, temp_align_type;
291 m_updated_cells.reserve(aa_positions.size());
299 condition += 2 << m_aaCode.getAaCode(sequence[row_number - 2].unicode());
300 qDebug() <<
"condition" << condition << sequence[row_number - 2]
301 << m_aaCode.getAaCode(sequence[row_number - 2].unicode());
305 for(std::vector<AaPosition>::const_iterator aa_position = aa_positions.begin();
306 aa_position != aa_positions.end();
309 if(((condition & aa_position->condition) != 0) ||
312 current_cell_ptr = &m_interest_cells.at(aa_position->r_peak);
313 if(spectrum->peakType(aa_position->r_peak) ==
326 best_column = aa_position->r_peak;
327 best_score = current_cell_ptr->
score + (row_number - current_cell_ptr->
n_row) *
330 tree_id = current_cell_ptr->
tree_id;
334 if(aa_position->l_support)
336 tested_cell_ptr = &m_interest_cells.at(aa_position->l_peak);
337 if(aa_position->l_peak == 0)
339 alt_score = tested_cell_ptr->
score + score_found;
343 if(tested_cell_ptr->
n_row == row_number - 1)
345 alt_score = tested_cell_ptr->
score +
346 (row_number - tested_cell_ptr->
n_row - 1) *
352 alt_score = tested_cell_ptr->
score +
353 (row_number - tested_cell_ptr->
n_row - 1) *
358 if(alt_score >= best_score)
361 best_score = alt_score;
362 best_column = aa_position->l_peak;
376 tree_id = m_location_saver.getNextTree();
382 tree_id = tested_cell_ptr->
tree_id;
389 while(shift < aa_position->next_l_peak)
391 tested_cell_ptr = &m_interest_cells.at(aa_position->next_l_peak -
shift);
393 if(perfectShiftPossible(sequence,
395 tested_cell_ptr->
n_row,
397 aa_position->next_l_peak -
shift,
398 aa_position->r_peak))
400 alt_score = tested_cell_ptr->
score +
401 (row_number - tested_cell_ptr->
n_row - 1) *
408 alt_score = tested_cell_ptr->
score +
409 (row_number - tested_cell_ptr->
n_row - 1) *
414 if(alt_score > best_score)
416 alignment_type = temp_align_type;
417 best_score = alt_score;
418 best_column = aa_position->next_l_peak -
shift;
420 tree_id = tested_cell_ptr->
tree_id;
426 tested_cell_ptr = &m_interest_cells.at(0);
428 perfect_shift_origin =
429 perfectShiftPossibleFrom0(sequence, spectrum, row_number, aa_position->r_peak);
430 if(perfect_shift_origin != row_number)
432 alt_score = tested_cell_ptr->
score + score_found;
437 alt_score = tested_cell_ptr->
score + score_shift;
440 if(alt_score > best_score)
442 alignment_type = temp_align_type;
443 best_score = alt_score;
446 std::floor(spectrum->getMZShift(0, aa_position->l_peak) / m_aaCode.getMass(
'G'));
453 beginning = std::max((std::size_t)(row_number - missing_aas -
ALIGNMENT_SURPLUS),
458 tree_id = m_location_saver.getNextTree();
461 if(best_column != aa_position->r_peak)
463 m_updated_cells.push_back(
464 {aa_position->r_peak, {row_number, best_score, beginning, tree_id}});
466 if(best_score > m_location_saver.getMinScore(tree_id) && fast_align)
469 row_number - beginning + 1 +
470 std::ceil(spectrum->getMissingMass(aa_position->r_peak) /
Aa(
'G').getMass()) +
472 m_location_saver.addLocation(beginning, length, tree_id, best_score, protein);
478 m_scenario.saveOrigin(row_number,
480 perfect_shift_origin,
487 m_scenario.saveOrigin(row_number,
489 m_interest_cells.at(best_column).n_row,
499 m_updated_cells.push_back({0, {row_number, 0, 0, 0}});
502 while(m_updated_cells.size() > 0)
504 m_interest_cells.at(m_updated_cells.back().first) = m_updated_cells.back().second;
505 m_updated_cells.pop_back();
512 const std::size_t origin_row,
513 const std::size_t current_row,
514 const std::size_t l_peak,
515 const std::size_t r_peak)
const
517 double missing_mass = 0;
518 for(QString::const_iterator iter = sequence.begin() + origin_row;
519 iter != sequence.begin() + current_row;
522 missing_mass +=
Aa(iter->unicode()).
getMass();
525 return mz_range.
contains(spectrum->getMZShift(l_peak, r_peak));
530 const QString &sequence,
532 const std::size_t current_row,
533 const std::size_t r_peak)
const
535 std::size_t perfect_shift_origin = current_row;
536 double missing_mass = spectrum->getMZShift(0, r_peak);
539 while(aa_mass < missing_mass && perfect_shift_origin > 0 && !mz_range.
contains(aa_mass))
541 aa_mass +=
Aa(sequence.at(perfect_shift_origin - 1).unicode()).
getMass();
542 perfect_shift_origin--;
546 return perfect_shift_origin;
556 const QString &sequence,
559 std::size_t end_peak)
const
561 std::size_t perfect_shift_end = end_row + 1;
562 double missing_mass = spectrum->getMissingMass(end_peak);
565 while(aa_mass < missing_mass && perfect_shift_end < (std::size_t)sequence.size() &&
568 aa_mass +=
Aa(sequence.at(perfect_shift_end - 1).unicode()).
getMass();
573 return perfect_shift_end - 1;
588 return m_location_saver;
602 m_best_alignment.peaks.clear();
603 m_best_alignment.shifts.clear();
604 std::vector<QString> temp_interpretation;
605 std::size_t previous_row;
606 std::size_t previous_column = 0;
607 int score_found, score_shift;
608 std::size_t perfect_shift_end;
609 std::pair<std::vector<ScenarioCell>,
int> best_alignment = m_scenario.getBestAlignment();
610 m_best_alignment.score = best_alignment.second;
615 m_best_alignment.beginning = offset - best_alignment.first.front().previous_row;
616 m_best_alignment.end = offset - best_alignment.first.back().previous_row;
619 for(
auto cell : best_alignment.first)
621 switch(cell.alignment_type)
624 temp_interpretation.push_back(sequence.at(previous_row - 1));
625 if(previous_row > cell.previous_row + 1)
627 skipped_mass =
Aa(sequence.at(previous_row - 1).unicode()).
getMass();
629 sequence.sliced(cell.previous_row, previous_row - cell.previous_row - 1);
630 for(
auto aa : skipped_aa)
632 temp_interpretation.push_back(
'[' + aa +
']');
633 skipped_mass +=
Aa(aa.unicode()).
getMass();
635 temp_interpretation.push_back(
637 QString::number(spectrum->getMZShift(cell.previous_column, previous_column) -
641 m_best_alignment.peaks.push_back(cell.previous_column);
644 temp_interpretation.push_back(
'[' + sequence.at(previous_row - 1) +
']');
647 m_best_alignment.peaks.push_back(cell.previous_column);
648 temp_interpretation.push_back(sequence.at(previous_row - 1));
649 temp_interpretation.push_back(
651 QString::number(spectrum->getMZShift(cell.previous_column, previous_column) -
652 Aa(sequence.at(previous_row - 1).unicode()).
getMass()) +
654 m_best_alignment.shifts.push_back(
655 spectrum->getMZShift(cell.previous_column, previous_column) -
656 Aa(sequence.at(previous_row - 1).unicode()).
getMass());
659 m_best_alignment.peaks.push_back(cell.previous_column);
660 skipped_aa = sequence.sliced(cell.previous_row, previous_row - cell.previous_row);
661 std::reverse(skipped_aa.begin(), skipped_aa.end());
662 temp_interpretation.push_back(skipped_aa);
665 previous_row = cell.previous_row;
666 previous_column = cell.previous_column;
667 m_best_alignment.peaks.push_back(cell.previous_column);
670 previous_row = cell.previous_row;
671 previous_column = cell.previous_column;
673 std::reverse(temp_interpretation.begin(), temp_interpretation.end());
674 std::reverse(m_best_alignment.peaks.begin(), m_best_alignment.peaks.end());
677 MzRange zero(0, m_precision_ptr);
678 m_best_alignment.begin_shift = spectrum->getMZShift(0, m_best_alignment.peaks.front());
679 m_best_alignment.end_shift = spectrum->getMissingMass(m_best_alignment.peaks.back());
680 if(zero.
contains(m_best_alignment.end_shift))
682 m_best_alignment.end_shift = 0;
686 m_best_alignment.SPC = 0;
687 for(
auto peak : m_best_alignment.peaks)
689 switch(spectrum->at(peak).type)
692 qDebug() << peak <<
"native";
693 m_best_alignment.SPC += 1;
696 qDebug() << peak <<
"both";
697 m_best_alignment.SPC += 2;
700 qDebug() << peak <<
"synthetic";
703 qDebug() << peak <<
"symmetric";
704 m_best_alignment.SPC += 1;
710 if(m_best_alignment.end_shift > 0)
712 perfect_shift_end = perfectShiftPossibleEnd(sequence,
714 best_alignment.first.front().previous_row,
715 m_best_alignment.peaks.back());
716 if(perfect_shift_end != best_alignment.first.front().previous_row)
719 sequence.sliced(best_alignment.first.front().previous_row,
720 perfect_shift_end - best_alignment.first.front().previous_row);
721 for(
auto aa = skipped_aa.begin(); aa != skipped_aa.end(); aa++)
723 temp_interpretation.push_back(
'[' + *aa +
']');
725 m_best_alignment.beginning = offset - perfect_shift_end;
726 m_best_alignment.end_shift = 0;
735 m_best_alignment.interpretation =
"";
736 if(m_best_alignment.end_shift > 0)
738 m_best_alignment.interpretation +=
'[' + QString::number(m_best_alignment.end_shift) +
']';
740 for(
auto str = temp_interpretation.rbegin(); str != temp_interpretation.rend(); str++)
742 m_best_alignment.interpretation += *(str);
744 if(m_best_alignment.begin_shift > 0)
746 m_best_alignment.interpretation +=
'[' + QString::number(m_best_alignment.begin_shift) +
']';
753 return m_best_alignment;
758 const QString &protein_seq)
760 std::vector<double> potential_mass_errors(alignment.
shifts);
762 std::size_t index = alignment.
beginning - 1;
763 while(
shift > 0 && index > 0)
765 potential_mass_errors.push_back(
shift);
770 index = alignment.
end + 1;
771 while(
shift > 0 && index < (std::size_t)protein_seq.size())
773 potential_mass_errors.push_back(
shift);
777 return potential_mass_errors;
collection of integer code for each amino acid 0 => null 1 to 20 => amino acid sorted by there mass (...
pappso_double getMass() const override
bool contains(pappso_double) const
std::vector< std::vector< std::size_t > > getPeaks() const
void addPeaks(std::size_t peak1, std::size_t peak2)
void preciseAlign(const SpOMSSpectrumCsp &spectrum, const QString &protein_seq, const QString &protein_id, const std::size_t beginning, const std::size_t length)
performs the second alignment search between a protein subsequence and a spectrum....
void updateAlignmentMatrix(const QString &sequence, const std::size_t row_number, const std::vector< AaPosition > aa_positions, const SpOMSSpectrumCsp &spectrum, const bool fast_align, const QString &protein)
updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenar...
SemiGlobalAlignment(ScoreValues &score_values, const pappso::PrecisionPtr precision_ptr, AaCode &aaCode)
void fastAlign(const SpOMSSpectrumCsp &spectrum, const QString &protein_seq, const QString &protein_id)
perform the first alignment search between a protein sequence and a spectrum. The member location hea...
Scenario getScenario() const
void postProcessingAlign(const SpOMSSpectrumCsp &spectrum, const QString &protein_seq, const QString &protein_id, std::size_t beginning, std::size_t length, const std::vector< double > &shifts)
performs the post-processing : generates corrected spectra and align them
const Alignment & getBestAlignment(const SpOMSSpectrumCsp &spectrum) const
void saveBestAlignment(const QString sequence, const SpOMSSpectrumCsp &spectrum, std::size_t offset)
Stores the best alignment from m_scenario in m_best_alignment.
bool perfectShiftPossible(const QString &sequence, const SpOMSSpectrumCsp &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
indicates if a perfect shift is possible between the provided positions
void correctAlign(const QString &protein_seq, const QString &protein_id, const SpOMSSpectrumCsp &spectrum, std::vector< std::size_t > peaks_to_remove, std::size_t offset)
static std::vector< double > getPotentialMassErrors(const Alignment &alignment, const QString &protein_seq)
std::size_t perfectShiftPossibleFrom0(const QString &sequence, const SpOMSSpectrumCsp &spectrum, const std::size_t current_row, const std::size_t r_peak) const
std::vector< KeyCell > m_interest_cells
std::size_t perfectShiftPossibleEnd(const QString &sequence, const SpOMSSpectrumCsp &spectrum, std::size_t end_row, std::size_t end_peak) const
pappso::PrecisionPtr m_precision_ptr
LocationSaver getLocationSaver() const
save corrections to apply
@ synthetic
does not correspond to existing peak, for computational purpose
@ both
both, the ion and the complement exists in the original spectrum
@ symmetric
new peak : computed symmetric mass from a corresponding native peak
std::shared_ptr< const SpOMSSpectrum > SpOMSSpectrumCsp
const uint ALIGNMENT_SURPLUS(5)
const int MIN_ALIGNMENT_SCORE(15)
protein to spectrum alignment
std::vector< double > shifts
This header contains all the type re-definitions and all the global variables definitions used in the...