diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathHelper.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathHelper.h index 4cc25f44ec0..4496559b656 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathHelper.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathHelper.h @@ -198,6 +198,22 @@ namespace OpenMS static std::map simpleFindBestFeature(const OpenMS::MRMFeatureFinderScoring::TransitionGroupMapType & transition_group_map, bool useQualCutoff = false, double qualCutoff = 0.0); + + /** + * @brief Subsample a library based on a given + * + * This method is used to subsample the library intelligently in order to ensure that the subsampled compounds are distributed across RT space. + * + * @param nrBins Number of bins across RT + * @param peptidesPerBin aim number of peptides per bin, if not reached a warning is issued + * + * @return LightTargetedExperiment with the subsampled library + */ + static OpenSwath::LightTargetedExperiment subsampleLibrary(OpenSwath::LightTargetedExperiment& library, + int nrBins, + int minPeptidesPerBin); + + }; } // namespace OpenMS diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathWorkflow.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathWorkflow.h index a6622d72e7d..4f738991645 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathWorkflow.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/OpenSwathWorkflow.h @@ -280,6 +280,20 @@ namespace OpenMS bool pasef = false, bool load_into_memory = false); + TransformationDescription performAutoRTNormalization( + OpenSwath::LightTargetedExperiment& irt_transitions, + std::vector< OpenSwath::SwathMap > & swath_maps, + TransformationDescription& im_trafo, + double min_rsq, + double min_coverage, + const Param& feature_finder_param, + ChromExtractParams& cp_irt, + const Param& irt_detection_param, + const Param& calibration_param, + Size debug_level, + bool pasef, + bool load_into_memory); + public: /** @brief Perform retention time and m/z calibration diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathHelper.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathHelper.cpp index f810215e2c7..82c76c633d5 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathHelper.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathHelper.cpp @@ -7,6 +7,7 @@ // -------------------------------------------------------------------------- #include +#include namespace OpenMS { @@ -187,4 +188,68 @@ namespace OpenMS return result; } + OpenSwath::LightTargetedExperiment OpenSwathHelper::subsampleLibrary(OpenSwath::LightTargetedExperiment& library, + int nrBins, + int peptidesPerBin) + { + std::pair RTRange = OpenSwathHelper::estimateRTRange(library); + OPENMS_LOG_DEBUG << "Detected retention time range from " << RTRange.first << " to " << RTRange.second << std::endl; + + // Sort the LightTargetedExperiment Peptide precursors by their RT + auto peptide_precursors = library.getCompounds(); + std::sort(peptide_precursors.begin(), peptide_precursors.end(), + [](const OpenSwath::LightCompound& a, const OpenSwath::LightCompound& b) + { + return a.rt < b.rt; + }); + + std::vector selected_compounds; + + // Create a histogram bins with the desired number of bins + Math::Histogram<> hist(RTRange.first, RTRange.second, nrBins); + for (size_t h = 0; h < hist.size(); h++) + { + OPENMS_LOG_DEBUG << "Bin " << h << " has left border of " << hist.leftBorderOfBin(h) << " and right border of " << hist.rightBorderOfBin(h) << std::endl; + } + + // Take the first `minPeptidesPerBin` peptides from each bin + int i = 0; // index of the peptide precursor + for (size_t h = 0; h < hist.size(); h++) + { + int numPepsInBin = 0; + bool doneWithBin = false; + while ((numPepsInBin < peptidesPerBin) && (!doneWithBin)) + { + // Check if the peptide precursor is within the bin range + OPENMS_LOG_DEBUG << "Checking peptide precursor with RT " << peptide_precursors[i].rt << " against bin with left border of " << hist.leftBorderOfBin(h) << std::endl; + if (peptide_precursors[i].rt >= hist.leftBorderOfBin(h) && peptide_precursors[i].rt < hist.rightBorderOfBin(h)) + { + OPENMS_LOG_DEBUG << "Adding peptide precursor with RT " << peptide_precursors[i].rt << " to bin with left border of " << hist.leftBorderOfBin(h) << std::endl; + selected_compounds.push_back(peptide_precursors[i]); + numPepsInBin++; + i++; + } + else + { + // If we have less than the minimum number of peptides in the bin, warn the user + OPENMS_LOG_WARN << "RT Bin from " << hist.leftBorderOfBin(h) << " to " << hist.rightBorderOfBin(h) << " has only " + << numPepsInBin << " peptides. This is less than the minumum number specified of " << peptidesPerBin << std::endl; + + doneWithBin = true; + } + } + + // Have the minumum number of peptides required, seek to the next bin + while (peptide_precursors[i].rt < hist.rightBorderOfBin(h)) + { + i++; + } + numPepsInBin = 0; + } + + OPENMS_LOG_DEBUG << "Subsampled library has " << selected_compounds.size() << " precursors" << std::endl; + return library.selectCompounds(selected_compounds); + } + + } diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp index 3693854269b..ffeafa5b324 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathWorkflow.cpp @@ -86,6 +86,422 @@ namespace OpenMS return tr; } + TransformationDescription OpenSwathCalibrationWorkflow::performAutoRTNormalization( + OpenSwath::LightTargetedExperiment& targeted_exp, + std::vector< OpenSwath::SwathMap > & swath_maps, + TransformationDescription& im_trafo, + double min_rsq, + double min_coverage, + const Param& default_ffparam, + ChromExtractParams& cp_irt, + const Param& irt_detection_param, + const Param& calibration_param, + Size debug_level, + bool pasef, + bool load_into_memory) + { + int rt_extraction_window; + TransformationDescription trafo_out; + + // 1. Estimate the retention time range of the iRT peptides over all assays + std::pair RTRange = OpenSwathHelper::estimateRTRange(targeted_exp); + OPENMS_LOG_DEBUG << "Detected retention time range from " << RTRange.first << " to " << RTRange.second << std::endl; + + { + OPENMS_LOG_DEBUG << "Start of performAutoRTNormalization method" << std::endl; + + // ########## LINEAR CALIBRATION ########## + // 1. Subsample the library to a number of bins + // TODO change param names NrRTBins and MinPeptidesPerBin to more descriptive names for new workflow (e.g. MinPeptidesPerBin is not actually bin peptides, NrRTBins is ok) + OpenSwath::LightTargetedExperiment targeted_exp_subsampled = OpenSwathHelper::subsampleLibrary(targeted_exp, + irt_detection_param.getValue("NrRTBins"), + irt_detection_param.getValue("MinPeptidesPerBin")); + + // 2. Store the peptide retention times in an intermediate map + std::map PeptideRTMap; + for (Size i = 0; i < targeted_exp_subsampled.getCompounds().size(); i++) + { + PeptideRTMap[targeted_exp_subsampled.getCompounds()[i].id] = targeted_exp_subsampled.getCompounds()[i].rt; + } + + // 2. Extract the chromatograms + std::vector< OpenMS::MSChromatogram > chromatograms; + TransformationDescription trafo; // dummy + this->simpleExtractChromatograms_(swath_maps, targeted_exp_subsampled, chromatograms, trafo, cp_irt, pasef, load_into_memory); + + OPENMS_LOG_DEBUG << "Extracted number of chromatograms from iRT files: " << chromatograms.size() << std::endl; + + // 2. For each peptide, extract and score chromatograms + const OpenSwath::LightTargetedExperiment& transition_exp_used = targeted_exp_subsampled; + + // Change the feature finding parameters: + // - no RT score (since we don't know the correct retention time) + // - no RT window + // - no elution model score + // - no peak quality (use all peaks) + // - if best peptides should be used, use peak quality + MRMFeatureFinderScoring featureFinder; + Param feature_finder_param(default_ffparam); + feature_finder_param.setValue("Scores:use_rt_score", "false"); + feature_finder_param.setValue("Scores:use_elution_model_score", "false"); + feature_finder_param.setValue("rt_extraction_window", -1.0); + feature_finder_param.setValue("stop_report_after_feature", 1); + feature_finder_param.setValue("TransitionGroupPicker:PeakPickerChromatogram:signal_to_noise", 1.0); // set to 1.0 in all cases + feature_finder_param.setValue("TransitionGroupPicker:compute_peak_quality", "false"); // no peak quality -> take all peaks! + feature_finder_param.setValue("TransitionGroupPicker:compute_peak_quality", "true"); + feature_finder_param.setValue("TransitionGroupPicker:minimal_quality", irt_detection_param.getValue("InitialQualityCutoff")); + featureFinder.setParameters(feature_finder_param); + featureFinder.setStrictFlag(false); // Since we do not know if features are correct, we should not be strict + + FeatureMap featureFile; // for results + OpenMS::MRMFeatureFinderScoring::TransitionGroupMapType transition_group_map; // for results + std::vector empty_swath_maps; + TransformationDescription empty_trafo; // empty transformation + + // Prepare the data with the chromatograms + boost::shared_ptr xic_map(new PeakMap); + xic_map->setChromatograms(chromatograms); + OpenSwath::SpectrumAccessPtr chromatogram_ptr = OpenSwath::SpectrumAccessPtr(new OpenMS::SpectrumAccessOpenMS(xic_map)); + + featureFinder.pickExperiment(chromatogram_ptr, featureFile, transition_exp_used, empty_trafo, empty_swath_maps, transition_group_map); + + // 4. Find most likely correct feature for each compound and add it to the + // "pairs" vector by computing pairs of iRT and real RT. + // + // Note that the quality threshold will only be applied if + // estimateBestPeptides is true + std::vector > pairs; // store the RT pairs to write the output trafoXML + std::map best_features = OpenSwathHelper::simpleFindBestFeature(transition_group_map, + true, irt_detection_param.getValue("OverallQualityCutoff")); + OPENMS_LOG_DEBUG << "Extracted best features: " << best_features.size() << std::endl; + + // Create pairs vector and store peaks + std::map trgrmap_allpeaks; // store all peaks above cutoff + for (std::map::iterator it = best_features.begin(); it != best_features.end(); ++it) + { + pairs.emplace_back(it->second, PeptideRTMap[it->first]); // pair + if (transition_group_map.find(it->first) != transition_group_map.end()) + { + trgrmap_allpeaks[ it->first ] = &transition_group_map[ it->first]; + } + } + + // 5. Perform the outlier detection + std::vector > pairs_corrected; + String outlier_method = irt_detection_param.getValue("outlierMethod").toString(); + if (outlier_method == "iter_residual" || outlier_method == "iter_jackknife") + { + pairs_corrected = MRMRTNormalizer::removeOutliersIterative(pairs, min_rsq, min_coverage, + irt_detection_param.getValue("useIterativeChauvenet").toBool(), outlier_method); + } + else if (outlier_method == "ransac") + { + // First, estimate of the maximum deviation from RT that is tolerated: + // Because 120 min gradient can have around 4 min elution shift, we use + // a default value of 3 % of the gradient to find upper RT threshold (3.6 min). + double pcnt_rt_threshold = irt_detection_param.getValue("RANSACMaxPercentRTThreshold"); + double max_rt_threshold = (RTRange.second - RTRange.first) * pcnt_rt_threshold / 100.0; + + pairs_corrected = MRMRTNormalizer::removeOutliersRANSAC(pairs, min_rsq, min_coverage, + irt_detection_param.getValue("RANSACMaxIterations"), max_rt_threshold, + irt_detection_param.getValue("RANSACSamplingSize")); + } + else if (outlier_method == "none") + { + pairs_corrected = pairs; + } + else + { + throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + String("Illegal argument '") + outlier_method + + "' used for outlierMethod (valid: 'iter_residual', 'iter_jackknife', 'ransac', 'none')."); + } + OPENMS_LOG_DEBUG << "Performed outlier detection, left with features: " << pairs_corrected.size() << std::endl; + + // 7. Select the "correct" peaks for m/z (and IM) correction (e.g. remove those not + // part of the linear regression) + std::map trgrmap_final; // store all peaks above cutoff + for (const auto& it : trgrmap_allpeaks) + { + if (it.second->getFeatures().empty() ) {continue;} + const MRMFeature& feat = it.second->getBestFeature(); + + // Check if the current feature is in the list of pairs used for the + // linear RT regression (using other features may result in wrong + // calibration values). + // Matching only by RT is not perfect but should work for most cases. + for (Size pit = 0; pit < pairs_corrected.size(); pit++) + { + if (fabs(feat.getRT() - pairs_corrected[pit].first ) < 1e-2) + { + trgrmap_final[ it.first ] = it.second; + break; + } + } + } + + // 8. Correct m/z (and IM) deviations using SwathMapMassCorrection + // m/z correction is done with the -irt_im_extraction parameters + SwathMapMassCorrection mc; + mc.setParameters(calibration_param); + + mc.correctMZ(trgrmap_final, targeted_exp, swath_maps, pasef); + mc.correctIM(trgrmap_final, targeted_exp, swath_maps, pasef, im_trafo); + + // 9. store RT transformation, using the selected model + trafo_out.setDataPoints(pairs_corrected); + Param model_params; + model_params.setValue("symmetric_regression", "false"); + model_params.setValue("span", irt_detection_param.getValue("lowess:span")); + model_params.setValue("num_nodes", irt_detection_param.getValue("b_spline:num_nodes")); + String model_type = irt_detection_param.getValue("alignmentMethod").toString(); + trafo_out.fitModel(model_type, model_params); + + OPENMS_LOG_DEBUG << "Final RT mapping:" << std::endl; + for (Size i = 0; i < pairs_corrected.size(); i++) + { + OPENMS_LOG_DEBUG << pairs_corrected[i].first << " " << pairs_corrected[i].second << std::endl; + } + + /////////////////////////////////////////////////////////////////////////// + // TODO incorperate this into transformation description function instead + ///////////////////////////////////////////////////////////// + + std::vector predicted_values(pairs_corrected.size()); // these are the predicted values by applying the trafo on the values + std::vector delta_true_predicted(pairs_corrected.size()); // these are the differences between the predicted and the true values + double sum_rt_differences = 0.0; + TransformationDescription trafo_inverse = trafo_out; + trafo_inverse.invert(); + for (size_t i = 0; i < pairs_corrected.size(); ++i) + { + predicted_values[i] = trafo_out.apply(pairs_corrected[i].first); + delta_true_predicted[i] = pairs_corrected[i].first - predicted_values[i]; + sum_rt_differences += delta_true_predicted[i]; // sum for computing the mean + OPENMS_LOG_DEBUG << "True - Predicted value: " << pairs_corrected[i].second << " " << predicted_values[i] << std::endl; + } + double mean_rt_difference = sum_rt_differences / delta_true_predicted.size(); + + // compute the variance + double rt_variance = 0.0; + for (double delta: delta_true_predicted) + { + rt_variance += (delta - mean_rt_difference) * (delta - mean_rt_difference); + } + rt_variance /= delta_true_predicted.size(); + double rt_stdev = sqrt(rt_variance); + double rt_extraction_window = rt_stdev * 3.0 * 2; // 3 times the standard deviation, doubled since the window is the full length (not half) + std::cout << "RT variance: " << rt_stdev << std::endl; + std::cout << "RT Extraction Window to Use: " << rt_extraction_window << std::endl; + + + std::cout << "End of doDataNormalization_ method" << std::endl; + + trafo_out.printSummary(std::cout); + } + { + + ////////////////////////////////////////////////////////////////////////////////////// + // PART 2: NONLINEAR CALIBRATION + ////////////////////////////////////////////////////////////////////////////////////// + int nrRTBins = (int) irt_detection_param.getValue("NrRTBins") * 10.0; // have 10X number of precursors as linear + int minPeptidesPerBin = (int) irt_detection_param.getValue("MinPeptidesPerBin") * 10; // have 10X number of precursors as linear + OpenSwath::LightTargetedExperiment targeted_exp_subsampled = OpenSwathHelper::subsampleLibrary(targeted_exp, + nrRTBins, + minPeptidesPerBin ); + + + // 2. Store the peptide retention times in an intermediate map + std::map PeptideRTMap; + for (Size i = 0; i < targeted_exp_subsampled.getCompounds().size(); i++) + { + PeptideRTMap[targeted_exp_subsampled.getCompounds()[i].id] = targeted_exp_subsampled.getCompounds()[i].rt; + } + + + // 2. Extract the chromatograms + std::vector< OpenMS::MSChromatogram > chromatograms; + TransformationDescription trafo; // dummy + cp_irt.rt_extraction_window = rt_extraction_window; + this->simpleExtractChromatograms_(swath_maps, targeted_exp_subsampled, chromatograms, trafo_out, cp_irt, pasef, load_into_memory); + + + OPENMS_LOG_DEBUG << "Extracted number of chromatograms from iRT files: " << chromatograms.size() << std::endl; + + // 2. For each peptide, extract and score chromatograms + const OpenSwath::LightTargetedExperiment& transition_exp_used = targeted_exp_subsampled; + + // Change the feature finding parameters: + // - no RT score (since we don't know the correct retention time) + // - no RT window + // - no elution model score + // - no peak quality (use all peaks) + // - if best peptides should be used, use peak quality + MRMFeatureFinderScoring featureFinder; + Param feature_finder_param(default_ffparam); + feature_finder_param.setValue("Scores:use_rt_score", "false"); + feature_finder_param.setValue("Scores:use_elution_model_score", "false"); + feature_finder_param.setValue("rt_extraction_window", -1.0); + feature_finder_param.setValue("stop_report_after_feature", 1); + feature_finder_param.setValue("TransitionGroupPicker:PeakPickerChromatogram:signal_to_noise", 1.0); // set to 1.0 in all cases + feature_finder_param.setValue("TransitionGroupPicker:compute_peak_quality", "false"); // no peak quality -> take all peaks! + feature_finder_param.setValue("TransitionGroupPicker:compute_peak_quality", "true"); + feature_finder_param.setValue("TransitionGroupPicker:minimal_quality", irt_detection_param.getValue("InitialQualityCutoff")); + featureFinder.setParameters(feature_finder_param); + featureFinder.setStrictFlag(false); // Since we do not know if features are correct, we should not be strict + + FeatureMap featureFile; // for results + OpenMS::MRMFeatureFinderScoring::TransitionGroupMapType transition_group_map; // for results + std::vector empty_swath_maps; + TransformationDescription empty_trafo; // empty transformation + + // Prepare the data with the chromatograms + boost::shared_ptr xic_map(new PeakMap); + xic_map->setChromatograms(chromatograms); + OpenSwath::SpectrumAccessPtr chromatogram_ptr = OpenSwath::SpectrumAccessPtr(new OpenMS::SpectrumAccessOpenMS(xic_map)); + + featureFinder.pickExperiment(chromatogram_ptr, featureFile, transition_exp_used, empty_trafo, empty_swath_maps, transition_group_map); + + // 4. Find most likely correct feature for each compound and add it to the + // "pairs" vector by computing pairs of iRT and real RT. + // + // Note that the quality threshold will only be applied if + // estimateBestPeptides is true + std::vector > pairs; // store the RT pairs to write the output trafoXML + std::map best_features = OpenSwathHelper::simpleFindBestFeature(transition_group_map, + true, irt_detection_param.getValue("OverallQualityCutoff")); + OPENMS_LOG_DEBUG << "Extracted best features: " << best_features.size() << std::endl; + + // Create pairs vector and store peaks + std::map trgrmap_allpeaks; // store all peaks above cutoff + for (std::map::iterator it = best_features.begin(); it != best_features.end(); ++it) + { + pairs.emplace_back(it->second, PeptideRTMap[it->first]); // pair + if (transition_group_map.find(it->first) != transition_group_map.end()) + { + trgrmap_allpeaks[ it->first ] = &transition_group_map[ it->first]; + } + } + + // 5. Perform the outlier detection + std::vector > pairs_corrected; + String outlier_method = irt_detection_param.getValue("outlierMethod").toString(); + if (outlier_method == "iter_residual" || outlier_method == "iter_jackknife") + { + pairs_corrected = MRMRTNormalizer::removeOutliersIterative(pairs, min_rsq, min_coverage, + irt_detection_param.getValue("useIterativeChauvenet").toBool(), outlier_method); + } + else if (outlier_method == "ransac") + { + // First, estimate of the maximum deviation from RT that is tolerated: + // Because 120 min gradient can have around 4 min elution shift, we use + // a default value of 3 % of the gradient to find upper RT threshold (3.6 min). + double pcnt_rt_threshold = irt_detection_param.getValue("RANSACMaxPercentRTThreshold"); + double max_rt_threshold = (RTRange.second - RTRange.first) * pcnt_rt_threshold / 100.0; + + pairs_corrected = MRMRTNormalizer::removeOutliersRANSAC(pairs, min_rsq, min_coverage, + irt_detection_param.getValue("RANSACMaxIterations"), max_rt_threshold, + irt_detection_param.getValue("RANSACSamplingSize")); + } + else if (outlier_method == "none") + { + pairs_corrected = pairs; + } + else + { + throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + String("Illegal argument '") + outlier_method + + "' used for outlierMethod (valid: 'iter_residual', 'iter_jackknife', 'ransac', 'none')."); + } + OPENMS_LOG_DEBUG << "Performed outlier detection, left with features: " << pairs_corrected.size() << std::endl; + + // 7. Select the "correct" peaks for m/z (and IM) correction (e.g. remove those not + // part of the linear regression) + std::map trgrmap_final; // store all peaks above cutoff + for (const auto& it : trgrmap_allpeaks) + { + if (it.second->getFeatures().empty() ) {continue;} + const MRMFeature& feat = it.second->getBestFeature(); + + // Check if the current feature is in the list of pairs used for the + // linear RT regression (using other features may result in wrong + // calibration values). + // Matching only by RT is not perfect but should work for most cases. + for (Size pit = 0; pit < pairs_corrected.size(); pit++) + { + if (fabs(feat.getRT() - pairs_corrected[pit].first ) < 1e-2) + { + trgrmap_final[ it.first ] = it.second; + break; + } + } + } + + // 8. Correct m/z (and IM) deviations using SwathMapMassCorrection + // m/z correction is done with the -irt_im_extraction parameters + SwathMapMassCorrection mc; + mc.setParameters(calibration_param); + + mc.correctMZ(trgrmap_final, targeted_exp, swath_maps, pasef); + mc.correctIM(trgrmap_final, targeted_exp, swath_maps, pasef, im_trafo); + + // 9. store RT transformation, using the selected model + TransformationDescription trafo_out; + trafo_out.setDataPoints(pairs_corrected); + Param model_params; + model_params.setValue("symmetric_regression", "false"); + model_params.setValue("span", irt_detection_param.getValue("lowess:span")); + model_params.setValue("num_nodes", irt_detection_param.getValue("b_spline:num_nodes")); + model_params.setValue("alignmentMethod", "lowess"); + String model_type = irt_detection_param.getValue("alignmentMethod").toString(); + trafo_out.fitModel(model_type, model_params); + + OPENMS_LOG_DEBUG << "Final RT mapping:" << std::endl; + for (Size i = 0; i < pairs_corrected.size(); i++) + { + OPENMS_LOG_DEBUG << pairs_corrected[i].first << " " << pairs_corrected[i].second << std::endl; + } + + /////////////////////////////////////////////////////////////////////////// + // TODO incorperate this into transformation description function instead + ///////////////////////////////////////////////////////////// + + std::vector predicted_values(pairs_corrected.size()); // these are the predicted values by applying the trafo on the values + std::vector delta_true_predicted(pairs_corrected.size()); // these are the differences between the predicted and the true values + double sum_rt_differences = 0.0; + TransformationDescription trafo_inverse = trafo_out; + trafo_inverse.invert(); + + // note pairs correct first is exp RT, pairs corrected second is iRT + for (size_t i = 0; i < pairs_corrected.size(); ++i) + { + predicted_values[i] = trafo_inverse.apply(pairs_corrected[i].second); + delta_true_predicted[i] = pairs_corrected[i].first - predicted_values[i]; + sum_rt_differences += delta_true_predicted[i]; // sum for computing the mean + OPENMS_LOG_DEBUG << "True - Predicted value: " << pairs_corrected[i].second << " " << predicted_values[i] << std::endl; + } + double mean_rt_difference = sum_rt_differences / delta_true_predicted.size(); + + // compute the variance + double rt_variance = 0.0; + for (double delta: delta_true_predicted) + { + rt_variance += (delta - mean_rt_difference) * (delta - mean_rt_difference); + } + rt_variance /= delta_true_predicted.size(); + double rt_stdev = sqrt(rt_variance); + double rt_extraction_window = rt_stdev * 3.0 * 2; // 3 times the standard deviation, doubled since the window is the full length (not half) + std::cout << "RT variance: " << rt_stdev << std::endl; + std::cout << "RT Extraction Window to Use: " << rt_extraction_window << std::endl; + + + OPENMS_LOG_DEBUG << "End of doDataNormalization_ method" << std::endl; + + trafo_out.printSummary(std::cout); + + return trafo_out; + } + } + TransformationDescription OpenSwathCalibrationWorkflow::doDataNormalization_( const OpenSwath::LightTargetedExperiment& targeted_exp, const std::vector< OpenMS::MSChromatogram >& chromatograms, @@ -209,8 +625,8 @@ namespace OpenMS OPENMS_LOG_DEBUG << "Performed outlier detection, left with features: " << pairs_corrected.size() << std::endl; // 6. Check whether the found peptides fulfill the binned coverage criteria - // set by the user. - if (estimateBestPeptides) + // set by the user. This is only done if not using a linear RT correction + if (estimateBestPeptides && irt_detection_param.getValue("alignmentMethod").toString() != "linear") { bool enoughPeptides = MRMRTNormalizer::computeBinnedCoverage(RTRange, pairs_corrected, irt_detection_param.getValue("NrRTBins"), @@ -274,8 +690,40 @@ namespace OpenMS { OPENMS_LOG_DEBUG << pairs_corrected[i].first << " " << pairs_corrected[i].second << std::endl; } + + /////////////////////////////////////////////////////////////////////////// + // TODO incorperate this into transformation description function instead + ///////////////////////////////////////////////////////////// + + std::vector predicted_values(pairs_corrected.size()); // these are the predicted values by applying the trafo on the values + std::vector delta_true_predicted(pairs_corrected.size()); // these are the differences between the predicted and the true values + double sum_rt_differences = 0.0; + for (size_t i = 0; i < pairs_corrected.size(); ++i) + { + predicted_values[i] = trafo_out.apply(pairs_corrected[i].first); + delta_true_predicted[i] = pairs_corrected[i].second - predicted_values[i]; + sum_rt_differences += delta_true_predicted[i]; // sum for computing the mean + OPENMS_LOG_DEBUG << "True - Predicted value: " << pairs_corrected[i].second << " " << predicted_values[i] << std::endl; + } + double mean_rt_difference = sum_rt_differences / delta_true_predicted.size(); + + // compute the variance + double rt_variance = 0.0; + for (double delta: delta_true_predicted) + { + rt_variance += (delta - mean_rt_difference) * (delta - mean_rt_difference); + } + rt_variance /= delta_true_predicted.size(); + double rt_stdev = sqrt(rt_variance); + double rt_extraction_window = rt_stdev * 3.0 * 2; // 3 times the standard deviation, doubled since the window is the full length (not half) + OPENMS_LOG_DEBUG << "RT variance: " << rt_stdev << std::endl; + OPENMS_LOG_DEBUG << "RT Extraction Window to Use: " << rt_extraction_window << std::endl; + + OPENMS_LOG_DEBUG << "End of doDataNormalization_ method" << std::endl; + trafo_out.printSummary(std::cout); + this->endProgress(); return trafo_out; } diff --git a/src/openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/TransitionExperiment.h b/src/openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/TransitionExperiment.h index 9f6ebe5c4bb..33b34d03a88 100644 --- a/src/openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/TransitionExperiment.h +++ b/src/openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/TransitionExperiment.h @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -238,6 +239,30 @@ namespace OpenSwath return *(compound_reference_map_[ref]); } + // Creates a subsampled library from the specified light compounds + const LightTargetedExperiment selectCompounds(const std::vector& selectedCompounds) + { + LightTargetedExperiment subsampled; + subsampled.proteins = proteins; + + subsampled.compounds.insert(subsampled.compounds.end(), selectedCompounds.begin(), selectedCompounds.end()); + + std::set compound_ids; + for (size_t i = 0; i < subsampled.compounds.size(); i++) + { + compound_ids.insert(subsampled.compounds[i].id); + } + + for (size_t i = 0; i < transitions.size(); i++) + { + if (compound_ids.find(transitions[i].peptide_ref) != compound_ids.end()) + { + subsampled.transitions.push_back(transitions[i]); + } + } + return subsampled; + } + private: void createPeptideReferenceMap_() diff --git a/src/topp/OpenSwathWorkflow.cpp b/src/topp/OpenSwathWorkflow.cpp index 688feee7c65..4be1a2b228f 100644 --- a/src/topp/OpenSwathWorkflow.cpp +++ b/src/topp/OpenSwathWorkflow.cpp @@ -422,6 +422,9 @@ class TOPPOpenSwathWorkflow registerStringOption_("enable_ms1", "", "true", "Extract the precursor ion trace(s) and use for scoring if present", false, true); setValidStrings_("enable_ms1", ListUtils::create("true,false")); + registerStringOption_("auto_transform", "", "true", "Perform automatic RT, IM, and m/z calibration using a quality heuristic apporach", false, true); + setValidStrings_("auto_transform", ListUtils::create("true,false")); + registerStringOption_("enable_ipf", "", "true", "Enable additional scoring of identification assays using IPF (see online documentation)", false, true); setValidStrings_("enable_ipf", ListUtils::create("true,false")); @@ -665,6 +668,7 @@ class TOPPOpenSwathWorkflow bool pasef = getFlag_("pasef"); bool sort_swath_maps = getFlag_("sort_swath_maps"); bool use_ms1_traces = getStringOption_("enable_ms1") == "true"; + bool auto_transform = getStringOption_("auto_transform") == "true"; bool enable_uis_scoring = getStringOption_("enable_ipf") == "true"; int batchSize = (int)getIntOption_("batchSize"); int outer_loop_threads = (int)getIntOption_("outer_loop_threads"); @@ -906,6 +910,30 @@ class TOPPOpenSwathWorkflow calibration_param.setValue("im_extraction_window", cp_irt.im_extraction_window); calibration_param.setValue("mz_correction_function", mz_correction_function); TransformationDescription trafo_rtnorm; + if (auto_transform) + { + + OpenSwathCalibrationWorkflow wf; + wf.setLogType(log_type_); + Param linear_irt = irt_detection_param; + linear_irt.setValue("alignmentMethod", "linear"); + Param no_calibration = calibration_param; + no_calibration.setValue("mz_correction_function", "none"); + + std::vector< OpenMS::MSChromatogram > chromatograms; + + TransformationDescription im_trafo; // exp -> theoretical + trafo_rtnorm = wf.performAutoRTNormalization(transition_exp, swath_maps, im_trafo, + min_rsq, min_coverage, feature_finder_param, + cp_irt, linear_irt, no_calibration, + debug_level, pasef, load_into_memory); + + if (!irt_trafo_out.empty()) + { + FileHandler().storeTransformations(irt_trafo_out, trafo_rtnorm, {FileTypes::TRANSFORMATIONXML}); + } + + } if (nonlinear_irt_tr_file.empty()) { trafo_rtnorm = performCalibration(trafo_in, irt_tr_file, swath_maps,