Vespucci  1.0.0
peakfinding.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2  Copyright (C) 2014-2016 Wright State University - All Rights Reserved
3  Daniel P. Foose - Maintainer/Lead Developer
4 
5  This file is part of Vespucci.
6 
7  Vespucci is free software: you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation, either version 3 of the License, or
10  (at your option) any later version.
11 
12  Vespucci is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with Vespucci. If not, see <http://www.gnu.org/licenses/>.
19 *******************************************************************************/
20 
24 
25 
26 
27 
45  arma::vec dX,
46  double sel,
47  double threshold,
48  arma::vec &peak_magnitudes)
49 {
50  arma::uvec peak_indices;
51  arma::uvec peak_locations;
52  peak_magnitudes = arma::zeros(X.n_rows, 1);
53 
54  dX.elem( find(dX == 0) ).fill(-arma::datum::eps); //to find first of repeated values
55  double minimum_magnitude = X.min();
56  double left_min = X(0);
57  arma::uword length = X.n_elem;
58  arma::uword temporary_location = 0;
59 
60  //Find where derivative changes sign:
61  arma::uvec extrema_indices = find( (dX.subvec(0, length - 2) % dX.subvec(1, length - 1) )< 0);
62  arma::vec X_extrema = X.elem(extrema_indices);
63  length = X_extrema.n_elem;
64  double temporary_magnitude = minimum_magnitude;
65  bool peak_found = false;
66  arma::uword ii;
67  if (X(1) >= X(2))
68  ii = 1;
69  else
70  ii = 2;
71 
72  //This loop finds the peak locations
73  while (ii < length){
74  //reset peak finding if we had a peak earlier and the next peak is
75  //bigger than the last or the left minimum was small enough to reset
76  if (peak_found){
77  temporary_magnitude = minimum_magnitude;
78  peak_found = false;
79  }
80 
81  if ( (X_extrema(ii) > temporary_magnitude) && (X_extrema(ii) > left_min + sel) ){
82  std::cout << "temporary_location = " << ii << std::endl;
83  temporary_magnitude = X_extrema(ii);
84  }
85 
86  ++ii; //move into the valley
87  if (ii >= length){
88  break;
89  }
90  //come down at least sel from peak
91  if (!peak_found && (temporary_magnitude > sel + X_extrema(ii) ) ){
92  peak_found = true;
93  std::cout << "peak found!" << std::endl;
94  left_min = X_extrema(ii);
95  peak_locations << temporary_location;
96  }
97  else if(X(ii) < left_min)
98  left_min = X_extrema(ii);
99 
100  ++ii;
101 
102 
103  }
104  //remove all peaks below the threshold
105 
106  arma::vec results = arma::zeros(X.n_elem, 1);
107  peak_indices = extrema_indices.elem(peak_locations);
108  results.elem(peak_indices).ones();
109  peak_magnitudes.elem(peak_indices) = X(peak_indices);
110 
111 
112 
113  arma::uvec indices = find(peak_magnitudes <= threshold);
114  peak_magnitudes.elem(indices).zeros();
115  results.elem(indices).zeros();
116  return results;
117 }
118 
119 
131 arma::mat Vespucci::Math::PeakFinding::FindPeaksMat(arma::mat X, double sel, double threshold, arma::uword poly_order, arma::uword window_size, arma::mat &peak_magnitudes)
132 {
133  arma::mat peak_locations(X.n_rows, X.n_cols); //value that gets returned
134  peak_magnitudes.set_size(X.n_rows, X.n_cols);
135 
136  //calculate a smoothed first derivative:
137  arma::mat derivatized = Vespucci::Math::Smoothing::sgolayfilt(X, poly_order, window_size, 1, 1);
138  arma::vec thresh(X.n_cols);
139  arma::vec sels(X.n_cols);
140  if (std::isnan(sel)){
141  for (arma::uword i = 0; i < X.n_cols; ++i){
142  sels(i) = (X.col(i).max() - X.col(i).min() ) / 16.0;
143  }
144  }
145  else{
146  sels.fill(sel);
147  }
148 
149  if (std::isnan(threshold)){
150  arma::uword k = (X.n_rows % 2 == 0 ? (X.n_rows / 2) : (X.n_rows + 1 / 2 ));
151  arma::vec buffer(X.n_rows);
152  arma::uvec sorted;
153  //we calculate median this way due to a problem with my MinGW compiler that
154  //causes crashes when arma::median is called.
155  for (arma::uword i = 0; i < X.n_cols; ++i){
156  buffer = Vespucci::Math::diff(X, 1); //use arithmetic derivative, rather than smoothed derivative
157  sorted = stable_sort_index(buffer);
158  thresh(i) = buffer(sorted(k));
159  }
160  }
161  else{
162  thresh.fill(threshold);
163  }
164  arma::vec current_spectrum;
165  arma::vec current_derivative;
166  arma::vec current_magnitudes;
167  arma::vec current_peaks;
168  for (arma::uword i = 0; i < X.n_cols; ++i){
169  current_spectrum = X.col(i);
170  current_derivative = derivatized.col(i);
171  current_peaks = Vespucci::Math::PeakFinding::FindPeaks(current_spectrum, current_derivative, sels(i), thresh(i), current_magnitudes);
172  peak_locations.col(i) = current_peaks;
173  peak_magnitudes.col(i) = current_magnitudes;
174  }
175  return peak_locations;
176 }
177 
201 arma::umat Vespucci::Math::PeakFinding::FindPeakPositions(arma::vec X, arma::vec dX,
202  double threshold,
203  std::string threshold_method,
204  arma::vec &peak_magnitudes)
205 {
206  //threshold can be arbitrary or calculated
207 
208  arma::vec d2X = Vespucci::Math::diff(dX, 1);
209  try{
210  d2X.insert_rows(0, 1, true);
211  }catch(std::exception e){
212  std::cerr << "error in Vespucci::Math::PeakFinding::FindPeakPositions" << std::endl;
213  }
214 
215  if (threshold_method == "count" && threshold <= 0){
216  throw std::runtime_error("Invalid threshold value for given method");
217  }
218  if ((threshold_method == "countpercentage" || threshold_method == "ratio") && threshold > 1){
219  throw std::runtime_error("Invalid threshold value for given method");
220  }
221 
222  //find where first derivative changes sign
223  arma::uvec extrema_indices = find( (dX.subvec(0, X.n_rows - 2) % dX.subvec(1, X.n_rows - 1) )< 0);
224  arma::vec d2X_extrema = d2X(extrema_indices);
225  arma::uvec maxima_indices = find(d2X_extrema < 0); //the indices in d2X_extrema, X_extrema and extrema_indices that correspond to maxima
226  arma::umat results(maxima_indices.n_elem, 3);
227  arma::uvec buffer(3);
228  peak_magnitudes.set_size(maxima_indices.n_elem); //each maximum corresponds to a magnitude
229 
230  //we have the indices of the maxima, now we need to find magnitudes and positions
231  //of the minima associated with each maximum
232  arma::uword center_index = 0;
233  arma::uword left_index = 0;
234  arma::uword right_index = 0;
235 
236  //find peak edges for each center and the magnitude of the peaks
237  arma::uword i;
238  try{
239  for (i = 0; i < maxima_indices.n_elem; ++i){
240 
241  if(maxima_indices(i) == 0){
242  //no left index (at beginning)
243  center_index = extrema_indices(maxima_indices(i)); //index of the center of peak
244  right_index = extrema_indices(maxima_indices(i) + 1); //index of right edge
245  left_index = center_index; //left edge is same as center
246  }
247 
248  else if(maxima_indices(i) + 1 >= extrema_indices.n_elem){
249  //no right_index (we're at the end)
250  center_index = extrema_indices(maxima_indices(i)); //index of the center of peak
251  left_index = extrema_indices(maxima_indices(i) - 1); //index of left edge
252  right_index = center_index;
253  }
254 
255  else{
256  //normal
257  center_index = extrema_indices(maxima_indices(i)); //index of the center of peak
258  left_index = extrema_indices(maxima_indices(i) - 1); //index of left edge
259  right_index = extrema_indices(maxima_indices(i) + 1); //index of right edge
260  }
261 
262  //if left_index == right_index, average is the same as difference...
263  peak_magnitudes(i) = (2.0*X(center_index) - X(left_index) - X(right_index))/2.0;
264  buffer(0) = center_index;
265  buffer(1) = left_index;
266  buffer(2) = right_index;
267  results.row(i) = buffer.t();
268  }
269  }catch(std::exception e){
270  std::cout << "Exception!" << std::endl;
271  std::cout << e.what() << std::endl;
272  std::cout << "iterator index = " << i << std::endl;
273  std::cout << "maxima index = " << maxima_indices(i) << std::endl;
274  std::cout << "extrema_indices.n_rows = " << extrema_indices.n_rows << std::endl;
275  std::cout << "center_index = " << center_index << std::endl;
276  std::cout << "left_index = " << left_index << std::endl;
277  std::cout << "right_index = " << right_index << std::endl;
278  throw std::runtime_error(e.what());
279  }
280 
281  //Fix thresholds//
282  //sort with largest first:
283  arma::uvec sorted_peak_indices = stable_sort_index(peak_magnitudes, "descend");
284  peak_magnitudes = peak_magnitudes.rows(sorted_peak_indices);
285  results = results.rows(sorted_peak_indices);
286 
287  if (threshold_method == "magnitude"){
288  arma::uvec indices = find(peak_magnitudes > threshold);
289  peak_magnitudes = peak_magnitudes.rows(indices);
290  return results.rows(indices);
291  }
292  else if (threshold_method == "count"){
293  arma::uword uthresh = (std::ceil(threshold) < results.n_rows - 1 ? std::ceil(threshold) : results.n_rows - 1);
294  peak_magnitudes = peak_magnitudes.rows(0, uthresh);
295  return results.rows(0, uthresh);
296  }
297  else if (threshold_method == "ratio"){
298  if (threshold == 1){
299  peak_magnitudes = peak_magnitudes.row(0);
300  return results.row(0);
301  }
302  else{
303  double cutoff = peak_magnitudes(0) * threshold;
304  arma::uvec indices = find(peak_magnitudes > cutoff);
305  peak_magnitudes = peak_magnitudes.rows(indices);
306  return results.rows(indices);
307  }
308  }
309  else{ //(threshold_method == "countpercentage")
310  arma::uword count = std::floor(threshold*results.n_rows);
311  peak_magnitudes = peak_magnitudes.rows(0, count);
312  return results.rows(0, count);
313  }
314 
315 }
316 
317 
318 arma::vec Vespucci::Math::PeakFinding::PeakPopulation(arma::uword vector_size, arma::umat peak_positions)
319 {
320  arma::uvec peaks = peak_positions.col(0);
321  arma::vec population = arma::zeros(vector_size);
322  population.elem(peaks) = arma::ones(peaks.n_elem);
323  return population;
324 }
325 
326 
327 arma::vec Vespucci::Math::PeakFinding::PeakExtrema(arma::uword vector_size, arma::umat peak_positions)
328 {
329  if (peak_positions.n_cols != 3){
330  return arma::zeros(vector_size);
331  }
332  else{
333  arma::vec extrema = arma::zeros(vector_size);
334  arma::uvec peak_centers = peak_positions.col(0);
335  arma::uvec left_boundaries = peak_positions.col(1);
336  arma::uvec right_boundaries = peak_positions.col(2);
337  extrema.elem(peak_centers) = arma::ones(peak_centers.n_elem);
338  extrema.elem(left_boundaries) = -1 * arma::ones(left_boundaries.n_elem);
339  extrema.elem(right_boundaries) = -1 * arma::ones(right_boundaries.n_elem);
340  return extrema;
341  }
342 }
343 
352  arma::umat peaks,
353  arma::uword window_size)
354 {
355  if (window_size % 2 == 0){
356  window_size--;
357  std::cerr << "invalid window_size, using one less" << std::endl;
358  }
359 
360  arma::vec baseline = X;
361  arma::uword start, end, center, size;
362  //linearize across bases of peaks
363  arma::uword i;
364  try{
365  for (i = 0; i < peaks.n_rows; ++i){
366  center = peaks(i, 0);
367  start = peaks(i, 1);
368  end = peaks(i, 2);
369  //cut accross in a straight line when peak is on edge of spectrum
370  size = end - start;
371 
372  //linearize baseline across base of peak
373  // if the start or end of the peak is the center, take a flat line
374  // from the minimum
375  if (start == center){
376  baseline.subvec(start, end-1) = X(end) * arma::ones(size);
377  }
378  else if (end == center){
379  baseline.subvec(start, end-1) = X(start) * arma::ones(size);
380  }
381  else{
382  baseline.subvec(start, end-1) = arma::linspace(X(start), X(end), size);
383  }
384 
385  }
386  }catch(std::exception e){
387  std::cout << "Exception! (peak exclusion)" << std::endl;
388  std::cout << "size of peaks = " << peaks.n_rows << " " << peaks.n_cols << std::endl;
389  std::cout << "i = " << i << std::endl;
390  throw std::runtime_error(e.what());
391  }
392 
393  //apply local minimum filtering to baseline
394 
395  arma::uword k = (window_size - 1) / 2; //mid-point of window
396  arma::vec buffer;
397  arma::vec filtered(baseline.n_elem);
398  try{
399  for (arma::uword i = k; i < (X.n_rows - k); ++i){
400  buffer = baseline.subvec(i-k, i+k);
401  filtered(i) = buffer.min();
402  }
403  //fill edges with first and last values
404  for (arma::uword i = 0; i < k; ++i){
405  filtered(i) = filtered(k);
406  }
407  for (arma::uword i = filtered.n_rows - k; i < filtered.n_elem; ++i){
408  filtered(i) = filtered(filtered.n_rows - k - 2);
409  }
410  }catch(std::exception e){
411  std::cout << "Exception! (filtering)" << std::endl;
412  std::cout << "i = " << i << std::endl;
413  throw std::runtime_error(e.what());
414  }
415 
416  return filtered;
417 
418 
419 }
VESPUCCI_EXPORT arma::mat sgolayfilt(const arma::mat &x, arma::uword poly_order, arma::uword window_size, arma::uword deriv_order, arma::uword scaling_factor)
sgolayfilt Apply Savitzky-Golay smoothing to each column of a arma::matrix
Definition: FIR.cpp:89
T diff(const T &X, arma::uword deriv_order=1)
Vespucci::Math::diff.
VESPUCCI_EXPORT arma::vec PeakExtrema(arma::uword vector_size, arma::umat peak_positions)
VESPUCCI_EXPORT arma::vec FindPeaks(arma::vec X, arma::vec dX, double sel, double threshold, arma::vec &peak_magnitudes)
Vespucci::Math::PeakFinding::FindPeaks an implementation of the peakfinder arma::mat routine...
Definition: peakfinding.cpp:44
VESPUCCI_EXPORT arma::mat FindPeaksMat(arma::mat X, double sel, double threshold, arma::uword poly_order, arma::uword window_size, arma::mat &peak_magnitudes)
Vespucci::Math::PeakFinding::FindPeaksMat Performs FindPeaks on a spectra arma::matrix.
VESPUCCI_EXPORT arma::vec PeakPopulation(arma::uword vector_size, arma::umat peak_positions)
VESPUCCI_EXPORT arma::vec EstimateBaseline(arma::vec X, arma::umat peaks, arma::uword window_size)
EstimateBaseline.
VESPUCCI_EXPORT arma::uvec FindPeakPositions(arma::vec X, arma::vec dX, double sel, double threshold, arma::uvec &local_minima)