Vespucci  1.0.0
textimport.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2  Copyright (C) 2014-2016 Wright State University - All Rights Reserved
3  Daniel P. Foose - Maintainer/Lead Developer
4 
5  This file is part of Vespucci.
6 
7  Vespucci is free software: you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation, either version 3 of the License, or
10  (at your option) any later version.
11 
12  Vespucci is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with Vespucci. If not, see <http://www.gnu.org/licenses/>.
19 *******************************************************************************/
20 #include <Data/Import/textimport.h>
21 #include <Global/vespucci.h>
22 #include <regex>
23 #include <QtCore>
24 #include <boost/tokenizer.hpp>
25 #include <boost/lexical_cast.hpp>
26 #include <string>
27 #include <iostream>
28 #include <regex>
29 
30 using namespace arma;
37 bool TextImport::CheckFileValidity(QString filename, bool &comma_decimals)
38 {
39  QFile inputfile(filename);
40  inputfile.open(QIODevice::ReadOnly);
41  QTextStream inputstream(&inputfile);
42  QString line = inputstream.readLine();
43  QStringList tab_list = line.split("\t");
44  QStringList comma_list = line.split(",");
45  bool valid;
46 
47  if(tab_list.size() <= 0 && comma_list.size() <= 0){
48  return false;
49  }
50  else if(tab_list.size() > 0 && comma_list.size() > 0){
51  //we probably have a tab delimited file with commas for decimal points
52  //import function will probabably throw exception if comma is separator
53  //and type is wide text, so comma_decimals must be checked
54  comma_decimals = true;
55  return true;
56  }
57  else if(tab_list.size() > 0 && comma_list.size() == 0){
58  comma_decimals = false;
59  for (int i = 0; i < tab_list.size(); ++i){
60  tab_list[i].toDouble(&valid);
61  if(!valid)
62  return false;
63  }
64  return true;
65  }
66  else if(tab_list.size() == 0 && comma_list.size() > 0){
67  comma_decimals = false; //we don't know if this is true or not.
68  //users using instruments that save csv files with local number formatting
69  //in regions where commas are used for decimal places should be super
70  //careful.
71  for (int i = 0; i < comma_list.size(); ++i){
72  comma_list[i].toDouble(&valid);
73  if(!valid)
74  return false;
75  }
76  return true;
77  }
78  else{
79  return false;
80  }
81 
82 
83 }
84 
85 
97 bool TextImport::ImportWideText(std::string filename,
98  arma::mat &spectra,
99  arma::vec &abscissa,
100  arma::vec &x, arma::vec &y,
101  bool swap_spatial)
102 {
103  bool ok;
104  Vespucci::ResetDataset(spectra, x, y, abscissa);
105  std::ifstream file(filename);
106  std::string first_line;
107  std::getline(file, first_line);
108  first_line.erase(0, 2); //remove leading empty elements
109 
110  abscissa = mat(first_line).t();
111 
112  ok = spectra.load(file, raw_ascii);
113  if (!ok){
114  Vespucci::ResetDataset(spectra, x, y, abscissa);
115  return false;
116  }
117 
118  if (swap_spatial){
119  x = spectra.col(1);
120  y = spectra.col(0);
121  }
122  else{
123  x = spectra.col(0);
124  y = spectra.col(1);
125  }
126 
127  spectra.shed_cols(0, 1);
128  arma::inplace_trans(spectra);
129 
130  //check to make sure everything is sorted the way Vespucci expects
131  arma::uvec sorted_indices = arma::stable_sort_index(abscissa);
132  abscissa = abscissa.rows(sorted_indices);
133  spectra = spectra.rows(sorted_indices);
134  if (swap_spatial){
135 
136  }
137 
138  return true;
139 }
140 
152 bool TextImport::ImportMultiplePoints(std::map<std::pair<int, int>, std::string> filenames,
153  int rows, int cols,
154  arma::mat &spectra,
155  arma::vec &abscissa,
156  arma::vec &x, arma::vec &y)
157 {
158  //fill x and y based on rows and colums
159  x.set_size(rows*cols);
160  y.set_size(rows*cols);
161 
162  bool have_abscissa = false;
163  mat current_spectrum;
164  std::pair<int,int> origin(0,0);
165  bool ok = current_spectrum.load(filenames[origin]);
166  int spec_rows = current_spectrum.n_rows;
167  spectra.set_size(spec_rows, x.n_rows);
168  if (!ok){
169  return false;
170  }
171  typedef std::map<std::pair<int,int>, std::string>::iterator map_it;
172  uword i = 0;
173  std::string filename;
174  for (map_it it = filenames.begin(); it!=filenames.end(); ++it){
175  std::pair<int,int> keys = it->first;
176  x(i) = (double) keys.first;
177  y(i) = (double) keys.second;
178  filename = it->second;
179  ok = current_spectrum.load(filename);
180  if (!have_abscissa && current_spectrum.n_cols == 2){
181  if (current_spectrum.n_rows > current_spectrum.n_cols){
182  abscissa = current_spectrum.col(0);
183  current_spectrum.shed_col(0);
184  }
185  else{
186  abscissa = current_spectrum.row(0).t();
187  current_spectrum.shed_row(0);
188  }
189  have_abscissa = true;
190  }
191  try{
192  if (current_spectrum.n_rows > current_spectrum.n_cols){
193  if (current_spectrum.n_cols > 1)
194  spectra.col(i) = current_spectrum.col(1);
195  else
196  spectra.col(i) = current_spectrum.col(0);
197  }
198  else{
199  if (current_spectrum.n_rows > 1)
200  spectra.col(i) = current_spectrum.row(1).t();
201  else
202  spectra.col(i) = current_spectrum.row(0).t();
203  }
204  }catch(std::exception e){
205  spectra.clear();
206  abscissa.clear();
207  x.clear();
208  y.clear();
209  std::cerr << "Exception thrown. Spectra " << i << "." << "Type: " << e.what() << "." << std::endl;
210  return false;
211  }
212 
213  if (!ok){
214  std::cerr << "Could not load file " << filename << "." << std::endl;
215  }
216  ++i;
217  }
218 
219  //check to make sure everything is sorted the way Vespucci expects
220  arma::uvec sorted_indices = arma::stable_sort_index(abscissa);
221  abscissa = abscissa.rows(sorted_indices);
222  spectra = spectra.rows(sorted_indices);
223 
224  return have_abscissa;
225 }
226 
242 bool TextImport::ImportWitec(std::string filename,
243  double x_start,
244  double y_start,
245  double x_end,
246  double y_end,
247  arma::uword x_count,
248  arma::uword y_count,
249  arma::mat &spectra,
250  arma::vec &abscissa,
251  arma::vec &x,
252  arma::vec &y)
253 {
254  Vespucci::ResetDataset(spectra, x, y, abscissa);
255  spectra.load(filename);
256  if (spectra.n_cols < 2) return false;
257  abscissa = spectra.col(0);
258  spectra.shed_col(0);
259  if (x_count * y_count != spectra.n_cols){
260  Vespucci::ResetDataset(spectra, x, y, abscissa);
261  return false;
262  }
263  GenerateSpatialData(x_start, y_start,
264  x_end, y_end,
265  x_count, y_count,
266  x, y);
267  if (!(x.n_rows == y.n_rows && x.n_rows == spectra.n_cols)){
268  Vespucci::ResetDataset(spectra, x, y, abscissa);
269  return false;
270  }
271 
272  return true;
273 }
274 
286 void TextImport::GenerateSpatialData(double x_start, double y_start, double x_end, double y_end, arma::uword x_count, arma::uword y_count, arma::vec &x, arma::vec &y)
287 {
288  x.clear();
289  y.clear();
290  arma::vec unique_x = arma::linspace(x_start, x_end, x_count);
291  arma::vec unique_y = arma::linspace(y_start, y_end, y_count);
292  arma::uword vec_size = x_count * y_count;
293  x.set_size(vec_size);
294  y.set_size(vec_size);
295  for (arma::uword i = 0; i < unique_y.n_rows; ++i){
296  y.rows(i, i*x_count).fill(unique_y(i));
297  x.rows(i, i*x_count) = unique_x;
298  }
299 
300 }
301 
311 bool TextImport::ImportLongText(std::string filename, arma::mat &spectra, arma::mat &abscissa, arma::vec &x, arma::vec &y, bool swap_spatial)
312 {
313  arma::mat all_data;
314  try{
315  all_data.load(filename);
316  }catch(...){
317  return false;
318  }
319  if (all_data.n_cols < 4) return false;
320  arma::vec all_x = (swap_spatial ? all_data.col(1) : all_data.col(0));
321  arma::vec all_y = (swap_spatial ? all_data.col(0) : all_data.col(1));
322  arma::vec all_abscissa = all_data.col(2);
323  arma::vec all_spectra = all_data.col(3);
324  double abscissa_max = all_abscissa.max();
325  arma::uvec max_indices = arma::find(all_abscissa == abscissa_max);
326  x = all_x.elem(max_indices);
327  y = all_y.elem(max_indices);
328  abscissa = arma::unique(all_abscissa);
329  spectra = arma::mat(all_spectra.memptr(), abscissa.n_rows, x.n_rows);
330 
331  arma::uvec sorted_indices = arma::stable_sort_index(abscissa);
332  abscissa = abscissa.rows(sorted_indices);
333  spectra = spectra.rows(sorted_indices);
334  return true;
335 }
VESPUCCI_EXPORT bool CheckFileValidity(QString filename, bool &comma_decimals)
TextImport::CheckFileValidity.
Definition: textimport.cpp:37
VESPUCCI_EXPORT bool ImportWitec(std::string filename, double x_start, double y_start, double x_end, double y_end, arma::uword x_count, arma::uword y_count, arma::mat &spectra, arma::vec &abscissa, arma::vec &x, arma::vec &y)
TextImport::ImportWitec.
Definition: textimport.cpp:242
VESPUCCI_EXPORT void GenerateSpatialData(double x_start, double y_start, double x_end, double y_end, arma::uword x_count, arma::uword y_count, arma::vec &x, arma::vec &y)
TextImport::GenerateSpatialData.
Definition: textimport.cpp:286
VESPUCCI_EXPORT bool ImportWideText(std::string filename, arma::mat &spectra, arma::vec &abscissa, arma::vec &x, arma::vec &y, bool swap_spatial)
TextImport::ImportWideText.
Definition: textimport.cpp:97
VESPUCCI_EXPORT bool ImportMultiplePoints(std::map< std::pair< int, int >, std::string > filenames, int rows, int cols, arma::mat &spectra, arma::vec &abscissa, arma::vec &x, arma::vec &y)
TextImport::ImportMultiplePoints.
Definition: textimport.cpp:152
VESPUCCI_EXPORT void ResetDataset(arma::mat &spectra, arma::vec &x, arma::vec &y, arma::vec &abscissa)
Definition: vespucci.cpp:226
VESPUCCI_EXPORT bool ImportLongText(std::string filename, arma::mat &spectra, arma::mat &abscissa, arma::vec &x, arma::vec &y, bool swap_spatial)
TextImport::ImportLongText.
Definition: textimport.cpp:311