OGS
CsvInterface.cpp
Go to the documentation of this file.
1 
14 #include "CsvInterface.h"
15 
16 #include <algorithm>
17 #include <fstream>
18 #include <numeric>
19 
20 #include "GeoLib/Point.h"
21 
22 namespace FileIO
23 {
/// Default constructor. The class reference describes it as only being needed
/// for writing files; the reading functions in this file are static.
24 CsvInterface::CsvInterface() = default;
25 
26 std::vector<std::string> CsvInterface::getColumnNames(std::string const& fname,
27  char const delim)
28 {
29  std::ifstream in(fname.c_str());
30 
31  if (!in.is_open())
32  {
33  ERR("CsvInterface::readPoints(): Could not open file {:s}.", fname);
34  return std::vector<std::string>();
35  }
36  std::string line;
37  if (!std::getline(in, line))
38  {
39  return {};
40  }
41 
42  std::list<std::string> fields;
43  if (delim != '\n')
44  {
45  fields = BaseLib::splitString(line, delim);
46  }
47 
48  if (fields.size() < 2)
49  {
50  for (char const d : {'\t', ';', ','})
51  {
52  fields = BaseLib::splitString(line, d);
53  if (fields.size() > 1)
54  {
55  break;
56  }
57  }
58  }
59  return {begin(fields), end(fields)};
60 }
61 
62 int CsvInterface::readPoints(std::string const& fname, char delim,
63  std::vector<GeoLib::Point*>& points)
64 {
65  std::ifstream in(fname.c_str());
66 
67  if (!in.is_open())
68  {
69  ERR("CsvInterface::readPoints(): Could not open file {:s}.", fname);
70  return -1;
71  }
72 
73  std::string line;
74  std::getline(in, line);
75 
76  std::size_t line_count(0);
77  std::size_t error_count(0);
78  std::list<std::string>::const_iterator it;
79  while (std::getline(in, line))
80  {
81  line_count++;
82  std::list<std::string> const fields = BaseLib::splitString(line, delim);
83 
84  if (fields.size() < 3)
85  {
86  ERR("Line {:d} contains not enough columns of data. Skipping "
87  "line...",
88  line_count);
89  error_count++;
90  continue;
91  }
92  it = fields.begin();
93  std::array<double, 3> point{};
94  try
95  {
96  point[0] = std::stod(*it);
97  point[1] = std::stod(*(++it));
98  point[2] = std::stod(*(++it));
99  points.push_back(new GeoLib::Point(point[0], point[1], point[2]));
100  }
101  catch (const std::invalid_argument&)
102  {
103  ERR("Error converting data to coordinates in line {:d}.",
104  line_count);
105  }
106  }
107  return error_count;
108 }
109 
110 int CsvInterface::readPoints(std::string const& fname, char delim,
111  std::vector<GeoLib::Point*>& points,
112  std::string const& x_column_name,
113  std::string const& y_column_name,
114  std::string const& z_column_name)
115 {
116  std::ifstream in(fname.c_str());
117  std::array<std::string, 3> const column_names = {
118  {x_column_name, y_column_name, z_column_name}};
119 
120  if (!in.is_open())
121  {
122  ERR("CsvInterface::readPoints(): Could not open file {:s}.", fname);
123  return -1;
124  }
125 
126  std::string line;
127  std::getline(in, line);
128  std::array<std::size_t, 3> const column_idx = {
129  {CsvInterface::findColumn(line, delim, x_column_name),
130  CsvInterface::findColumn(line, delim, y_column_name),
131  (z_column_name.empty())
132  ? CsvInterface::findColumn(line, delim, y_column_name)
133  : CsvInterface::findColumn(line, delim, z_column_name)}};
134 
135  for (std::size_t i = 0; i < 3; ++i)
136  {
137  if (column_idx[i] == std::numeric_limits<std::size_t>::max())
138  {
139  ERR("Column '{:s}' not found in file header.", column_names[i]);
140  return -1;
141  }
142  }
143 
144  return readPoints(in, delim, points, column_idx);
145 }
146 
147 int CsvInterface::readPoints(std::string const& fname, char delim,
148  std::vector<GeoLib::Point*>& points,
149  std::size_t x_column_idx, std::size_t y_column_idx,
150  std::size_t z_column_idx)
151 {
152  std::ifstream in(fname.c_str());
153 
154  if (!in.is_open())
155  {
156  ERR("CsvInterface::readPoints(): Could not open file {:s}.", fname);
157  return -1;
158  }
159 
160  if (z_column_idx == std::numeric_limits<std::size_t>::max())
161  {
162  z_column_idx = y_column_idx;
163  }
164  std::array<std::size_t, 3> const column_idx = {
165  {x_column_idx, y_column_idx, z_column_idx}};
166 
167  return readPoints(in, delim, points, column_idx);
168 }
169 
170 int CsvInterface::readPoints(std::ifstream& in, char delim,
171  std::vector<GeoLib::Point*>& points,
172  std::array<std::size_t, 3> const& column_idx)
173 {
174  std::array<std::size_t, 3> order = {{0, 1, 2}};
175  std::sort(order.begin(), order.end(),
176  [&column_idx](std::size_t idx1, std::size_t idx2)
177  { return column_idx[idx1] < column_idx[idx2]; });
178  std::array<std::size_t, 3> const column_advance = {
179  {column_idx[order[0]], column_idx[order[1]] - column_idx[order[0]],
180  column_idx[order[2]] - column_idx[order[1]]}};
181 
182  std::string line;
183  std::size_t line_count(0);
184  std::size_t error_count(0);
185  std::list<std::string>::const_iterator it;
186 
187  while (std::getline(in, line))
188  {
189  line_count++;
190  std::list<std::string> const fields = BaseLib::splitString(line, delim);
191 
192  if (fields.size() < column_idx[order[2]] + 1)
193  {
194  ERR("Line {:d} contains not enough columns of data. Skipping "
195  "line...",
196  line_count);
197  error_count++;
198  continue;
199  }
200 
201  std::array<double, 3> point{};
202  it = fields.begin();
203  try
204  {
205  std::advance(it, column_advance[0]);
206  point[order[0]] = std::stod(*it);
207  std::advance(it, column_advance[1]);
208  point[order[1]] = std::stod(*it);
209  std::advance(it, column_advance[2]);
210  point[order[2]] =
211  (column_idx[1] == column_idx[2]) ? 0 : std::stod(*it);
212  points.push_back(new GeoLib::Point(point[0], point[1], point[2]));
213  }
214  catch (const std::invalid_argument&)
215  {
216  ERR("Error converting data to coordinates in line {:d}.",
217  line_count);
218  error_count++;
219  }
220  }
221  return error_count;
222 }
223 
224 std::size_t CsvInterface::findColumn(std::string const& line, char delim,
225  std::string const& column_name)
226 {
227  std::list<std::string> const fields = BaseLib::splitString(line, delim);
228  if (fields.empty())
229  {
230  return std::numeric_limits<std::size_t>::max();
231  }
232 
233  std::size_t count(0);
234  for (const auto& field : fields)
235  {
236  if (field == column_name)
237  {
238  break;
239  }
240 
241  count++;
242  }
243 
244  if (count == fields.size())
245  {
246  return std::numeric_limits<std::size_t>::max();
247  }
248 
249  return count;
250 }
251 
253 {
254  std::vector<int> idx_vec(s);
255  std::iota(idx_vec.begin(), idx_vec.end(), 0);
256  addVectorForWriting("Index", idx_vec);
257 }
258 
260 {
261  if (_data.empty())
262  {
263  ERR("CsvInterface::write() - No data to write.");
264  return false;
265  }
266 
267  std::size_t const n_vecs(_data.size());
268  std::size_t const vec_size(getVectorSize(0));
269 
270  if (_writeCsvHeader)
271  {
272  out << _vec_names[0];
273  for (std::size_t i = 1; i < n_vecs; ++i)
274  {
275  out << "\t" << _vec_names[i];
276  }
277  out << "\n";
278  }
279 
280  for (std::size_t j = 0; j < vec_size; ++j)
281  {
282  writeValue(0, j);
283  for (std::size_t i = 1; i < n_vecs; ++i)
284  {
285  out << "\t";
286  writeValue(i, j);
287  }
288  out << "\n";
289  }
290  return true;
291 }
292 
293 std::size_t CsvInterface::getVectorSize(std::size_t idx) const
294 {
295  if (_data[idx].type() == typeid(std::vector<std::string>))
296  {
297  return std::any_cast<std::vector<std::string>>(_data[idx]).size();
298  }
299  if (_data[idx].type() == typeid(std::vector<double>))
300  {
301  return std::any_cast<std::vector<double>>(_data[idx]).size();
302  }
303  if (_data[idx].type() == typeid(std::vector<int>))
304  {
305  return std::any_cast<std::vector<int>>(_data[idx]).size();
306  }
307  return 0;
308 }
309 
310 void CsvInterface::writeValue(std::size_t vec_idx, std::size_t in_vec_idx)
311 {
312  if (_data[vec_idx].type() == typeid(std::vector<std::string>))
313  {
314  out << std::any_cast<std::vector<std::string>>(
315  _data[vec_idx])[in_vec_idx];
316  }
317  else if (_data[vec_idx].type() == typeid(std::vector<double>))
318  {
319  out << std::any_cast<std::vector<double>>(_data[vec_idx])[in_vec_idx];
320  }
321  else if (_data[vec_idx].type() == typeid(std::vector<int>))
322  {
323  out << std::any_cast<std::vector<int>>(_data[vec_idx])[in_vec_idx];
324  }
325 }
326 
327 } // end namespace FileIO
Definition of the CsvInterface class.
Definition of the Point class.
void ERR(char const *fmt, Args const &... args)
Definition: Logging.h:42
std::ostringstream out
The stream to write to.
Definition: Writer.h:46
std::vector< std::any > _data
Definition: CsvInterface.h:253
void writeValue(std::size_t vec_idx, std::size_t in_vec_idx)
std::vector< std::string > _vec_names
Definition: CsvInterface.h:252
static std::vector< std::string > getColumnNames(std::string const &fname, char delim)
bool addVectorForWriting(std::string const &vec_name, std::vector< T > const &vec)
Definition: CsvInterface.h:64
static std::size_t findColumn(std::string const &line, char delim, std::string const &column_name)
void addIndexVectorForWriting(std::size_t s)
Adds an index vector of size s to the CSV file.
std::size_t getVectorSize(std::size_t idx) const
Returns the size of the vector with the given index.
CsvInterface()
Constructor (only needed for writing files)
bool write() override
Writes the CSV file.
static int readPoints(std::string const &fname, char delim, std::vector< GeoLib::Point * > &points)
std::vector< std::string > splitString(std::string const &str)
Definition: StringTools.cpp:28