OGS
CsvInterface.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: Copyright (c) OpenGeoSys Community (opengeosys.org)
2// SPDX-License-Identifier: BSD-3-Clause
3
4#include "CsvInterface.h"
5
6#include <algorithm>
7#include <numeric>
8
9#include "GeoLib/Point.h"
10
11namespace FileIO
12{
14
15std::vector<std::string> CsvInterface::getColumnNames(std::string const& fname,
16 char const delim)
17{
18 std::ifstream in(fname.c_str());
19
20 if (!in.is_open())
21 {
22 ERR("CsvInterface::getColumnNames(): Could not open file {:s}.", fname);
23 return std::vector<std::string>();
24 }
25 std::string line;
26 if (!std::getline(in, line))
27 {
28 ERR("CsvInterface::getColumnNames(): Could not read line from file "
29 "{:s}. Is it empty?",
30 fname);
31 return {};
32 }
33
34 if (delim == '\n')
35 {
36 return {};
37 }
38
39 std::list<std::string> fields = BaseLib::splitString(line, delim);
40 if (fields.size() < 2)
41 {
42 for (char const d : {'\t', ';', ','})
43 {
44 fields = BaseLib::splitString(line, d);
45 if (fields.size() > 1)
46 {
47 break;
48 }
49 }
50 }
51 return {begin(fields), end(fields)};
52}
53
54int CsvInterface::readPoints(std::string const& fname, char delim,
55 std::vector<GeoLib::Point*>& points)
56{
57 std::ifstream in(fname.c_str());
58
59 if (!in.is_open())
60 {
61 ERR("CsvInterface::readPoints(): Could not open file {:s}.", fname);
62 return -1;
63 }
64
65 std::string line;
66 std::getline(in, line);
67
68 std::size_t line_count(0);
69 std::size_t error_count(0);
70 std::list<std::string>::const_iterator it;
71 while (std::getline(in, line))
72 {
73 line_count++;
74 std::list<std::string> const fields = BaseLib::splitString(line, delim);
75
76 if (fields.size() < 3)
77 {
78 ERR("Line {:d} contains not enough columns of data. Skipping "
79 "line...",
80 line_count);
81 error_count++;
82 continue;
83 }
84 it = fields.begin();
85 try
86 {
87 std::array<double, 3> point{};
88 point[0] = std::stod(*it);
89 point[1] = std::stod(*(++it));
90 point[2] = std::stod(*(++it));
91 points.push_back(new GeoLib::Point(point[0], point[1], point[2]));
92 }
93 catch (const std::invalid_argument&)
94 {
95 ERR("Error converting data to coordinates in line {:d}.",
96 line_count);
97 }
98 }
99 return error_count;
100}
101
102int CsvInterface::readPoints(std::string const& fname, char delim,
103 std::vector<GeoLib::Point*>& points,
104 std::string const& x_column_name,
105 std::string const& y_column_name,
106 std::string const& z_column_name)
107{
108 std::ifstream in(fname.c_str());
109 std::array<std::string, 3> const column_names = {
110 {x_column_name, y_column_name, z_column_name}};
111
112 if (!in.is_open())
113 {
114 ERR("CsvInterface::readPoints(): Could not open file {:s}.", fname);
115 return -1;
116 }
117
118 std::string line;
119 std::getline(in, line);
120 std::array<std::size_t, 3> const column_idx = {
121 {CsvInterface::findColumn(line, delim, x_column_name),
122 CsvInterface::findColumn(line, delim, y_column_name),
123 (z_column_name.empty())
124 ? CsvInterface::findColumn(line, delim, y_column_name)
125 : CsvInterface::findColumn(line, delim, z_column_name)}};
126
127 for (std::size_t i = 0; i < 3; ++i)
128 {
129 if (column_idx[i] == std::numeric_limits<std::size_t>::max())
130 {
131 ERR("Column '{:s}' not found in file header.", column_names[i]);
132 return -1;
133 }
134 }
135
136 return readPoints(in, delim, points, column_idx);
137}
138
139int CsvInterface::readPoints(std::string const& fname, char delim,
140 std::vector<GeoLib::Point*>& points,
141 std::size_t x_column_idx, std::size_t y_column_idx,
142 std::size_t z_column_idx)
143{
144 std::ifstream in(fname.c_str());
145
146 if (!in.is_open())
147 {
148 ERR("CsvInterface::readPoints(): Could not open file {:s}.", fname);
149 return -1;
150 }
151
152 if (z_column_idx == std::numeric_limits<std::size_t>::max())
153 {
154 z_column_idx = y_column_idx;
155 }
156 std::array<std::size_t, 3> const column_idx = {
157 {x_column_idx, y_column_idx, z_column_idx}};
158
159 return readPoints(in, delim, points, column_idx);
160}
161
162int CsvInterface::readPoints(std::ifstream& in, char delim,
163 std::vector<GeoLib::Point*>& points,
164 std::array<std::size_t, 3> const& column_idx)
165{
166 std::array<std::size_t, 3> order = {{0, 1, 2}};
167 std::sort(order.begin(), order.end(),
168 [&column_idx](std::size_t idx1, std::size_t idx2)
169 { return column_idx[idx1] < column_idx[idx2]; });
170 std::array<std::size_t, 3> const column_advance = {
171 {column_idx[order[0]], column_idx[order[1]] - column_idx[order[0]],
172 column_idx[order[2]] - column_idx[order[1]]}};
173
174 std::string line;
175 std::size_t line_count(0);
176 std::size_t error_count(0);
177 std::list<std::string>::const_iterator it;
178
179 while (std::getline(in, line))
180 {
181 line_count++;
182 std::list<std::string> const fields = BaseLib::splitString(line, delim);
183
184 if (fields.size() < column_idx[order[2]] + 1)
185 {
186 ERR("Line {:d} contains not enough columns of data. Skipping "
187 "line...",
188 line_count);
189 error_count++;
190 continue;
191 }
192
193 it = fields.begin();
194 try
195 {
196 std::advance(it, column_advance[0]);
197 std::array<double, 3> point{};
198 point[order[0]] = std::stod(*it);
199 std::advance(it, column_advance[1]);
200 point[order[1]] = std::stod(*it);
201 std::advance(it, column_advance[2]);
202 point[order[2]] =
203 (column_idx[1] == column_idx[2]) ? 0 : std::stod(*it);
204 points.push_back(new GeoLib::Point(point[0], point[1], point[2]));
205 }
206 catch (const std::invalid_argument&)
207 {
208 ERR("Error converting data to coordinates in line {:d}.",
209 line_count);
210 error_count++;
211 }
212 }
213 return error_count;
214}
215
216std::size_t CsvInterface::findColumn(std::string const& line, char delim,
217 std::string const& column_name)
218{
219 std::list<std::string> const fields = BaseLib::splitString(line, delim);
220 if (fields.empty())
221 {
222 return std::numeric_limits<std::size_t>::max();
223 }
224
225 std::size_t count(0);
226 for (const auto& field : fields)
227 {
228 if (field == column_name)
229 {
230 break;
231 }
232
233 count++;
234 }
235
236 if (count == fields.size())
237 {
238 return std::numeric_limits<std::size_t>::max();
239 }
240
241 return count;
242}
243
245{
246 std::vector<int> idx_vec(s);
247 std::iota(idx_vec.begin(), idx_vec.end(), 0);
248 addVectorForWriting("Index", idx_vec);
249}
250
252{
253 if (_data.empty())
254 {
255 ERR("CsvInterface::write() - No data to write.");
256 return false;
257 }
258
259 std::size_t const n_vecs(_data.size());
260 std::size_t const vec_size(getVectorSize(0));
261
262 if (_writeCsvHeader)
263 {
264 out << _vec_names[0];
265 for (std::size_t i = 1; i < n_vecs; ++i)
266 {
267 out << "\t" << _vec_names[i];
268 }
269 out << "\n";
270 }
271
272 for (std::size_t j = 0; j < vec_size; ++j)
273 {
274 writeValue(0, j);
275 for (std::size_t i = 1; i < n_vecs; ++i)
276 {
277 out << "\t";
278 writeValue(i, j);
279 }
280 out << "\n";
281 }
282 return true;
283}
284
285std::size_t CsvInterface::getVectorSize(std::size_t idx) const
286{
287 if (_data[idx].type() == typeid(std::vector<std::string>))
288 {
289 return std::any_cast<std::vector<std::string>>(_data[idx]).size();
290 }
291 if (_data[idx].type() == typeid(std::vector<double>))
292 {
293 return std::any_cast<std::vector<double>>(_data[idx]).size();
294 }
295 if (_data[idx].type() == typeid(std::vector<int>))
296 {
297 return std::any_cast<std::vector<int>>(_data[idx]).size();
298 }
299 return 0;
300}
301
302void CsvInterface::writeValue(std::size_t vec_idx, std::size_t in_vec_idx)
303{
304 if (_data[vec_idx].type() == typeid(std::vector<std::string>))
305 {
307 _data[vec_idx])[in_vec_idx];
308 }
309 else if (_data[vec_idx].type() == typeid(std::vector<double>))
310 {
311 out << std::any_cast<std::vector<double>>(_data[vec_idx])[in_vec_idx];
312 }
313 else if (_data[vec_idx].type() == typeid(std::vector<int>))
314 {
315 out << std::any_cast<std::vector<int>>(_data[vec_idx])[in_vec_idx];
316 }
317}
318
319} // end namespace FileIO
void ERR(fmt::format_string< Args... > fmt, Args &&... args)
Definition Logging.h:40
std::ostringstream out
The stream to write to.
Definition Writer.h:36
std::vector< std::any > _data
void writeValue(std::size_t vec_idx, std::size_t in_vec_idx)
std::vector< std::string > _vec_names
static std::vector< std::string > getColumnNames(std::string const &fname, char delim)
bool addVectorForWriting(std::string const &vec_name, std::vector< T > const &vec)
static std::size_t findColumn(std::string const &line, char delim, std::string const &column_name)
void addIndexVectorForWriting(std::size_t s)
Adds an index vector of size s to the CSV file.
std::size_t getVectorSize(std::size_t idx) const
Returns the size of the vector with the given index.
CsvInterface()
Constructor (only needed for writing files)
bool write() override
Writes the CSV file.
static int readPoints(std::string const &fname, char delim, std::vector< GeoLib::Point * > &points)
std::vector< std::string > splitString(std::string const &str)