Alexandria  2.19
Please provide a description of the project.
AsciiReader.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2012-2021 Euclid Science Ground Segment
3  *
4  * This library is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU Lesser General Public License as published by the Free
6  * Software Foundation; either version 3.0 of the License, or (at your option)
7  * any later version.
8  *
9  * This library is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * along with this library; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
25 #include <fstream>
26 #include <set>
27 // The std regex library is not fully implemented in GCC 4.8. The following lines
28 // use the Boost regex library instead and can be switched to std::regex once
29 // GCC 4.9 is used.
30 // #include <regex>
31 #include <boost/regex.hpp>
32 using boost::regex;
33 using boost::regex_match;
34 #include <boost/algorithm/string.hpp>
35 
36 #if BOOST_VERSION < 107300
37 #include <boost/io/detail/quoted_manip.hpp>
38 #else
39 #include <boost/io/quoted.hpp>
40 #endif
41 
43 #include "Table/AsciiReader.h"
44 
45 #include "AsciiReaderHelper.h"
46 #include "ReaderHelper.h"
47 
48 namespace Euclid {
49 namespace Table {
50 
// Constructs a reader on an existing stream by delegating to the holder-taking
// constructor with InstOrRefHolder<std::istream>::create(stream).
51 AsciiReader::AsciiReader(std::istream& stream) : AsciiReader(InstOrRefHolder<std::istream>::create(stream)) {}
52 
// Constructs a reader for the named file by delegating to the holder-taking
// constructor with the result of create<std::ifstream>(filename).
53 AsciiReader::AsciiReader(const std::string& filename) : AsciiReader(create<std::ifstream>(filename)) {}
54 
// Moves the given stream holder into m_stream_holder; nothing else is done here.
56  : m_stream_holder(std::move(stream_holder)) {}
57 
// The comment indicator may only be changed while m_reading_started is false.
59  if (m_reading_started) {
60  throw Elements::Exception() << "Changing comment indicator after reading "
61  << "has started is not allowed";
62  }
  // An empty indicator is rejected.
63  if (indicator.empty()) {
64  throw Elements::Exception() << "Empty string as comment indicator";
65  }
66  m_comment = indicator;
  // Hand back this reader.
67  return *this;
68 }
69 
// The column names may only be fixed while m_reading_started is false.
71  if (m_reading_started) {
72  throw Elements::Exception() << "Fixing the column names after reading "
73  << "has started is not allowed";
74  }
75 
  // NOTE(review): the member is assigned before the names are validated, so
  // when one of the checks below throws, m_column_names already holds the
  // rejected names - confirm this is intended.
76  m_column_names = std::move(column_names);
77 
79  regex vertical_whitespace{".*\\v.*"}; // Matches a name containing at least one vertical whitespace character
80  for (const auto& name : m_column_names) {
81  if (name.empty()) {
82  throw Elements::Exception() << "Empty string column names are not allowed";
83  }
84  if (regex_match(name, vertical_whitespace)) {
85  throw Elements::Exception() << "Column name '" << name << "' contains "
86  << "vertical whitespace characters";
87  }
88  if (!set.insert(name).second) { // insert() reports false in .second when the name was already seen
89  throw Elements::Exception() << "Duplicate column name " << name;
90  }
91  }
  // Reached when the names/types size consistency check fails.
93  throw Elements::Exception() << "Different number of column names and types";
94  }
95 
  // Hand back this reader.
96  return *this;
97 }
98 
// The column types may only be fixed while m_reading_started is false.
100  if (m_reading_started) {
101  throw Elements::Exception() << "Fixing the column types after reading "
102  << "has started is not allowed";
103  }
104 
  // Take over the caller's vector without copying it.
105  m_column_types = std::move(column_types);
106 
  // Reached when the names/types size consistency check fails.
108  throw Elements::Exception() << "Different number of column names and types";
109  }
110 
  // Hand back this reader.
111  return *this;
112 }
113 
// The column info is built only once; later calls return immediately.
115  if (m_column_info != nullptr) {
116  return;
117  }
  // From here on the setters that check m_reading_started will throw.
118  m_reading_started = true;
119 
120  auto& in = m_stream_holder->ref();
121 
  // Names and types fixed by the caller must agree with the number of columns
  // that countColumns() reports for the stream.
122  size_t columns_number = countColumns(in, m_comment);
123  if (!m_column_names.empty() && m_column_names.size() != columns_number) {
124  throw Elements::Exception() << "Columns number in stream (" << columns_number << ") does not match the column names number ("
125  << m_column_names.size() << ")";
126  }
127  if (!m_column_types.empty() && m_column_types.size() != columns_number) {
128  throw Elements::Exception() << "Columns number in stream (" << columns_number << ") does not match the column types number ("
129  << m_column_types.size() << ")";
130  }
131 
132  auto auto_names = autoDetectColumnNames(in, m_comment, columns_number);
133  auto auto_desc = autoDetectColumnDescriptions(in, m_comment);
134 
135  std::vector<std::string> names{};
137  std::vector<std::string> units{};
138  std::vector<std::string> descriptions{};
139  for (size_t i = 0; i < columns_number; ++i) {
  // Caller-fixed names take precedence over the auto-detected ones.
140  if (m_column_names.empty()) {
141  names.emplace_back(auto_names[i]);
142  } else {
143  names.emplace_back(m_column_names[i]);
144  }
  // The description lookup always uses the auto-detected name, even when the
  // caller has fixed a different name for this column.
145  auto info = auto_desc.find(auto_names[i]);
146  if (info != auto_desc.end()) {
  // Caller-fixed types take precedence over the described type.
147  if (m_column_types.empty()) {
148  types.emplace_back(info->second.type);
149  } else {
150  types.emplace_back(m_column_types[i]);
151  }
152  units.emplace_back(info->second.unit);
153  descriptions.emplace_back(info->second.description);
154  } else {
  // No description found: fall back to std::string with empty unit and
  // description, unless the caller fixed the type.
155  if (m_column_types.empty()) {
156  types.emplace_back(typeid(std::string));
157  } else {
158  types.emplace_back(m_column_types[i]);
159  }
160  units.emplace_back("");
161  descriptions.emplace_back("");
162  }
163  }
164  m_column_info = createColumnInfo(names, types, units, descriptions);
165 }
166 
// Make sure m_column_info has been populated before dereferencing it.
168  readColumnInfo();
169  return *m_column_info;
170 }
171 
// Reads one line from the stream and then seeks back to the position that was
// recorded before the read; the line that was read is returned.
173  std::string line;
174  auto pos = in.tellg();
175  getline(in, line);
176  in.seekg(pos);
177  return line;
178 }
179 
// Accumulates the text of the consecutive comment lines found at the current
// stream position.
181  std::ostringstream comment;
182 
  // Calling this counts as having started reading.
183  m_reading_started = true;
184  auto& in = m_stream_holder->ref();
  // Continue while the stream is usable and the next line begins with the
  // comment indicator (compare() == 0 on the first m_comment.size() characters).
185  while (in && _peekLine(in).compare(0, m_comment.size(), m_comment) == 0) {
186  std::string line;
187  getline(in, line);
  // Drop the indicator and surrounding whitespace, keep one line per entry.
188  line = line.substr(m_comment.size());
189  boost::trim(line);
190  comment << line << '\n';
191  }
192 
  // Trim the assembled text as a whole before returning it.
193  auto full_comment = comment.str();
194  boost::trim(full_comment);
195  return full_comment;
196 }
197 
// Column info is needed to convert the cells, so make sure it is available.
199  readColumnInfo();
200  auto& in = m_stream_holder->ref();
201 
202  std::vector<Row> row_list;
  // rows is only decremented for non-empty lines and the loop stops at exactly
  // zero, so a negative count keeps reading until the stream stops being usable.
203  while (in && rows != 0) {
204  std::string line;
205  getline(in, line);
  // Everything from the first occurrence of the comment indicator is dropped.
206  size_t comment_pos = line.find(m_comment);
207  if (comment_pos != std::string::npos) {
208  line = line.substr(0, comment_pos);
209  }
210  boost::trim(line);
  // Lines that are empty after stripping do not count as rows.
211  if (!line.empty()) {
212  --rows;
213  std::stringstream line_stream(line);
214  size_t count{0};
216  std::string token;
  // NOTE(review): the first token is extracted with plain operator>> while the
  // following ones use boost::io::quoted; a quoted first cell containing
  // whitespace would presumably be split - confirm whether this is intended.
217  line_stream >> token;
218  while (line_stream) {
  // More tokens than columns is an error.
219  if (count >= m_column_info->size()) {
220  throw Elements::Exception() << "Line with wrong number of cells: " << line;
221  }
222  values.push_back(convertToCellType(token, m_column_info->getDescription(count).type));
223  line_stream >> boost::io::quoted(token);
224  ++count;
225  }
  // NOTE(review): a line with fewer tokens than columns is not rejected in
  // this loop; presumably the Row constructor validates the size - confirm.
226  row_list.push_back(Row{std::move(values), m_column_info});
227  }
228  }
229 
  // Producing no rows at all is reported as an error.
230  if (row_list.empty()) {
231  throw Elements::Exception() << "No more table rows left";
232  }
233  return Table{std::move(row_list)};
234 }
235 
// Consumes lines from the stream until the requested number of non-empty
// (after comment stripping and trimming) lines has been passed or the stream
// stops being usable. The lines are discarded, not parsed.
236 void AsciiReader::skip(long rows) {
237  readColumnInfo();
238  auto& in = m_stream_holder->ref();
239 
  // The loop stops at exactly zero, so a negative count runs until the stream
  // stops being usable.
240  while (in && rows != 0) {
241  std::string line;
242  getline(in, line);
  // Everything from the first occurrence of the comment indicator is dropped.
243  size_t comment_pos = line.find(m_comment);
244  if (comment_pos != std::string::npos) {
245  line = line.substr(0, comment_pos);
246  }
247  boost::trim(line);
  // Only lines with remaining content count towards the skipped rows.
248  if (!line.empty()) {
249  --rows;
250  }
251  }
252 }
253 
// Delegates to hasNextRow() with the underlying stream and the comment indicator.
255  return hasNextRow(m_stream_holder->ref(), m_comment);
256 }
257 
260 }
261 
262 } // namespace Table
263 } // namespace Euclid
Euclid::Table::createColumnInfo
std::shared_ptr< ColumnInfo > createColumnInfo(const std::vector< std::string > &names, const std::vector< std::type_index > &types, const std::vector< std::string > &units, const std::vector< std::string > &descriptions)
Creates a ColumnInfo object from the given names and types.
Definition: ReaderHelper.cpp:30
Euclid::Table::convertToCellType
Row::cell_type convertToCellType(const std::string &value, std::type_index type)
Converts the given value to a Row::cell_type of the given type.
Definition: AsciiReaderHelper.cpp:284
std::string
STL class.
std::istream::tellg
T tellg(T... args)
std::move
T move(T... args)
Euclid::Table::countRemainingRows
std::size_t countRemainingRows(std::istream &in, const std::string &comment)
Definition: AsciiReaderHelper.cpp:345
Euclid::Table::AsciiReader::hasMoreRows
bool hasMoreRows() override
Implements the TableReader::hasMoreRows() contract.
Definition: AsciiReader.cpp:254
Euclid::Table::autoDetectColumnDescriptions
std::map< std::string, ColumnDescription > autoDetectColumnDescriptions(std::istream &in, const std::string &comment)
Reads the column descriptions of the given stream.
Definition: AsciiReaderHelper.cpp:107
std::vector< std::string >
std::string::find
T find(T... args)
std::vector::size
T size(T... args)
Euclid::Table::AsciiReader::fixColumnNames
AsciiReader & fixColumnNames(std::vector< std::string > column_names)
Overrides the automatically detected column names.
Definition: AsciiReader.cpp:70
Euclid::Table::AsciiReader::setCommentIndicator
AsciiReader & setCommentIndicator(const std::string &indicator)
Set the comment indicator.
Definition: AsciiReader.cpp:58
Euclid::InstOrRefHolder
Definition: InstOrRefHolder.h:38
std::stringstream
STL class.
Euclid::Table::AsciiReader::m_column_types
std::vector< std::type_index > m_column_types
Definition: AsciiReader.h:230
Euclid::Table::AsciiReader::AsciiReader
AsciiReader(std::istream &stream)
Constructs an AsciiReader which reads from the given stream.
Definition: AsciiReader.cpp:51
Euclid::Table::AsciiReader::m_comment
std::string m_comment
Definition: AsciiReader.h:229
AsciiReader.h
std::string::push_back
T push_back(T... args)
Euclid::Table::AsciiReader::m_stream_holder
std::unique_ptr< InstOrRefHolder< std::istream > > m_stream_holder
Definition: AsciiReader.h:227
Euclid::Table::ColumnInfo
Provides information about the columns of a Table.
Definition: ColumnInfo.h:52
Euclid::Table::hasNextRow
bool hasNextRow(std::istream &in, const std::string &comment)
Definition: AsciiReaderHelper.cpp:328
Euclid::Table::AsciiReader::readColumnInfo
void readColumnInfo()
Definition: AsciiReader.cpp:114
Euclid::Table::AsciiReader::m_column_info
std::shared_ptr< ColumnInfo > m_column_info
Definition: AsciiReader.h:232
Euclid::Table::autoDetectColumnNames
std::vector< std::string > autoDetectColumnNames(std::istream &in, const std::string &comment, size_t columns_number)
Reads the column names of the given stream.
Definition: AsciiReaderHelper.cpp:171
Exception.h
Euclid::Table::AsciiReader::getComment
std::string getComment() override
Definition: AsciiReader.cpp:180
Elements::Exception
Euclid::Table::quoted
std::string quoted(const std::string &str)
Definition: AsciiWriterHelper.cpp:111
Euclid::Table::AsciiReader::rowsLeft
std::size_t rowsLeft() override
Implements the TableReader::rowsLeft() contract.
Definition: AsciiReader.cpp:258
Euclid::Table::AsciiReader::skip
void skip(long rows) override
Implements the TableReader::skip() contract.
Definition: AsciiReader.cpp:236
Euclid::Table::AsciiReader::getInfo
const ColumnInfo & getInfo() override
Returns the column information of the table.
Definition: AsciiReader.cpp:167
AsciiReaderHelper.h
Euclid::Table::AsciiReader
TableReader implementation for reading ASCII tables from streams.
Definition: AsciiReader.h:87
std::string::substr
T substr(T... args)
std::ostringstream
STL class.
std::vector::emplace_back
T emplace_back(T... args)
std
STL namespace.
Euclid::Table::AsciiReader::readImpl
Table readImpl(long rows) override
Reads the next rows into a Table.
Definition: AsciiReader.cpp:198
ReaderHelper.h
Euclid::Table::Table
Represents a table.
Definition: Table.h:49
std::string::empty
T empty(T... args)
std::ostringstream::str
T str(T... args)
std::size_t
Euclid::Table::countColumns
size_t countColumns(std::istream &in, const std::string &comment)
Returns the number of whitespace separated tokens of the first non commented line.
Definition: AsciiReaderHelper.cpp:42
std::istream::seekg
T seekg(T... args)
Euclid::Table::Row
Represents one row of a Table.
Definition: Row.h:64
Euclid::Table::AsciiReader::m_column_names
std::vector< std::string > m_column_names
Definition: AsciiReader.h:231
std::istream
STL class.
std::unique_ptr
STL class.
Euclid
Definition: InstOrRefHolder.h:29
Euclid::Table::_peekLine
static std::string _peekLine(std::istream &in)
Definition: AsciiReader.cpp:172
std::set
STL class.
Euclid::Table::AsciiReader::fixColumnTypes
AsciiReader & fixColumnTypes(std::vector< std::type_index > column_types)
Overrides the automatically detected column types.
Definition: AsciiReader.cpp:99
Euclid::Table::AsciiReader::m_reading_started
bool m_reading_started
Definition: AsciiReader.h:228