Revision dac170178e553309c5da580b68f8c9eaa7269905 authored by renchao_lu on 05 October 2021, 15:21:32 UTC, committed by renchao_lu on 05 October 2021, 15:21:32 UTC
1 parent cc0251f
Raw File
CsvInterface.h
/**
 * @file CsvInterface.h
 * @author Karsten Rink
 * @date 2015-03-25
 * @brief Definition of the CsvInterface class.
 *
 * @copyright
 * Copyright (c) 2012-2021, OpenGeoSys Community (http://www.opengeosys.org)
 *            Distributed under a Modified BSD License.
 *              See accompanying file LICENSE.txt or
 *              http://www.opengeosys.org/project/license
 */

#pragma once

#include <any>
#include <array>
#include <fstream>
#include <iterator>
#include <limits>
#include <list>
#include <string>
#include <typeinfo>
#include <vector>

#include "BaseLib/IO/Writer.h"
#include "BaseLib/Logging.h"
#include "BaseLib/StringTools.h"

namespace GeoLib
{
class Point;
}

namespace FileIO
{
/**
 * Interface for reading CSV file formats.
 */
class CsvInterface : public BaseLib::IO::Writer
{
public:
    /// Constructor (only needed for writing files)
    CsvInterface();

    /// Returns the number of vectors currently staged for writing.
    std::size_t getNArrays() const { return _vec_names.size(); }

    /// Returns a vector containing the names of columns in the file (assuming
    /// the file *has* a header)
    static std::vector<std::string> getColumnNames(std::string const& fname,
                                                   char delim);

    /// Adds an index vector of size s to the CSV file
    void addIndexVectorForWriting(std::size_t s);

    /// Stores if the CSV file to be written should include a header or not.
    void setCsvHeader(bool write_header) { _writeCsvHeader = write_header; }

    /// Adds a data vector to the CSV file. All data vectors have to have the
    /// same size. Vectors will be written in the same sequence they have been
    /// added to the interface.
    template <typename T>
    bool addVectorForWriting(std::string const& vec_name,
                             std::vector<T> const& vec)
    {
        static_assert(
            std::is_same_v<T, std::string> || std::is_same_v<T, double> ||
                std::is_same_v<T, int>,
            "CsvInterface can only write vectors of strings, doubles or ints.");

        if (!_data.empty())
        {
            std::size_t const vec_size(getVectorSize(0));
            if (vec_size != vec.size())
            {
                ERR("Vector size does not match existing data (should be "
                    "{:d}).",
                    vec_size);
                return false;
            }
        }

        _vec_names.push_back(vec_name);
        _data.push_back(vec);
        return true;
    }

    /// Writes the CSV file.
    bool write() override;

    /**
     * Reads 3D points from a CSV file. It is assumed that the file has a header
     * specifying a name for each of the columns. The first three columns will
     * be interpreted as x-, y- and z-coordinate, respectively. \param fname
     * Name of the file to be read \param delim    Deliminator, default is ','
     * \param points   A vector containing the 3D points read from the file
     * \return An error code (0 = ok, 0<i<max = number of skipped lines, -1
     * error reading file)
     */
    static int readPoints(std::string const& fname, char delim,
                          std::vector<GeoLib::Point*>& points);

    /**
     * Reads 3D points from a CSV file. It is assumed that the file has a header
     * specifying a name for each of the columns. The columns specified in the
     * function call will be used for reading x-, y- and z-coordinates,
     * respectively If z_column_name is an empty string or not given at all, all
     * z-coordinates will be set to zero.
     * \param fname           Name of the file to be read
     * \param delim           Deliminator, default is ','
     * \param points          A vector containing the 3D points read from the
     * file \param x_column_name   Name of the column to be interpreted as
     * x-coordinate \param y_column_name   Name of the column to be interpreted
     * as y-coordinate \param z_column_name   Name of the column to be
     * interpreted as z-coordinate \return An error code (0 = ok, 0<i<max =
     * number of skipped lines, -1 error reading file)
     */
    static int readPoints(std::string const& fname, char delim,
                          std::vector<GeoLib::Point*>& points,
                          std::string const& x_column_name,
                          std::string const& y_column_name,
                          std::string const& z_column_name = "");

    /**
     * Reads 3D points from a headerless CSV file, so columns for x-, y- and
     * z-coordinates have to be specified using indices (starting with 0).
     * If z_column_idx is not given (or set to numeric_limits::max()), all
     * z-coordinates will be set to zero.
     * \param fname          Name of the file to be read
     * \param delim          Deliminator, default is ','
     * \param points         A vector containing the 3D points read from the
     * file \param x_column_idx   Index of the column to be interpreted as
     * x-coordinate \param y_column_idx   Index of the column to be interpreted
     * as y-coordinate \param z_column_idx   Index of the column to be
     * interpreted as z-coordinate \return An error code (0 = ok, 0<i<max =
     * number of skipped lines, -1 error reading file)
     */
    static int readPoints(
        std::string const& fname, char delim,
        std::vector<GeoLib::Point*>& points, std::size_t x_column_idx,
        std::size_t y_column_idx,
        std::size_t z_column_idx = std::numeric_limits<std::size_t>::max());

    /**
     * Reads a column of the given name from a CSV file.
     * \param fname        Name of the file to be read
     * \param delim        Deliminator, default is ','
     * \param data_array   A vector containing the data read from the file
     * \param column_name  The column's name to read
     * \return An error code (0 = ok, 0<i<max = number of skipped lines, -1
     * error reading file)
     */
    template <typename T>
    static int readColumn(std::string const& fname, char delim,
                          std::vector<T>& data_array,
                          std::string const& column_name)
    {
        std::ifstream in(fname.c_str());
        if (!in.is_open())
        {
            ERR("CsvInterface::readColumn(): Could not open file {:s}.", fname);
            return -1;
        }

        std::string line;
        std::getline(in, line);
        std::size_t const column_idx =
            CsvInterface::findColumn(line, delim, column_name);
        if (column_idx == std::numeric_limits<std::size_t>::max())
        {
            ERR("Column '{:s}' not found in file header.", column_name);
            return -1;
        }
        return readColumn<T>(in, delim, data_array, column_idx);
    }

    template <typename T>
    static int readColumn(std::string const& fname, char delim,
                          std::vector<T>& data_array, std::size_t column_idx)
    {
        std::ifstream in(fname.c_str());
        if (!in.is_open())
        {
            ERR("CsvInterface::readColumn(): Could not open file {:s}.", fname);
            return -1;
        }
        return readColumn<T>(in, delim, data_array, column_idx);
    }

private:
    /// Actual point reader for public methods
    static int readPoints(std::ifstream& in, char delim,
                          std::vector<GeoLib::Point*>& points,
                          std::array<std::size_t, 3> const& column_idx);

    /// Actual column reader for public methods
    template <typename T>
    static int readColumn(std::ifstream& in, char delim,
                          std::vector<T>& data_array, std::size_t column_idx)
    {
        std::string line;
        std::size_t line_count(0);
        std::size_t error_count(0);
        while (std::getline(in, line))
        {
            line_count++;
            std::list<std::string> const fields =
                BaseLib::splitString(line, delim);

            if (fields.size() < column_idx + 1)
            {
                ERR("Line {:d} contains not enough columns of data. Skipping "
                    "line...",
                    line_count);
                error_count++;
                continue;
            }
            auto it = fields.begin();
            std::advance(it, column_idx);

            std::istringstream stream(*it);
            T value;
            if (!(stream >> value))
            {
                ERR("Error reading value in line {:d}.", line_count);
                error_count++;
                continue;
            }

            data_array.push_back(value);
        }
        return error_count;
    }

    /// Returns the number of the column with column_name (or
    /// std::numeric_limits::max() if no such column has been found).
    static std::size_t findColumn(std::string const& line, char delim,
                                  std::string const& column_name);

    /// Returns the size of the vector with the given index
    std::size_t getVectorSize(std::size_t idx) const;

    /**
     * Writes a value from a vector to the file.
     * \param vec_idx     Index of the vector
     * \param in_vec_idx  Entry in the selected vector
     */
    void writeValue(std::size_t vec_idx, std::size_t in_vec_idx);

    bool _writeCsvHeader{true};
    std::vector<std::string> _vec_names;
    std::vector<std::any> _data;
};

}  // namespace FileIO
back to top