Quantcast
Channel: A header-only library to read CSV - Code Review Stack Exchange
Viewing all articles
Browse latest Browse all 5

A header-only library to read CSV

$
0
0

I started learning C++ after programming in Java for some years, and this is my first (header-only) library. When switching from another language, it is always difficult not to carry old habits over into the new one, so I wonder how idiomatic my C++ code is. It would be awesome if someone could review this piece.

#ifndef OCTARINE_FILE_READER_H
#define OCTARINE_FILE_READER_H
// NOTE: the guard deliberately avoids double underscores; identifiers that
// contain "__" are reserved for the implementation.

#include <algorithm>
#include <cstddef>
#include <filesystem>
#include <fstream>
#include <functional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>

// API
namespace octarine {

    /// @brief Read a CSV file into a vector of rows, each row a vector of strings.
    ///
    /// The first line of the file fixes the number of columns. Every field is
    /// trimmed of leading and trailing spaces and tabs. Rows with fewer fields
    /// than the first line are padded with empty strings. Quoted fields are not
    /// interpreted: every occurrence of @p separator splits a field.
    ///
    /// @param filename   path of the file to read
    /// @param has_header when true the first line is skipped (it is still used
    ///                   to determine the column count); when false it becomes
    ///                   the first row of the result
    /// @param separator  field delimiter
    /// @return the parsed rows, in file order
    /// @throws std::invalid_argument if the file cannot be opened, the first
    ///         line cannot be read (e.g. the file is empty), or a read error
    ///         occurs
    /// @throws std::length_error if a row has more fields than the first line
    inline std::vector<std::vector<std::string>> read_csv(
            const std::filesystem::path &filename,
            bool has_header = true,
            char separator = ',');

    /// @brief Read a CSV file into a vector of objects of a predefined type.
    ///
    /// Rows are read exactly as by the non-template overload and then passed,
    /// one by one, to @p mapper. T cannot be deduced from a lambda, so call
    /// this as `read_csv<MyType>(path, mapper)`.
    ///
    /// @tparam T         element type produced by @p mapper
    /// @param filename   path of the file to read
    /// @param mapper     converts one row of trimmed fields into a T
    /// @param has_header see the non-template overload
    /// @param separator  field delimiter
    /// @return one T per data row, in file order
    /// @throws whatever the non-template overload or @p mapper throws
    template <typename T>
    inline std::vector<T> read_csv(
            const std::filesystem::path &filename,
            const std::function<T(const std::vector<std::string>&)>& mapper,
            bool has_header = true,
            char separator = ','
    );
}

// Implementation
namespace octarine {

    // Implementation details. A named namespace with inline entities is used
    // instead of an anonymous namespace: an anonymous namespace in a header
    // gives every translation unit its own copy, which the inline read_csv
    // (external linkage) must not refer to.
    namespace detail {

        // Characters stripped from both ends of every field.
        inline constexpr std::string_view k_whitespace = " \t";

        // Returns `text` without leading/trailing whitespace ("" if nothing is left).
        inline std::string trim(std::string_view text) {
            const auto first = text.find_first_not_of(k_whitespace);
            if (first == std::string_view::npos) {
                return {};
            }
            const auto last = text.find_last_not_of(k_whitespace);
            return std::string(text.substr(first, last - first + 1));
        }

        // Counts the separator-delimited items in a line; always at least 1.
        inline std::size_t count_items(const std::string &line, char separator) {
            return 1 + static_cast<std::size_t>(
                    std::count(line.begin(), line.end(), separator));
        }

        // Splits a line into exactly `num_items` trimmed fields.
        // Missing trailing fields are left empty; extra fields raise
        // std::length_error mentioning `line_number`.
        inline std::vector<std::string> parse_line(
                const std::string &line,
                char separator,
                std::size_t num_items,
                std::size_t line_number) {
            if (num_items == 0) {
                throw std::invalid_argument("number of items must be positive");
            }

            std::vector<std::string> result(num_items);
            const std::string_view text(line);
            std::size_t item = 0;
            std::size_t begin = 0;
            while (true) {
                if (item >= num_items) {
                    throw std::length_error("line "+ std::to_string(line_number) +": found more items in a line than expected");
                }
                const auto sep = text.find(separator, begin);
                const auto end = (sep != std::string_view::npos) ? sep : text.size();
                result[item] = trim(text.substr(begin, end - begin));
                ++item;
                if (sep == std::string_view::npos) {
                    break;  // that was the last field of the line
                }
                begin = sep + 1;
            }
            return result;
        }
    }

    template <typename T>
    std::vector<T> read_csv(
            const std::filesystem::path &filename,
            const std::function<T(const std::vector<std::string>&)>& mapper,
            bool has_header,
            char separator) {
        const auto rows = read_csv(filename, has_header, separator);
        std::vector<T> result;
        result.reserve(rows.size());
        for (const auto& row : rows) {
            result.push_back(mapper(row));
        }
        return result;
    }

    inline std::vector<std::vector<std::string>> read_csv(
            const std::filesystem::path &filename,
            bool has_header,
            char separator) {
        std::ifstream f(filename);
        if (!f.good()) {
            throw std::invalid_argument("unable to read file '"+ filename.string() +"'");
        }

        // Read the first line. Test the result of getline itself rather than
        // good(): a single line without a trailing newline is read
        // successfully but also sets eofbit.
        std::string header;
        if (!std::getline(f, header)) {
            throw std::invalid_argument("error reading header line from '"+ filename.string() +"'");
        }

        // The first line fixes the number of items per line.
        const std::size_t num_items = detail::count_items(header, separator);

        std::vector<std::vector<std::string>> lines;
        // 1-based index of the data row being parsed (the header is not counted).
        std::size_t line_number = 1;
        if (!has_header) {
            // No header: the first line is itself data.
            lines.push_back(detail::parse_line(header, separator, num_items, line_number));
            ++line_number;
        }

        // getline succeeds for a final line that lacks a trailing newline, so
        // looping on its result keeps that last row instead of dropping it.
        std::string line;
        while (std::getline(f, line)) {
            lines.push_back(detail::parse_line(line, separator, num_items, line_number));
            ++line_number;
        }
        if (!f.eof()) {
            // The loop stopped for a reason other than reaching end of file.
            throw std::invalid_argument("error reading line from '"+ filename.string() +"'");
        }
        return lines;
    }
}

#endif

Viewing all articles
Browse latest Browse all 5

Latest Images

Trending Articles





Latest Images