Code to read and write CSV files












6












$begingroup$


Below is my code that 1) writes a CSV file with three columns of integer data (plus column names on the first line) and 2) reads the CSV file. I'm new to C++ and would appreciate some code review.



Also, I'm bothered by the fact that my technique requires all of the data to be integers. I'd appreciate advice on how I could generalize this code to write/read data with a mix of integers, doubles, strings, etc.



#include <string>    // std::string
#include <fstream> // std::ofstream, std::ifstream
#include <vector> // std::vector
#include <utility> // std::pair
#include <stdexcept> // std::runtime_error
#include <sstream> // std::stringstream
#include <iostream> // std::cout, std::cin

void write_csv(std::string filename, std::vector<std::pair<std::string, std::vector<int>>> dataset){
// Make a CSV file with one or more columns of integer values
// Each column of data is represented by the pair <column name, column data>
// as std::pair<std::string, std::vector<int>>
// The dataset is represented as a vector of these columns
// Note that all columns should be the same size

// Create an output filestream object
std::ofstream myFile(filename);

// Send column names to the stream
for(int j = 0; j < dataset.size(); ++j)
{
myFile << dataset.at(j).first;
if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
}
myFile << "n";

// Send data to the stream
for(int i = 0; i < dataset.at(0).second.size(); ++i)
{
for(int j = 0; j < dataset.size(); ++j)
{
myFile << dataset.at(j).second.at(i);
if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
}
myFile << "n";
}

// Close the file
myFile.close();
}

std::vector<std::pair<std::string, std::vector<int>>> read_csv(std::string filename){
// Reads a CSV file into a vector of <string, vector<int>> pairs where
// each pair represents <column name, column values>

// Create a vector of <string, int vector> pairs to store the result
std::vector<std::pair<std::string, std::vector<int>>> result;

// Create an input filestream
std::ifstream myFile(filename);

// Make sure the file is open
if(!myFile.is_open()) throw std::runtime_error("Could not open file");

// Helper vars
std::string line, colname;
int val;

// Read the column names
if(myFile.good())
{
// Extract the first line in the file
std::getline(myFile, line);

// Create a stringstream from line
std::stringstream ss(line);

// Extract each column name
while(std::getline(ss, colname, ',')){

// Initialize and add <colname, int vector> pairs to result
result.push_back({colname, std::vector<int> {}});
}
}

// Read data, line by line
while(std::getline(myFile, line))
{
// Create a stringstream of the current line
std::stringstream ss(line);

// Keep track of the current column index
int colIdx = 0;

// Extract each integer
while(ss >> val){

// Add the current integer to the 'colIdx' column's values vector
result.at(colIdx).second.push_back(val);

// If the next token is a comma, ignore it and move on
if(ss.peek() == ',') ss.ignore();

// Increment the column index
colIdx++;
}
}

// Close file
myFile.close();

return result;
}

int main() {
// // Make three vectors, each of length N filled with 1s, 2s, and 3s
int N = 1000;
std::vector<int> vec1(N, 1);
std::vector<int> vec2(N, 2);
std::vector<int> vec3(N, 3);

// Wrap into a vector
std::vector<std::pair<std::string, std::vector<int>>> vals = {{"One", vec1}, {"Two", vec2}, {"Three", vec3}};

// Write the vector to CSV
write_csv("three_cols.csv", vals);

// Read three_cols.csv
std::vector<std::pair<std::string, std::vector<int>>> three_cols = read_csv("three_cols.csv");

// Print row and column counts to check if this was successful
std::cout << "Rows: " << three_cols.at(0).second.size() << ", Columns: " << three_cols.size() << std::endl;

return 0;
}









share|improve this question











$endgroup$

















    6












    $begingroup$


    Below is my code that 1) writes a CSV file with three columns of integer data (plus column names on the first line) and 2) reads the CSV file. I'm new to C++ and would appreciate some code review.



    Also, I'm bothered by the fact that my technique requires all of the data to be integers. I'd appreciate advice on how I could generalize this code to write/read data with a mix of integers, doubles, strings, etc.



    #include <string>    // std::string
    #include <fstream> // std::ofstream, std::ifstream
    #include <vector> // std::vector
    #include <utility> // std::pair
    #include <stdexcept> // std::runtime_error
    #include <sstream> // std::stringstream
    #include <iostream> // std::cout, std::cin

    void write_csv(std::string filename, std::vector<std::pair<std::string, std::vector<int>>> dataset){
    // Make a CSV file with one or more columns of integer values
    // Each column of data is represented by the pair <column name, column data>
    // as std::pair<std::string, std::vector<int>>
    // The dataset is represented as a vector of these columns
    // Note that all columns should be the same size

    // Create an output filestream object
    std::ofstream myFile(filename);

    // Send column names to the stream
    for(int j = 0; j < dataset.size(); ++j)
    {
    myFile << dataset.at(j).first;
    if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
    }
    myFile << "n";

    // Send data to the stream
    for(int i = 0; i < dataset.at(0).second.size(); ++i)
    {
    for(int j = 0; j < dataset.size(); ++j)
    {
    myFile << dataset.at(j).second.at(i);
    if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
    }
    myFile << "n";
    }

    // Close the file
    myFile.close();
    }

    std::vector<std::pair<std::string, std::vector<int>>> read_csv(std::string filename){
    // Reads a CSV file into a vector of <string, vector<int>> pairs where
    // each pair represents <column name, column values>

    // Create a vector of <string, int vector> pairs to store the result
    std::vector<std::pair<std::string, std::vector<int>>> result;

    // Create an input filestream
    std::ifstream myFile(filename);

    // Make sure the file is open
    if(!myFile.is_open()) throw std::runtime_error("Could not open file");

    // Helper vars
    std::string line, colname;
    int val;

    // Read the column names
    if(myFile.good())
    {
    // Extract the first line in the file
    std::getline(myFile, line);

    // Create a stringstream from line
    std::stringstream ss(line);

    // Extract each column name
    while(std::getline(ss, colname, ',')){

    // Initialize and add <colname, int vector> pairs to result
    result.push_back({colname, std::vector<int> {}});
    }
    }

    // Read data, line by line
    while(std::getline(myFile, line))
    {
    // Create a stringstream of the current line
    std::stringstream ss(line);

    // Keep track of the current column index
    int colIdx = 0;

    // Extract each integer
    while(ss >> val){

    // Add the current integer to the 'colIdx' column's values vector
    result.at(colIdx).second.push_back(val);

    // If the next token is a comma, ignore it and move on
    if(ss.peek() == ',') ss.ignore();

    // Increment the column index
    colIdx++;
    }
    }

    // Close file
    myFile.close();

    return result;
    }

    int main() {
    // // Make three vectors, each of length N filled with 1s, 2s, and 3s
    int N = 1000;
    std::vector<int> vec1(N, 1);
    std::vector<int> vec2(N, 2);
    std::vector<int> vec3(N, 3);

    // Wrap into a vector
    std::vector<std::pair<std::string, std::vector<int>>> vals = {{"One", vec1}, {"Two", vec2}, {"Three", vec3}};

    // Write the vector to CSV
    write_csv("three_cols.csv", vals);

    // Read three_cols.csv
    std::vector<std::pair<std::string, std::vector<int>>> three_cols = read_csv("three_cols.csv");

    // Print row and column counts to check if this was successful
    std::cout << "Rows: " << three_cols.at(0).second.size() << ", Columns: " << three_cols.size() << std::endl;

    return 0;
    }









    share|improve this question











    $endgroup$















      6












      6








      6





      $begingroup$


      Below is my code that 1) writes a CSV file with three columns of integer data (plus column names on the first line) and 2) reads the CSV file. I'm new to C++ and would appreciate some code review.



      Also, I'm bothered by the fact that my technique requires all of the data to be integers. I'd appreciate advice on how I could generalize this code to write/read data with a mix of integers, doubles, strings, etc.



      #include <string>    // std::string
      #include <fstream> // std::ofstream, std::ifstream
      #include <vector> // std::vector
      #include <utility> // std::pair
      #include <stdexcept> // std::runtime_error
      #include <sstream> // std::stringstream
      #include <iostream> // std::cout, std::cin

      void write_csv(std::string filename, std::vector<std::pair<std::string, std::vector<int>>> dataset){
      // Make a CSV file with one or more columns of integer values
      // Each column of data is represented by the pair <column name, column data>
      // as std::pair<std::string, std::vector<int>>
      // The dataset is represented as a vector of these columns
      // Note that all columns should be the same size

      // Create an output filestream object
      std::ofstream myFile(filename);

      // Send column names to the stream
      for(int j = 0; j < dataset.size(); ++j)
      {
      myFile << dataset.at(j).first;
      if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
      }
      myFile << "n";

      // Send data to the stream
      for(int i = 0; i < dataset.at(0).second.size(); ++i)
      {
      for(int j = 0; j < dataset.size(); ++j)
      {
      myFile << dataset.at(j).second.at(i);
      if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
      }
      myFile << "n";
      }

      // Close the file
      myFile.close();
      }

      std::vector<std::pair<std::string, std::vector<int>>> read_csv(std::string filename){
      // Reads a CSV file into a vector of <string, vector<int>> pairs where
      // each pair represents <column name, column values>

      // Create a vector of <string, int vector> pairs to store the result
      std::vector<std::pair<std::string, std::vector<int>>> result;

      // Create an input filestream
      std::ifstream myFile(filename);

      // Make sure the file is open
      if(!myFile.is_open()) throw std::runtime_error("Could not open file");

      // Helper vars
      std::string line, colname;
      int val;

      // Read the column names
      if(myFile.good())
      {
      // Extract the first line in the file
      std::getline(myFile, line);

      // Create a stringstream from line
      std::stringstream ss(line);

      // Extract each column name
      while(std::getline(ss, colname, ',')){

      // Initialize and add <colname, int vector> pairs to result
      result.push_back({colname, std::vector<int> {}});
      }
      }

      // Read data, line by line
      while(std::getline(myFile, line))
      {
      // Create a stringstream of the current line
      std::stringstream ss(line);

      // Keep track of the current column index
      int colIdx = 0;

      // Extract each integer
      while(ss >> val){

      // Add the current integer to the 'colIdx' column's values vector
      result.at(colIdx).second.push_back(val);

      // If the next token is a comma, ignore it and move on
      if(ss.peek() == ',') ss.ignore();

      // Increment the column index
      colIdx++;
      }
      }

      // Close file
      myFile.close();

      return result;
      }

      int main() {
      // // Make three vectors, each of length N filled with 1s, 2s, and 3s
      int N = 1000;
      std::vector<int> vec1(N, 1);
      std::vector<int> vec2(N, 2);
      std::vector<int> vec3(N, 3);

      // Wrap into a vector
      std::vector<std::pair<std::string, std::vector<int>>> vals = {{"One", vec1}, {"Two", vec2}, {"Three", vec3}};

      // Write the vector to CSV
      write_csv("three_cols.csv", vals);

      // Read three_cols.csv
      std::vector<std::pair<std::string, std::vector<int>>> three_cols = read_csv("three_cols.csv");

      // Print row and column counts to check if this was successful
      std::cout << "Rows: " << three_cols.at(0).second.size() << ", Columns: " << three_cols.size() << std::endl;

      return 0;
      }









      share|improve this question











      $endgroup$




      Below is my code that 1) writes a CSV file with three columns of integer data (plus column names on the first line) and 2) reads the CSV file. I'm new to C++ and would appreciate some code review.



      Also, I'm bothered by the fact that my technique requires all of the data to be integers. I'd appreciate advice on how I could generalize this code to write/read data with a mix of integers, doubles, strings, etc.



      #include <string>    // std::string
      #include <fstream> // std::ofstream, std::ifstream
      #include <vector> // std::vector
      #include <utility> // std::pair
      #include <stdexcept> // std::runtime_error
      #include <sstream> // std::stringstream
      #include <iostream> // std::cout, std::cin

      void write_csv(std::string filename, std::vector<std::pair<std::string, std::vector<int>>> dataset){
      // Make a CSV file with one or more columns of integer values
      // Each column of data is represented by the pair <column name, column data>
      // as std::pair<std::string, std::vector<int>>
      // The dataset is represented as a vector of these columns
      // Note that all columns should be the same size

      // Create an output filestream object
      std::ofstream myFile(filename);

      // Send column names to the stream
      for(int j = 0; j < dataset.size(); ++j)
      {
      myFile << dataset.at(j).first;
      if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
      }
      myFile << "n";

      // Send data to the stream
      for(int i = 0; i < dataset.at(0).second.size(); ++i)
      {
      for(int j = 0; j < dataset.size(); ++j)
      {
      myFile << dataset.at(j).second.at(i);
      if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
      }
      myFile << "n";
      }

      // Close the file
      myFile.close();
      }

      std::vector<std::pair<std::string, std::vector<int>>> read_csv(std::string filename){
      // Reads a CSV file into a vector of <string, vector<int>> pairs where
      // each pair represents <column name, column values>

      // Create a vector of <string, int vector> pairs to store the result
      std::vector<std::pair<std::string, std::vector<int>>> result;

      // Create an input filestream
      std::ifstream myFile(filename);

      // Make sure the file is open
      if(!myFile.is_open()) throw std::runtime_error("Could not open file");

      // Helper vars
      std::string line, colname;
      int val;

      // Read the column names
      if(myFile.good())
      {
      // Extract the first line in the file
      std::getline(myFile, line);

      // Create a stringstream from line
      std::stringstream ss(line);

      // Extract each column name
      while(std::getline(ss, colname, ',')){

      // Initialize and add <colname, int vector> pairs to result
      result.push_back({colname, std::vector<int> {}});
      }
      }

      // Read data, line by line
      while(std::getline(myFile, line))
      {
      // Create a stringstream of the current line
      std::stringstream ss(line);

      // Keep track of the current column index
      int colIdx = 0;

      // Extract each integer
      while(ss >> val){

      // Add the current integer to the 'colIdx' column's values vector
      result.at(colIdx).second.push_back(val);

      // If the next token is a comma, ignore it and move on
      if(ss.peek() == ',') ss.ignore();

      // Increment the column index
      colIdx++;
      }
      }

      // Close file
      myFile.close();

      return result;
      }

      int main() {
      // // Make three vectors, each of length N filled with 1s, 2s, and 3s
      int N = 1000;
      std::vector<int> vec1(N, 1);
      std::vector<int> vec2(N, 2);
      std::vector<int> vec3(N, 3);

      // Wrap into a vector
      std::vector<std::pair<std::string, std::vector<int>>> vals = {{"One", vec1}, {"Two", vec2}, {"Three", vec3}};

      // Write the vector to CSV
      write_csv("three_cols.csv", vals);

      // Read three_cols.csv
      std::vector<std::pair<std::string, std::vector<int>>> three_cols = read_csv("three_cols.csv");

      // Print row and column counts to check if this was successful
      std::cout << "Rows: " << three_cols.at(0).second.size() << ", Columns: " << three_cols.size() << std::endl;

      return 0;
      }






      c++ beginner csv






      share|improve this question















      share|improve this question













      share|improve this question




      share|improve this question








      edited Jan 19 at 20:41









      200_success

      129k15153415




      129k15153415










      asked Jan 19 at 20:38









      BenBen

      1334




      1334






















          3 Answers
          3






          active

          oldest

          votes


















          3












          $begingroup$

          Good documentation



          Every time I wanted to say "but it behaves badly in case ...", I found specification disallowing that case. One nitpick might be that "with one or more columns" could be explicitly written as "requires at least one column". The only thing left is to pray that people will read it.



          Pass by const reference for only read purposes



          Copying non-scalar and heavy data structures (strings, vectors) might incur significant overhead. Prefer to pass by const reference.



          Check if file is opened



          The check is performed when reading, but not performed when writing.



          Do not use at() unless exception-throwing version is desired



          .at() incurs overhead by performing in-range check, and also throws if out of range.



          Use emplace back to construct and push in place



          result.push_back({colname, std::vector<int> {}});


          This could be rewritten as



          result.emplace_back(colname, std::vector<int> {});


          From C++17, if I'm not mistaken, the two are equivalent due to copy elision, but emplace version is a bit clearer.



          Improve printing algorithm



          This is a general problem of string joining. This answer shows a great implementation for a simple case. One can remove templates from them if needed.



          Do not explicitly close file



          Destructor of std::ifstream and of it's twin automatically close the file.



          Create type alias where useful



          using column = std::pair<std::string, std::vector<int>>;


          would save users a lot of typing.



          Use locales for reading from csv



          Whenever I want casual reading of csv files I just copy/paste from this asnwer and march on.



          Unreliable reading algorithm



          I would be a bit worried to use it as is. It tends to assume the layout to be the same as in writing, but I'm afraid edge cases slip in and hit like a truck. This is one of the rare cases when enabling exceptions on the stream might be a good idea.





          Architecture for generic reading/writing



          I don't think implementing reading algorithm is a viable option, because it involves extreme amounts of error checking to be useful. As for writing:





          • Upstream and downstream invariances



            Upstream invariance in this case is CSV format, which the final output has to obey. Downstream invariance is requirements on the result of object getting streamed (file << myObject). One will need to specify the requirements on data type very clearly and with great scrutiny. For example, if one wants to accept std::string as data type, the user has to override default streaming for the type, which pulls them into realm of undefined behavior. This functionality clearly requires a lot of thought put into it in order to be correct and robust.




          • Data layout



            This is one is just a speculation, but the way data is stored might be problematic in terms of performance. It would be better to create a data structure that stores headers, and then stores values row-wise. Access could be done by using header value as first subscript, and row index as second subscript. Horizontal offset can be saved for each header value to access the right cell in a row. This also would be a great learning exercise.








          share|improve this answer









          $endgroup$













          • $begingroup$
            Lots of good tips in this answer and I've update my code to make use of most of them. Now I need to work on my generic CSV reader that can read ints, doubles, and strings.. Thanks for your help.
            $endgroup$
            – Ben
            Jan 21 at 16:57



















          3












          $begingroup$

          General design



          Your dataset representation is a bit unconventional: since you need to write/read the file line by line, having the dataset oriented the other way might prove a hindrance. I understand that you might need to include / exclude columns from a particular file: but it isn't a sufficient reason to rotate your dataset: a more general, line-oriented dataset representation would allow you to do so too. In principle, you should separate concerns: handling your data on the one hand, reading/writing it from/to a csv file on the other hand.



          Also, your design forces the user to create headers, which isn't required by the format. The explanation, if I understood correctly, is that you weren't able to provide a type-erasing interface, so it was either headers for everyone or for no one.



          Type-flexible cell



          There are lots of ways to achieve that, and none is indisputably superior. You could consider std::variant, which is a standard, type-safe union:



          #include <variant>
          #include <iostream>
          #include <string>
          #include <vector>

          int main() {

          using Cell = std::variant<int, double, std::string>; // Cell can contain any of these types
          std::vector<Cell> record{"John", "Doe", 41, 1.75};
          for (const auto& cell : record) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          }


          Any column or line can now be represented by a sequence of Cells. It means that you don't need to hard-code a header, or to store your data by type-homogeneous columns. You can even have your whole dataset in one big vector: it can be a lot faster because you have better locality (in your current implementation, the processor has to fetch the cells of a given line from as many places in memory).



          If you're ready to anticipate the next standard (C++20), and that you have your dataset contained in one big std::vector<Cell>, you can then have rows and columns as range::views over your dataset:



          #include <variant>
          #include <iostream>
          #include <string>
          #include <vector>
          #include <range/v3/core.hpp>
          #include <range/v3/view.hpp>

          using namespace ranges;
          using namespace ranges::view;

          int main() {

          using Cell = std::variant<int, double, std::string>;
          std::vector<Cell> records{"John", "Doe", 41, 1.75, "Jane", "Smith", 35, 1.63};
          const int row_sz = 4;

          auto column = [row_sz](int n) {
          // to get column n, skip the n first elements
          // then take every row-szth element
          return drop(n) | stride(row_sz);
          };

          for (auto cell : records | column(1)) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          std::cout << std::endl;

          auto row = [row_sz](int n) { return slice(n*row_sz, n*row_sz+row_sz); };
          for (auto cell : records | row(1)) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          std::cout << std::endl;

          }


          Performance



          Your csv reader is inefficient. You read each line (one memory allocation and copy), then cut the line into pieces (as many allocations / conversions as pieces). You can build your dataset from the file without reading it into lines first: every time you read a ';' you push_back a cell, and every time you read a 'n' you push_back a last cell in the record, and then push_back the record (conceptually, because as I said I believe a flat std::vector with row-size as an additional information is better). It is a simplification of course, because you have to take care of quoted fields, eof and error-handling, but that's the general idea. You can design it as a state-machine with customization points for the cell and record handlers.






          share|improve this answer









          $endgroup$













          • $begingroup$
            This is incredibly helpful. I will spend the next week studying std::variant and ranges thanks to your advice. +1
            $endgroup$
            – Ben
            Jan 21 at 16:53



















          0












          $begingroup$

          My first observation is that your object model is quite confusing; you have a vector of pairs with vectors, and it is very difficult to keep track of what is what. If I'm reading this code correctly, you should consider extracting this pair into a column class, giving you std::vector<column>.



          Once you have this column class, you can add additional properties to it, such as what type of data it contains, and a void* to the data in each cell.






          share|improve this answer









          $endgroup$









          • 1




            $begingroup$
            I would strongly advise against void*. Some template metaprogramming might have higher development cost, but it will sure payoff in learning and correctness of user code.
            $endgroup$
            – Incomputable
            Jan 19 at 22:10











          Your Answer





          StackExchange.ifUsing("editor", function () {
          return StackExchange.using("mathjaxEditing", function () {
          StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
          StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
          });
          });
          }, "mathjax-editing");

          StackExchange.ifUsing("editor", function () {
          StackExchange.using("externalEditor", function () {
          StackExchange.using("snippets", function () {
          StackExchange.snippets.init();
          });
          });
          }, "code-snippets");

          StackExchange.ready(function() {
          var channelOptions = {
          tags: "".split(" "),
          id: "196"
          };
          initTagRenderer("".split(" "), "".split(" "), channelOptions);

          StackExchange.using("externalEditor", function() {
          // Have to fire editor after snippets, if snippets enabled
          if (StackExchange.settings.snippets.snippetsEnabled) {
          StackExchange.using("snippets", function() {
          createEditor();
          });
          }
          else {
          createEditor();
          }
          });

          function createEditor() {
          StackExchange.prepareEditor({
          heartbeatType: 'answer',
          autoActivateHeartbeat: false,
          convertImagesToLinks: false,
          noModals: true,
          showLowRepImageUploadWarning: true,
          reputationToPostImages: null,
          bindNavPrevention: true,
          postfix: "",
          imageUploader: {
          brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
          contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
          allowUrls: true
          },
          onDemand: true,
          discardSelector: ".discard-answer"
          ,immediatelyShowMarkdownHelp:true
          });


          }
          });














          draft saved

          draft discarded


















          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f211826%2fcode-to-read-and-write-csv-files%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown

























          3 Answers
          3






          active

          oldest

          votes








          3 Answers
          3






          active

          oldest

          votes









          active

          oldest

          votes






          active

          oldest

          votes









          3












          $begingroup$

          Good documentation



          Every time I wanted to say "but it behaves badly in case ...", I found specification disallowing that case. One nitpick might be that "with one or more columns" could be explicitly written as "requires at least one column". The only thing left is to pray that people will read it.



          Pass by const reference for only read purposes



          Copying non-scalar and heavy data structures (strings, vectors) might incur significant overhead. Prefer to pass by const reference.



          Check if file is opened



          The check is performed when reading, but not performed when writing.



          Do not use at() unless exception-throwing version is desired



          .at() incurs overhead by performing in-range check, and also throws if out of range.



          Use emplace back to construct and push in place



          result.push_back({colname, std::vector<int> {}});


          This could be rewritten as



          result.emplace_back(colname, std::vector<int> {});


          From C++17, if I'm not mistaken, the two are equivalent due to copy elision, but emplace version is a bit clearer.



          Improve printing algorithm



          This is a general problem of string joining. This answer shows a great implementation for a simple case. One can remove templates from them if needed.



          Do not explicitly close file



          Destructor of std::ifstream and of it's twin automatically close the file.



          Create type alias where useful



          using column = std::pair<std::string, std::vector<int>>;


          would save users a lot of typing.



          Use locales for reading from csv



          Whenever I want casual reading of csv files I just copy/paste from this asnwer and march on.



          Unreliable reading algorithm



          I would be a bit worried to use it as is. It tends to assume the layout to be the same as in writing, but I'm afraid edge cases slip in and hit like a truck. This is one of the rare cases when enabling exceptions on the stream might be a good idea.





          Architecture for generic reading/writing



          I don't think implementing reading algorithm is a viable option, because it involves extreme amounts of error checking to be useful. As for writing:





          • Upstream and downstream invariances



            Upstream invariance in this case is CSV format, which the final output has to obey. Downstream invariance is requirements on the result of object getting streamed (file << myObject). One will need to specify the requirements on data type very clearly and with great scrutiny. For example, if one wants to accept std::string as data type, the user has to override default streaming for the type, which pulls them into realm of undefined behavior. This functionality clearly requires a lot of thought put into it in order to be correct and robust.




          • Data layout



            This is one is just a speculation, but the way data is stored might be problematic in terms of performance. It would be better to create a data structure that stores headers, and then stores values row-wise. Access could be done by using header value as first subscript, and row index as second subscript. Horizontal offset can be saved for each header value to access the right cell in a row. This also would be a great learning exercise.








          share|improve this answer









          $endgroup$













          • $begingroup$
            Lots of good tips in this answer and I've update my code to make use of most of them. Now I need to work on my generic CSV reader that can read ints, doubles, and strings.. Thanks for your help.
            $endgroup$
            – Ben
            Jan 21 at 16:57
















          3












          $begingroup$

          Good documentation



          Every time I wanted to say "but it behaves badly in case ...", I found specification disallowing that case. One nitpick might be that "with one or more columns" could be explicitly written as "requires at least one column". The only thing left is to pray that people will read it.



          Pass by const reference for only read purposes



          Copying non-scalar and heavy data structures (strings, vectors) might incur significant overhead. Prefer to pass by const reference.



          Check if file is opened



          The check is performed when reading, but not performed when writing.



          Do not use at() unless exception-throwing version is desired



          .at() incurs overhead by performing in-range check, and also throws if out of range.



          Use emplace back to construct and push in place



          result.push_back({colname, std::vector<int> {}});


          This could be rewritten as



          result.emplace_back(colname, std::vector<int> {});


          From C++17, if I'm not mistaken, the two are equivalent due to copy elision, but emplace version is a bit clearer.



          Improve printing algorithm



          This is a general problem of string joining. This answer shows a great implementation for a simple case. One can remove templates from them if needed.



          Do not explicitly close file



          Destructor of std::ifstream and of it's twin automatically close the file.



          Create type alias where useful



          using column = std::pair<std::string, std::vector<int>>;


          would save users a lot of typing.



          Use locales for reading from csv



          Whenever I want casual reading of csv files I just copy/paste from this asnwer and march on.



          Unreliable reading algorithm



          I would be a bit worried to use it as is. It tends to assume the layout to be the same as in writing, but I'm afraid edge cases slip in and hit like a truck. This is one of the rare cases when enabling exceptions on the stream might be a good idea.





          Architecture for generic reading/writing



          I don't think implementing reading algorithm is a viable option, because it involves extreme amounts of error checking to be useful. As for writing:





          • Upstream and downstream invariances



            Upstream invariance in this case is CSV format, which the final output has to obey. Downstream invariance is requirements on the result of object getting streamed (file << myObject). One will need to specify the requirements on data type very clearly and with great scrutiny. For example, if one wants to accept std::string as data type, the user has to override default streaming for the type, which pulls them into realm of undefined behavior. This functionality clearly requires a lot of thought put into it in order to be correct and robust.




          • Data layout



            This is one is just a speculation, but the way data is stored might be problematic in terms of performance. It would be better to create a data structure that stores headers, and then stores values row-wise. Access could be done by using header value as first subscript, and row index as second subscript. Horizontal offset can be saved for each header value to access the right cell in a row. This also would be a great learning exercise.








          share|improve this answer









          $endgroup$













          • $begingroup$
            Lots of good tips in this answer and I've update my code to make use of most of them. Now I need to work on my generic CSV reader that can read ints, doubles, and strings.. Thanks for your help.
            $endgroup$
            – Ben
            Jan 21 at 16:57














          3












          3








          3





          $begingroup$

          Good documentation



          Every time I wanted to say "but it behaves badly in case ...", I found specification disallowing that case. One nitpick might be that "with one or more columns" could be explicitly written as "requires at least one column". The only thing left is to pray that people will read it.



          Pass by const reference for only read purposes



          Copying non-scalar and heavy data structures (strings, vectors) might incur significant overhead. Prefer to pass by const reference.



          Check if file is opened



          The check is performed when reading, but not performed when writing.



          Do not use at() unless exception-throwing version is desired



          .at() incurs overhead by performing in-range check, and also throws if out of range.



          Use emplace back to construct and push in place



          result.push_back({colname, std::vector<int> {}});


          This could be rewritten as



          result.emplace_back(colname, std::vector<int> {});


          From C++17, if I'm not mistaken, the two are equivalent due to copy elision, but emplace version is a bit clearer.



          Improve printing algorithm



          This is a general problem of string joining. This answer shows a great implementation for a simple case. One can remove templates from them if needed.



          Do not explicitly close file



          Destructor of std::ifstream and of it's twin automatically close the file.



          Create type alias where useful



          using column = std::pair<std::string, std::vector<int>>;


          would save users a lot of typing.



          Use locales for reading from csv



          Whenever I want casual reading of csv files I just copy/paste from this asnwer and march on.



          Unreliable reading algorithm



          I would be a bit worried to use it as is. It tends to assume the layout to be the same as in writing, but I'm afraid edge cases slip in and hit like a truck. This is one of the rare cases when enabling exceptions on the stream might be a good idea.





          Architecture for generic reading/writing



          I don't think implementing reading algorithm is a viable option, because it involves extreme amounts of error checking to be useful. As for writing:





          • Upstream and downstream invariances



            Upstream invariance in this case is CSV format, which the final output has to obey. Downstream invariance is requirements on the result of object getting streamed (file << myObject). One will need to specify the requirements on data type very clearly and with great scrutiny. For example, if one wants to accept std::string as data type, the user has to override default streaming for the type, which pulls them into realm of undefined behavior. This functionality clearly requires a lot of thought put into it in order to be correct and robust.




          • Data layout



            This is one is just a speculation, but the way data is stored might be problematic in terms of performance. It would be better to create a data structure that stores headers, and then stores values row-wise. Access could be done by using header value as first subscript, and row index as second subscript. Horizontal offset can be saved for each header value to access the right cell in a row. This also would be a great learning exercise.








          share|improve this answer









          $endgroup$



          Good documentation



          Every time I wanted to say "but it behaves badly in case ...", I found specification disallowing that case. One nitpick might be that "with one or more columns" could be explicitly written as "requires at least one column". The only thing left is to pray that people will read it.



          Pass by const reference for only read purposes



          Copying non-scalar and heavy data structures (strings, vectors) might incur significant overhead. Prefer to pass by const reference.



          Check if file is opened



          The check is performed when reading, but not performed when writing.



          Do not use at() unless exception-throwing version is desired



          .at() incurs overhead by performing in-range check, and also throws if out of range.



          Use emplace back to construct and push in place



          result.push_back({colname, std::vector<int> {}});


          This could be rewritten as



          result.emplace_back(colname, std::vector<int> {});


          From C++17, if I'm not mistaken, the two are equivalent due to copy elision, but emplace version is a bit clearer.



          Improve printing algorithm



          This is a general problem of string joining. This answer shows a great implementation for a simple case. One can remove templates from them if needed.



          Do not explicitly close file



          Destructor of std::ifstream and of it's twin automatically close the file.



          Create type alias where useful



          using column = std::pair<std::string, std::vector<int>>;


          would save users a lot of typing.



          Use locales for reading from csv



          Whenever I want casual reading of csv files I just copy/paste from this asnwer and march on.



          Unreliable reading algorithm



          I would be a bit worried to use it as is. It tends to assume the layout to be the same as in writing, but I'm afraid edge cases slip in and hit like a truck. This is one of the rare cases when enabling exceptions on the stream might be a good idea.





          Architecture for generic reading/writing



          I don't think implementing reading algorithm is a viable option, because it involves extreme amounts of error checking to be useful. As for writing:





          • Upstream and downstream invariances



            Upstream invariance in this case is CSV format, which the final output has to obey. Downstream invariance is requirements on the result of object getting streamed (file << myObject). One will need to specify the requirements on data type very clearly and with great scrutiny. For example, if one wants to accept std::string as data type, the user has to override default streaming for the type, which pulls them into realm of undefined behavior. This functionality clearly requires a lot of thought put into it in order to be correct and robust.




          • Data layout



            This is one is just a speculation, but the way data is stored might be problematic in terms of performance. It would be better to create a data structure that stores headers, and then stores values row-wise. Access could be done by using header value as first subscript, and row index as second subscript. Horizontal offset can be saved for each header value to access the right cell in a row. This also would be a great learning exercise.









          share|improve this answer












          share|improve this answer



          share|improve this answer










          answered Jan 19 at 22:09









          IncomputableIncomputable

          6,65521753




          6,65521753












          • $begingroup$
            Lots of good tips in this answer and I've update my code to make use of most of them. Now I need to work on my generic CSV reader that can read ints, doubles, and strings.. Thanks for your help.
            $endgroup$
            – Ben
            Jan 21 at 16:57


















          • $begingroup$
            Lots of good tips in this answer and I've update my code to make use of most of them. Now I need to work on my generic CSV reader that can read ints, doubles, and strings.. Thanks for your help.
            $endgroup$
            – Ben
            Jan 21 at 16:57
















          $begingroup$
          Lots of good tips in this answer and I've update my code to make use of most of them. Now I need to work on my generic CSV reader that can read ints, doubles, and strings.. Thanks for your help.
          $endgroup$
          – Ben
          Jan 21 at 16:57




          $begingroup$
          Lots of good tips in this answer and I've update my code to make use of most of them. Now I need to work on my generic CSV reader that can read ints, doubles, and strings.. Thanks for your help.
          $endgroup$
          – Ben
          Jan 21 at 16:57













          3












          $begingroup$

          General design



          Your dataset representation is a bit unconventional: since you need to write/read the file line by line, having the dataset oriented the other way might prove a hindrance. I understand that you might need to include / exclude columns from a particular file: but it isn't a sufficient reason to rotate your dataset: a more general, line-oriented dataset representation would allow you to do so too. In principle, you should separate concerns: handling your data on the one hand, reading/writing it from/to a csv file on the other hand.



          Also, your design forces the user to create headers, which isn't required by the format. The explanation, if I understood correctly, is that you weren't able to provide a type-erasing interface, so it was either headers for everyone or for no one.



          Type-flexible cell



          There are lots of ways to achieve that, and none is indisputably superior. You could consider std::variant, which is a standard, type-safe union:



          #include <variant>
          #include <iostream>
          #include <string>
          #include <vector>

          int main() {

          using Cell = std::variant<int, double, std::string>; // Cell can contain any of these types
          std::vector<Cell> record{"John", "Doe", 41, 1.75};
          for (const auto& cell : record) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          }


          Any column or line can now be represented by a sequence of Cells. It means that you don't need to hard-code a header, or to store your data by type-homogeneous columns. You can even have your whole dataset in one big vector: it can be a lot faster because you have better locality (in your current implementation, the processor has to fetch the cells of a given line from as many places in memory).



          If you're ready to anticipate the next standard (C++20), and that you have your dataset contained in one big std::vector<Cell>, you can then have rows and columns as range::views over your dataset:



          #include <variant>
          #include <iostream>
          #include <string>
          #include <vector>
          #include <range/v3/core.hpp>
          #include <range/v3/view.hpp>

          using namespace ranges;
          using namespace ranges::view;

          int main() {

          using Cell = std::variant<int, double, std::string>;
          std::vector<Cell> records{"John", "Doe", 41, 1.75, "Jane", "Smith", 35, 1.63};
          const int row_sz = 4;

          auto column = [row_sz](int n) {
          // to get column n, skip the n first elements
          // then take every row-szth element
          return drop(n) | stride(row_sz);
          };

          for (auto cell : records | column(1)) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          std::cout << std::endl;

          auto row = [row_sz](int n) { return slice(n*row_sz, n*row_sz+row_sz); };
          for (auto cell : records | row(1)) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          std::cout << std::endl;

          }


          Performance



          Your csv reader is inefficient. You read each line (one memory allocation and copy), then cut the line into pieces (as many allocations / conversions as pieces). You can build your dataset from the file without reading it into lines first: every time you read a ';' you push_back a cell, and every time you read a 'n' you push_back a last cell in the record, and then push_back the record (conceptually, because as I said I believe a flat std::vector with row-size as an additional information is better). It is a simplification of course, because you have to take care of quoted fields, eof and error-handling, but that's the general idea. You can design it as a state-machine with customization points for the cell and record handlers.






          share|improve this answer









          $endgroup$













          • $begingroup$
            This is incredibly helpful. I will spend the next week studying std::variant and ranges thanks to your advice. +1
            $endgroup$
            – Ben
            Jan 21 at 16:53
















          3












          $begingroup$

          General design



          Your dataset representation is a bit unconventional: since you need to write/read the file line by line, having the dataset oriented the other way might prove a hindrance. I understand that you might need to include / exclude columns from a particular file: but it isn't a sufficient reason to rotate your dataset: a more general, line-oriented dataset representation would allow you to do so too. In principle, you should separate concerns: handling your data on the one hand, reading/writing it from/to a csv file on the other hand.



          Also, your design forces the user to create headers, which isn't required by the format. The explanation, if I understood correctly, is that you weren't able to provide a type-erasing interface, so it was either headers for everyone or for no one.



          Type-flexible cell



          There are lots of ways to achieve that, and none is indisputably superior. You could consider std::variant, which is a standard, type-safe union:



          #include <variant>
          #include <iostream>
          #include <string>
          #include <vector>

          int main() {

          using Cell = std::variant<int, double, std::string>; // Cell can contain any of these types
          std::vector<Cell> record{"John", "Doe", 41, 1.75};
          for (const auto& cell : record) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          }


          Any column or line can now be represented by a sequence of Cells. It means that you don't need to hard-code a header, or to store your data by type-homogeneous columns. You can even have your whole dataset in one big vector: it can be a lot faster because you have better locality (in your current implementation, the processor has to fetch the cells of a given line from as many places in memory).



          If you're ready to anticipate the next standard (C++20), and that you have your dataset contained in one big std::vector<Cell>, you can then have rows and columns as range::views over your dataset:



          #include <variant>
          #include <iostream>
          #include <string>
          #include <vector>
          #include <range/v3/core.hpp>
          #include <range/v3/view.hpp>

          using namespace ranges;
          using namespace ranges::view;

          int main() {

          using Cell = std::variant<int, double, std::string>;
          std::vector<Cell> records{"John", "Doe", 41, 1.75, "Jane", "Smith", 35, 1.63};
          const int row_sz = 4;

          auto column = [row_sz](int n) {
          // to get column n, skip the n first elements
          // then take every row-szth element
          return drop(n) | stride(row_sz);
          };

          for (auto cell : records | column(1)) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          std::cout << std::endl;

          auto row = [row_sz](int n) { return slice(n*row_sz, n*row_sz+row_sz); };
          for (auto cell : records | row(1)) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          std::cout << std::endl;

          }


          Performance



          Your csv reader is inefficient. You read each line (one memory allocation and copy), then cut the line into pieces (as many allocations / conversions as pieces). You can build your dataset from the file without reading it into lines first: every time you read a ';' you push_back a cell, and every time you read a 'n' you push_back a last cell in the record, and then push_back the record (conceptually, because as I said I believe a flat std::vector with row-size as an additional information is better). It is a simplification of course, because you have to take care of quoted fields, eof and error-handling, but that's the general idea. You can design it as a state-machine with customization points for the cell and record handlers.






          share|improve this answer









          $endgroup$













          • $begingroup$
            This is incredibly helpful. I will spend the next week studying std::variant and ranges thanks to your advice. +1
            $endgroup$
            – Ben
            Jan 21 at 16:53














          3












          3








          3





          $begingroup$

          General design



          Your dataset representation is a bit unconventional: since you need to write/read the file line by line, having the dataset oriented the other way might prove a hindrance. I understand that you might need to include / exclude columns from a particular file: but it isn't a sufficient reason to rotate your dataset: a more general, line-oriented dataset representation would allow you to do so too. In principle, you should separate concerns: handling your data on the one hand, reading/writing it from/to a csv file on the other hand.



          Also, your design forces the user to create headers, which isn't required by the format. The explanation, if I understood correctly, is that you weren't able to provide a type-erasing interface, so it was either headers for everyone or for no one.



          Type-flexible cell



          There are lots of ways to achieve that, and none is indisputably superior. You could consider std::variant, which is a standard, type-safe union:



          #include <variant>
          #include <iostream>
          #include <string>
          #include <vector>

          int main() {

          using Cell = std::variant<int, double, std::string>; // Cell can contain any of these types
          std::vector<Cell> record{"John", "Doe", 41, 1.75};
          for (const auto& cell : record) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          }


          Any column or line can now be represented by a sequence of Cells. It means that you don't need to hard-code a header, or to store your data by type-homogeneous columns. You can even have your whole dataset in one big vector: it can be a lot faster because you have better locality (in your current implementation, the processor has to fetch the cells of a given line from as many places in memory).



          If you're ready to anticipate the next standard (C++20), and that you have your dataset contained in one big std::vector<Cell>, you can then have rows and columns as range::views over your dataset:



          #include <variant>
          #include <iostream>
          #include <string>
          #include <vector>
          #include <range/v3/core.hpp>
          #include <range/v3/view.hpp>

          using namespace ranges;
          using namespace ranges::view;

          int main() {

          using Cell = std::variant<int, double, std::string>;
          std::vector<Cell> records{"John", "Doe", 41, 1.75, "Jane", "Smith", 35, 1.63};
          const int row_sz = 4;

          auto column = [row_sz](int n) {
          // to get column n, skip the n first elements
          // then take every row-szth element
          return drop(n) | stride(row_sz);
          };

          for (auto cell : records | column(1)) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          std::cout << std::endl;

          auto row = [row_sz](int n) { return slice(n*row_sz, n*row_sz+row_sz); };
          for (auto cell : records | row(1)) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          std::cout << std::endl;

          }


          Performance



          Your csv reader is inefficient. You read each line (one memory allocation and copy), then cut the line into pieces (as many allocations / conversions as pieces). You can build your dataset from the file without reading it into lines first: every time you read a ';' you push_back a cell, and every time you read a 'n' you push_back a last cell in the record, and then push_back the record (conceptually, because as I said I believe a flat std::vector with row-size as an additional information is better). It is a simplification of course, because you have to take care of quoted fields, eof and error-handling, but that's the general idea. You can design it as a state-machine with customization points for the cell and record handlers.






          share|improve this answer









          $endgroup$



          General design



          Your dataset representation is a bit unconventional: since you need to write/read the file line by line, having the dataset oriented the other way might prove a hindrance. I understand that you might need to include / exclude columns from a particular file: but it isn't a sufficient reason to rotate your dataset: a more general, line-oriented dataset representation would allow you to do so too. In principle, you should separate concerns: handling your data on the one hand, reading/writing it from/to a csv file on the other hand.



          Also, your design forces the user to create headers, which isn't required by the format. The explanation, if I understood correctly, is that you weren't able to provide a type-erasing interface, so it was either headers for everyone or for no one.



          Type-flexible cell



          There are lots of ways to achieve that, and none is indisputably superior. You could consider std::variant, which is a standard, type-safe union:



          #include <variant>
          #include <iostream>
          #include <string>
          #include <vector>

          int main() {

          using Cell = std::variant<int, double, std::string>; // Cell can contain any of these types
          std::vector<Cell> record{"John", "Doe", 41, 1.75};
          for (const auto& cell : record) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          }


          Any column or line can now be represented by a sequence of Cells. It means that you don't need to hard-code a header, or to store your data by type-homogeneous columns. You can even have your whole dataset in one big vector: it can be a lot faster because you have better locality (in your current implementation, the processor has to fetch the cells of a given line from as many places in memory).



          If you're ready to anticipate the next standard (C++20), and that you have your dataset contained in one big std::vector<Cell>, you can then have rows and columns as range::views over your dataset:



          #include <variant>
          #include <iostream>
          #include <string>
          #include <vector>
          #include <range/v3/core.hpp>
          #include <range/v3/view.hpp>

          using namespace ranges;
          using namespace ranges::view;

          int main() {

          using Cell = std::variant<int, double, std::string>;
          std::vector<Cell> records{"John", "Doe", 41, 1.75, "Jane", "Smith", 35, 1.63};
          const int row_sz = 4;

          auto column = [row_sz](int n) {
          // to get column n, skip the n first elements
          // then take every row-szth element
          return drop(n) | stride(row_sz);
          };

          for (auto cell : records | column(1)) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          std::cout << std::endl;

          auto row = [row_sz](int n) { return slice(n*row_sz, n*row_sz+row_sz); };
          for (auto cell : records | row(1)) {
          std::visit((const auto& content) { std::cout << content << ' '; }, cell);
          }
          std::cout << std::endl;

          }


          Performance



          Your csv reader is inefficient. You read each line (one memory allocation and copy), then cut the line into pieces (as many allocations / conversions as pieces). You can build your dataset from the file without reading it into lines first: every time you read a ';' you push_back a cell, and every time you read a 'n' you push_back a last cell in the record, and then push_back the record (conceptually, because as I said I believe a flat std::vector with row-size as an additional information is better). It is a simplification of course, because you have to take care of quoted fields, eof and error-handling, but that's the general idea. You can design it as a state-machine with customization points for the cell and record handlers.







          share|improve this answer












          share|improve this answer



          share|improve this answer










          answered Jan 21 at 9:21









          papagagapapagaga

          4,427321




          4,427321












          • $begingroup$
            This is incredibly helpful. I will spend the next week studying std::variant and ranges thanks to your advice. +1
            $endgroup$
            – Ben
            Jan 21 at 16:53


















          • $begingroup$
            This is incredibly helpful. I will spend the next week studying std::variant and ranges thanks to your advice. +1
            $endgroup$
            – Ben
            Jan 21 at 16:53
















          $begingroup$
          This is incredibly helpful. I will spend the next week studying std::variant and ranges thanks to your advice. +1
          $endgroup$
          – Ben
          Jan 21 at 16:53




          $begingroup$
          This is incredibly helpful. I will spend the next week studying std::variant and ranges thanks to your advice. +1
          $endgroup$
          – Ben
          Jan 21 at 16:53











          0












          $begingroup$

          My first observation is that your object model is quite confusing; you have a vector of pairs with vectors, and it is very difficult to keep track of what is what. If I'm reading this code correctly, you should consider extracting this pair into a column class, giving you std::vector<column>.



          Once you have this column class, you can add additional properties to it, such as what type of data it contains, and a void* to the data in each cell.






          share|improve this answer









          $endgroup$









          • 1




            $begingroup$
            I would strongly advise against void*. Some template metaprogramming might have higher development cost, but it will sure payoff in learning and correctness of user code.
            $endgroup$
            – Incomputable
            Jan 19 at 22:10
















          0












          $begingroup$

          My first observation is that your object model is quite confusing; you have a vector of pairs with vectors, and it is very difficult to keep track of what is what. If I'm reading this code correctly, you should consider extracting this pair into a column class, giving you std::vector<column>.



          Once you have this column class, you can add additional properties to it, such as what type of data it contains, and a void* to the data in each cell.






          share|improve this answer









          $endgroup$









          • 1




            $begingroup$
            I would strongly advise against void*. Some template metaprogramming might have higher development cost, but it will sure payoff in learning and correctness of user code.
            $endgroup$
            – Incomputable
            Jan 19 at 22:10














          0












          0








          0





          $begingroup$

          My first observation is that your object model is quite confusing; you have a vector of pairs with vectors, and it is very difficult to keep track of what is what. If I'm reading this code correctly, you should consider extracting this pair into a column class, giving you std::vector<column>.



          Once you have this column class, you can add additional properties to it, such as what type of data it contains, and a void* to the data in each cell.






          share|improve this answer









          $endgroup$



          My first observation is that your object model is quite confusing; you have a vector of pairs with vectors, and it is very difficult to keep track of what is what. If I'm reading this code correctly, you should consider extracting this pair into a column class, giving you std::vector<column>.



          Once you have this column class, you can add additional properties to it, such as what type of data it contains, and a void* to the data in each cell.







          share|improve this answer












          share|improve this answer



          share|improve this answer










          answered Jan 19 at 22:02









          Joe CJoe C

          1,030211




          1,030211








          • 1




            $begingroup$
            I would strongly advise against void*. Some template metaprogramming might have higher development cost, but it will sure payoff in learning and correctness of user code.
            $endgroup$
            – Incomputable
            Jan 19 at 22:10














          • 1




            $begingroup$
            I would strongly advise against void*. Some template metaprogramming might have higher development cost, but it will sure payoff in learning and correctness of user code.
            $endgroup$
            – Incomputable
            Jan 19 at 22:10








          1




          1




          $begingroup$
          I would strongly advise against void*. Some template metaprogramming might have higher development cost, but it will sure payoff in learning and correctness of user code.
          $endgroup$
          – Incomputable
          Jan 19 at 22:10




          $begingroup$
          I would strongly advise against void*. Some template metaprogramming might have higher development cost, but it will sure payoff in learning and correctness of user code.
          $endgroup$
          – Incomputable
          Jan 19 at 22:10


















          draft saved

          draft discarded




















































          Thanks for contributing an answer to Code Review Stack Exchange!


          • Please be sure to answer the question. Provide details and share your research!

          But avoid



          • Asking for help, clarification, or responding to other answers.

          • Making statements based on opinion; back them up with references or personal experience.


          Use MathJax to format equations. MathJax reference.


          To learn more, see our tips on writing great answers.




          draft saved


          draft discarded














          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f211826%2fcode-to-read-and-write-csv-files%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown





















































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown

































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown







          Popular posts from this blog

          How fix org.hibernate.TransientPropertyValueException

          Updating UILabel text programmatically using a function

          Cloud Functions - OpenCV Videocapture Read method fails for larger files from cloud storage