#include "ExpressionParser.h"
#include "strnatcmp.h"
#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <boost/lexical_cast.hpp>
/// Column separator used when splitting input lines and when writing rows.
const char delimiter = '\t';

/**
 * Read one line from instream and split it into fields at every delimiter.
 *
 * @param instream  stream the next line is extracted from
 * @param columns   cleared first, then filled with the fields of the line;
 *                  an empty line produces a single empty field
 * @return number of fields stored in columns, or 0 if no line could be read
 */
unsigned int read_csvline(std::istream &instream,
                          std::vector<std::string> &columns)
{
    columns.clear();

    std::string line;
    // Check the stream's fail state instead of good(): when the last line of
    // the input has no trailing newline, getline() extracts it successfully
    // but also sets eofbit, and a good() test would throw that line away.
    if (!std::getline(instream, line)) {
        return 0;
    }

    columns.push_back(std::string());
    for (std::string::const_iterator si = line.begin();
         si != line.end(); ++si)
    {
        if (*si == delimiter)
            columns.push_back(std::string());   // delimiter starts a new field
        else
            columns.back() += *si;
    }

    return static_cast<unsigned int>(columns.size());
}
/**
 * Symbol table that resolves expression variables against one CSV row.
 *
 * A variable whose name is a known header is answered from the matching
 * field of the referenced row; every other name is passed on to
 * stx::BasicSymbolTable. Both containers are held by reference, so lookups
 * always see whatever the referenced objects contain at call time.
 */
class CSVRowSymbolTable : public stx::BasicSymbolTable
{
public:
    /// Header name -> index used to pick the field out of datacolumns.
    const std::map<std::string, unsigned int> &headersmap;

    /// Fields of the row that lookups are answered from.
    const std::vector<std::string> &datacolumns;

    CSVRowSymbolTable(const std::map<std::string, unsigned int> &_headersmap,
                      const std::vector<std::string> &_datacolumns)
        : stx::BasicSymbolTable(),
          headersmap(_headersmap),
          datacolumns(_datacolumns)
    {
    }

    /**
     * Look up varname for the expression evaluator.
     *
     * @param varname  variable name as written in the expression
     * @return the row's field for a known header (an empty string if the row
     *         has no such field), otherwise the base class's result
     */
    virtual stx::AnyScalar lookupVariable(const std::string &varname) const
    {
        typedef std::map<std::string, unsigned int>::const_iterator HeaderIter;

        const HeaderIter entry = headersmap.find(varname);
        if (entry == headersmap.end()) {
            // Not a column name: defer to the base symbol table.
            return stx::BasicSymbolTable::lookupVariable(varname);
        }

        const unsigned int column = entry->second;
        if (column >= datacolumns.size()) {
            // The row is shorter than the header line.
            return "";
        }

        // NOTE(review): setAutoString presumably picks a scalar type from
        // the text - confirm against the stx AnyScalar documentation.
        return stx::AnyScalar().setAutoString( datacolumns[column] );
    }
};
/**
 * Ordering predicate for data records, comparing one column of each record
 * with strnatcasecmp(). A record that has no field at sortcol is compared as
 * if that field were the empty string.
 */
struct DataRecordSortRelation
{
    /// Index of the field the records are compared on.
    unsigned int sortcol;

    /// When true the comparison is reversed.
    bool descending;

    inline DataRecordSortRelation(unsigned int _sortcol, bool _descending = false)
        : sortcol(_sortcol), descending(_descending)
    {
    }

    /**
     * @return true if recordA must be ordered before recordB
     */
    inline bool operator()(const std::vector<std::string> &recordA,
                           const std::vector<std::string> &recordB) const
    {
        // Pick the sort keys once; short records fall back to "".
        const char *keyA = (sortcol < recordA.size()) ? recordA[sortcol].c_str() : "";
        const char *keyB = (sortcol < recordB.size()) ? recordB[sortcol].c_str() : "";

        if (descending) {
            return strnatcasecmp(keyA, keyB) > 0;
        }
        return strnatcasecmp(keyA, keyB) < 0;
    }
};
/**
 * Return a copy of str without leading and trailing space characters.
 * Only ' ' is stripped; tabs and other whitespace are left in place.
 * An empty or all-space input yields an empty string.
 */
static inline std::string string_trim(const std::string& str)
{
    const std::string::size_type first = str.find_first_not_of(' ');
    if (first == std::string::npos) {
        return std::string();
    }

    // A non-space character exists, so this search cannot fail.
    const std::string::size_type last = str.find_last_not_of(' ');
    return std::string(str, first, last - first + 1);
}
int main(int argc, char *argv[])
{
if (argc < 2) {
std::cerr << "Usage: " << argv[0] << " <csv-filename> [filter expression] [sort-column] [offset] [limit]" << "\n";
return 0;
}
std::string csvfilename = argv[1];
std::string exprstring = (argc >= 3) ? string_trim(argv[2]) : "";
std::string sortcolumn = (argc >= 4) ? string_trim(argv[3]) : "";
std::string offsetstring = (argc >= 5) ? string_trim(argv[4]) : "";
std::string limitstring = (argc >= 6) ? string_trim(argv[5]) : "";
stx::ParseTree pt;
try
{
if (exprstring.size()) {
pt = stx::parseExpression(exprstring);
}
}
catch (stx::ExpressionParserException &e)
{
std::cerr << "ExpressionParserException: " << e.what() << "\n";
return 0;
}
std::ifstream csvfilestream;
if (csvfilename != "-")
{
csvfilestream.open(csvfilename.c_str());
if (!csvfilestream) {
std::cerr << "Error opening CSV file " << csvfilename << "\n";
return 0;
}
}
std::istream& csvfile = (csvfilename == "-") ? std::cin : csvfilestream;
std::vector<std::string> headers;
if (read_csvline(csvfile, headers) == 0) {
std::cerr << "Error read column headers: no input\n";
return 0;
}
std::map<std::string, unsigned int> headersmap;
for(unsigned int headnum = 0; headnum < headers.size(); ++headnum)
{
headersmap[ headers[headnum] ] = headnum;
}
unsigned int linesprocessed = 0;
bool addedEvalResult = false;
std::vector<std::string> datacolumns;
CSVRowSymbolTable csvsymboltable(headersmap, datacolumns);
std::vector< std::vector<std::string> > datarecords;
while( read_csvline(csvfile, datacolumns) > 0 )
{
try
{
linesprocessed++;
if (!pt.isEmpty())
{
stx::AnyScalar val = pt.evaluate( csvsymboltable );
if (val.isBooleanType())
{
if (!val.getBoolean()) continue;
}
else
{
if (!addedEvalResult) {
headers.push_back("EvalResult");
addedEvalResult = true;
}
while( datacolumns.size() + 1 < headers.size() )
datacolumns.push_back("");
datacolumns.push_back(val.getString());
}
}
}
catch (stx::ExpressionParserException &e)
{
if (!addedEvalResult) {
headers.push_back("EvalResult");
addedEvalResult = true;
}
while( datacolumns.size() + 1 < headers.size() )
datacolumns.push_back("");
datacolumns.push_back(std::string("Exception: ") + e.what());
}
datarecords.push_back( datacolumns );
}
if (addedEvalResult) {
headersmap[ headers[headers.size() - 1] ] = headers.size() - 1;
}
if (sortcolumn.size())
{
std::map<std::string, unsigned int>::const_iterator
colfind = headersmap.find(sortcolumn);
if (colfind != headersmap.end())
{
std::sort(datarecords.begin(), datarecords.end(),
DataRecordSortRelation(colfind->second));
}
else
{
if (sortcolumn[0] == '!')
{
sortcolumn.erase(0, 1);
colfind = headersmap.find(sortcolumn);
if (colfind != headersmap.end())
{
std::sort(datarecords.begin(), datarecords.end(),
DataRecordSortRelation(colfind->second, 1));
}
else
{
std::cerr << "Bad sort column: " << sortcolumn << " could not be found.\n";
return 0;
}
}
else
{
std::cerr << "Bad sort column: " << sortcolumn << " could not be found.\n";
return 0;
}
}
}
unsigned int offset = 0;
unsigned int limit = datarecords.size();
if (offsetstring.size())
{
try {
offset = boost::lexical_cast<unsigned int>(offsetstring);
}
catch (boost::bad_lexical_cast &e) {
std::cerr << "Bad number in offset: not an integer.\n";
return 0;
}
}
if (limitstring.size())
{
try {
limit = boost::lexical_cast<unsigned int>(limitstring);
}
catch (boost::bad_lexical_cast &e) {
std::cerr << "Bad number in limit: not an integer.\n";
return 0;
}
}
std::cerr << "Processed " << linesprocessed << " lines, "
<< "copied " << datarecords.size() << " and "
<< "skipped " << (linesprocessed - datarecords.size()) << " lines" << "\n";
for(std::vector<std::string>::const_iterator coliter = headers.begin();
coliter != headers.end(); ++coliter)
{
if (coliter != headers.begin()) std::cout << delimiter;
std::cout << *coliter;
}
std::cout << "\n";
for(unsigned int current = offset;
current < offset + limit && current < datarecords.size();
++current)
{
std::vector<std::string> &currrecord = datarecords[current];
for(std::vector<std::string>::const_iterator coliter = currrecord.begin();
coliter != currrecord.end(); ++coliter)
{
if (coliter != currrecord.begin()) std::cout << delimiter;
std::cout << *coliter;
}
std::cout << "\n";
}
}