C++ 解析 CSV 文件

1、头文件

 

#ifndef _CSV_HELPER_H_

#define _CSV_HELPER_H_

#include <stdio.h>

#include <list>
#include <new>
#include <string>
#include <utility>
#include <vector>

 

using namespace std;

 

/**
 * One parsed CSV record.
 *
 * fields_ points to an array of numOfFields_ NUL-terminated strings. Rows
 * produced by the parser are heap-allocated and are released with
 * CsvParser_destroy_row().
 */
struct CsvRow {
    char **fields_;     // array of field strings
    int numOfFields_;   // number of valid entries in fields_
};

 

typedef struct CsvParser {

    char *filePath_;

    char delimiter_;

    int firstLineIsHeader_;

    char *errMsg_;

    CsvRow *header_;

    FILE *fileHandler_;

    int fromString_;

    char *csvString_;

    int csvStringIter_;

    int iSkipLine;

    vector<string> vtSkippedLine;

} CsvParser;

 

 

// ---- Parser lifetime ----

// Creates a parser for the file at filePath (the path is copied). A NULL
// delimiter selects ','; a delimiter rejected by _CsvParser_delimiterIsAccepted
// is stored as '\0' and makes later reads fail with an error message.
// iSkipLine leading lines are skipped when the file is first opened.
CsvParser *CsvParser_new(const char *filePath, const char *delimiter, int firstLineIsHeader, int iSkipLine = 0);

// Creates a parser that reads from a private copy of csvString instead of a file.
CsvParser *CsvParser_new_from_string(const char *csvString, const char *delimiter, int firstLineIsHeader);

// Releases the parser, its cached header row and its open file. NULL is accepted.
void CsvParser_destroy(CsvParser *csvParser);

// Releases a row and all of its field strings.
void CsvParser_destroy_row(CsvRow *csvRow);

// ---- Reading ----

// Returns the header row (read and cached on first use), or NULL with an error
// message when the parser was created with firstLineIsHeader == 0. The row is
// owned by the parser and is freed by CsvParser_destroy.
const CsvRow *CsvParser_getHeader(CsvParser *csvParser);

// Returns the next data row, consuming the header first if it is still pending.
// Returns NULL at end of input or on error; see CsvParser_getErrorMessage.
CsvRow *CsvParser_getRow(CsvParser *csvParser);

// Number of fields stored in csvRow.
int CsvParser_getNumFields(const CsvRow *csvRow);

// Read-only view of the field strings stored in csvRow.
const char **CsvParser_getFields(const CsvRow *csvRow);

// Last error text recorded on the parser, or NULL when none was recorded.
const char* CsvParser_getErrorMessage(CsvParser *csvParser);

// ---- Internal helpers (declared here, used by the implementation) ----

// Reads p up to and including the next '\n' (or EOF) and returns ftell(p).
int _CsvParser_getNextLinePos(FILE *p);

// Advances the parser's file stream past the current line; always returns true.
bool _CsvParser_skipLine(CsvParser *csvParser);

// Parses and returns the next raw row without any header handling.
CsvRow *_CsvParser_getRow(CsvParser *csvParser);

// Returns 1 when *delimiter may be used as a separator, 0 for '\n', '\r', '\0' and '"'.
int _CsvParser_delimiterIsAccepted(const char *delimiter);

// Replaces the parser's stored error text with a copy of errorMessage.
void _CsvParser_setErrorMessage(CsvParser *csvParser, const char *errorMessage);

 

 

// Forward declarations so the classes below can name each other as friends.
class CCsvHelper;

class CCsvResultSet;

// One cell of a row: first is the column name, second is the cell value
// (this is how CCsvRow::SetData fills it).
using CCsvField = std::pair<string, string>;

 

class CCsvRow

{

friend class CCsvResultSet;

public:

    CCsvRow(){

        m_vtField.clear();

    }

    ~CCsvRow(){}

    string GetFieldByName(string sName);

    string GetFieldByIndex(int index);

    void Print();

private:

    void SetData(vector<string> vtHead, vector<string> vtData);

public:

    vector<CCsvField> m_vtField;

};

 

class CCsvResultSet

{

    friend class CCsvHelper;

public:

    CCsvResultSet(){

        m_vtHead.clear();

        m_vtRow.clear();

        m_iPos = 0;

    }

    ~CCsvResultSet(){

        m_vtHead.clear();

        m_vtRow.clear();

    }

    void Print();

    bool Next();

    int GetRowCount();

    CCsvRow GetRowByIndex(int index);

    CCsvRow FetchRow();

 

private:

    void SetHeader(vector<string> vtHead);

    void SetRow(vector<string> vtRow);

 

private:

    int m_iPos;

    vector<string> m_vtHead;//表头

    vector<vector<string>> m_vtRow;//行数据

};

 

class CCsvHelper

{

public:

    CCsvHelper(){

        

    }

    ~CCsvHelper(){

        

    }

 

public:

    /*

    sFileName 文件路径,path/filename.csv

    iSkipLine 略过起始行数,略过的行将不被解析,略过文件头部连续的n行

    bFirstLineHead 读取的首行是否是表头,不是表头将直接当作数据

    */

    bool LoadFrom(string sFileName, int iSkipLine = 0, bool bFirstLineHead = true);

 

    /*

    获取当前文件结果集

    */

    CCsvResultSet GetResultSet();

 

private:

    string m_sFileName;

    bool m_bHeader;//存在表头

 

    

    CCsvResultSet m_rsData;

};

 

#endif

 

2、实现文件

 

#include <stdlib.h>

#include <string.h>

#include <stdio.h>

#include <errno.h>

#include <iostream>

#include <iomanip>

 

#include "CsvHelper.h"

 

using namespace std;

 

 

/**
 * Creates a parser that reads from a file.
 *
 * @param filePath          path of the CSV file (copied); may be NULL, in
 *                          which case reads fail until a string source is set
 * @param delimiter         pointer to the separator character; NULL selects
 *                          ','. An unsupported delimiter is stored as '\0'
 *                          and makes every later read fail
 * @param firstLineIsHeader non-zero when the first parsed row is the header
 * @param iSkipLine         number of leading lines skipped when the file is opened
 * @return the new parser, or NULL when memory could not be allocated.
 *         Release it with CsvParser_destroy().
 */
CsvParser *CsvParser_new(const char *filePath, const char *delimiter, int firstLineIsHeader, int iSkipLine) {
    CsvParser *csvParser = static_cast<CsvParser*>(malloc(sizeof(CsvParser)));
    if (csvParser == NULL) {
        return NULL;
    }

    csvParser->filePath_ = NULL;
    if (filePath != NULL) {
        const size_t filePathLen = strlen(filePath);
        csvParser->filePath_ = static_cast<char*>(malloc(filePathLen + 1));
        if (csvParser->filePath_ == NULL) {
            free(csvParser);
            return NULL;
        }
        memcpy(csvParser->filePath_, filePath, filePathLen + 1);
    }

    // malloc() does not run constructors, so the vector member must be
    // constructed in place before it is a valid object. Nothing else in this
    // file stores into it, so it stays empty and owns no memory when the
    // parser is later released with free().
    ::new (static_cast<void*>(&csvParser->vtSkippedLine)) vector<string>();

    if (delimiter == NULL) {
        csvParser->delimiter_ = ',';
    } else if (_CsvParser_delimiterIsAccepted(delimiter)) {
        csvParser->delimiter_ = *delimiter;
    } else {
        csvParser->delimiter_ = '\0';  // rejected: reported on the first read
    }

    csvParser->firstLineIsHeader_ = firstLineIsHeader;
    csvParser->errMsg_ = NULL;
    csvParser->header_ = NULL;
    csvParser->fileHandler_ = NULL;
    csvParser->fromString_ = 0;
    csvParser->csvString_ = NULL;
    csvParser->csvStringIter_ = 0;
    csvParser->iSkipLine = iSkipLine;

    return csvParser;
}

 

/**
 * Creates a parser that reads from an in-memory CSV string.
 *
 * @param csvString         CSV text (copied); when NULL, later reads fail
 *                          with "Supplied CSV string is NULL"
 * @param delimiter         see CsvParser_new
 * @param firstLineIsHeader non-zero when the first parsed row is the header
 * @return the new parser, or NULL when memory could not be allocated.
 */
CsvParser *CsvParser_new_from_string(const char *csvString, const char *delimiter, int firstLineIsHeader) {
    CsvParser *csvParser = CsvParser_new(NULL, delimiter, firstLineIsHeader);
    if (csvParser == NULL) {
        return NULL;
    }
    csvParser->fromString_ = 1;
    if (csvString != NULL) {
        const size_t csvStringLen = strlen(csvString);
        char *copy = (char*)malloc(csvStringLen + 1);
        if (copy == NULL) {
            // Do not hand out a parser that silently lost its input.
            CsvParser_destroy(csvParser);
            return NULL;
        }
        memcpy(copy, csvString, csvStringLen + 1);
        csvParser->csvString_ = copy;
    }
    return csvParser;
}

 

/**
 * Releases a parser and everything it owns: the path copy, the stored error
 * text, the open file, the cached header row and the string copy.
 * Passing NULL is a no-op.
 */
void CsvParser_destroy(CsvParser *csvParser) {
    if (!csvParser) {
        return;
    }
    // free(NULL) is defined to do nothing, so plain buffers need no guard.
    free(csvParser->filePath_);
    free(csvParser->errMsg_);
    if (csvParser->fileHandler_) {
        fclose(csvParser->fileHandler_);
    }
    if (csvParser->header_) {
        CsvParser_destroy_row(csvParser->header_);
    }
    free(csvParser->csvString_);
    free(csvParser);
}

 

/**
 * Releases a row together with all of its field strings.
 * Passing NULL is a no-op, matching CsvParser_destroy().
 */
void CsvParser_destroy_row(CsvRow *csvRow) {
    if (csvRow == NULL) {
        return;
    }
    if (csvRow->fields_ != NULL) {
        for (int i = 0; i < csvRow->numOfFields_; i++) {
            free(csvRow->fields_[i]);
        }
        free(csvRow->fields_);
    }
    free(csvRow);
}

 

 

/**
 * Returns the header row, reading and caching it on the first call.
 *
 * When the parser was created without header support, an error message is
 * recorded and NULL is returned. The returned row stays owned by the parser.
 */
const CsvRow *CsvParser_getHeader(CsvParser *csvParser) {
    if (csvParser->firstLineIsHeader_) {
        if (!csvParser->header_) {
            csvParser->header_ = _CsvParser_getRow(csvParser);
        }
        return csvParser->header_;
    }
    _CsvParser_setErrorMessage(csvParser, "Cannot supply header, as current CsvParser object does not support header");
    return NULL;
}

 

/**
 * Returns the next data row, or NULL at end of input or on error.
 *
 * If a header is expected but has not been read yet, it is read and cached
 * first so that it is never returned as data.
 */
CsvRow *CsvParser_getRow(CsvParser *csvParser) {
    const bool headerPending = csvParser->firstLineIsHeader_ && csvParser->header_ == NULL;
    if (headerPending) {
        csvParser->header_ = _CsvParser_getRow(csvParser);
    }
    CsvRow *nextRow = _CsvParser_getRow(csvParser);
    return nextRow;
}

 

/** Returns the number of fields stored in csvRow. */
int CsvParser_getNumFields(const CsvRow *csvRow) {
    const int fieldCount = csvRow->numOfFields_;
    return fieldCount;
}

 

/** Returns a read-only view of the field strings stored in csvRow. */
const char **CsvParser_getFields(const CsvRow *csvRow) {
    // Only adds constness to the pointed-to characters.
    const char **readOnlyFields = const_cast<const char**>(csvRow->fields_);
    return readOnlyFields;
}

/**
 * Consumes characters from p up to and including the next '\n' (or until
 * EOF) and returns the resulting stream position as reported by ftell().
 */
int _CsvParser_getNextLinePos(FILE *p)
{
    for (int c = fgetc(p); c != EOF && c != '\n'; c = fgetc(p)) {
        // discard the rest of the current line
    }
    return static_cast<int>(ftell(p));
}

/**
 * Moves the parser's file stream to the start of the next line.
 * Always returns true.
 */
bool _CsvParser_skipLine(CsvParser *csvParser) {
    FILE *stream = csvParser->fileHandler_;
    const int nextLineOffset = _CsvParser_getNextLinePos(stream);
    fseek(stream, nextLineOffset, SEEK_SET);
    return true;
}

 

// Records `message` on the parser, releases the scratch field buffer and the
// partially built row (either may be NULL), and returns NULL so that callers
// can write `return _CsvParser_abortRow(...)`.
static CsvRow *_CsvParser_abortRow(CsvParser *csvParser, CsvRow *csvRow, char *currField, const char *message) {
    _CsvParser_setErrorMessage(csvParser, message);
    free(currField);
    if (csvRow != NULL) {
        CsvParser_destroy_row(csvRow);
    }
    return NULL;
}

/**
 * Parses the next raw row from the parser's input (file or string).
 *
 * On the first file read the file is opened and iSkipLine leading lines are
 * skipped. Fields are split on delimiter_ and '\n'; '\r' is ignored. A field
 * that starts with '"' is a quoted field: delimiters and newlines inside it
 * are kept, and a doubled quote ("") yields one literal quote.
 *
 * @return a heap-allocated row the caller must release with
 *         CsvParser_destroy_row(), or NULL with an error message set. The
 *         message is "Reached EOF" when the input is exhausted.
 */
CsvRow *_CsvParser_getRow(CsvParser *csvParser) {
    static const char kOutOfMemory[] = "Out of memory while reading CSV row";
    const int acceptedFields = 64;      // growth step of the fields_ array
    int acceptedCharsInField = 64;      // current capacity of the scratch field buffer
    int numRowRealloc = 0;

    if (csvParser->filePath_ == NULL && (!csvParser->fromString_)) {
        _CsvParser_setErrorMessage(csvParser, "Supplied CSV file path is NULL");
        return NULL;
    }
    if (csvParser->csvString_ == NULL && csvParser->fromString_) {
        _CsvParser_setErrorMessage(csvParser, "Supplied CSV string is NULL");
        return NULL;
    }
    if (csvParser->delimiter_ == '\0') {
        _CsvParser_setErrorMessage(csvParser, "Supplied delimiter is not supported");
        return NULL;
    }

    if (!csvParser->fromString_ && csvParser->fileHandler_ == NULL) {
        csvParser->fileHandler_ = fopen(csvParser->filePath_, "r");
        if (csvParser->fileHandler_ == NULL) {
            const int errorNum = errno;
            const char *errStr = strerror(errorNum);
            static const char kOpenErrorFormat[] = "Error opening CSV file for reading: %s : %s";
            // Size the buffer from the actual path and error text; a fixed
            // slack overflowed for long paths.
            const size_t errMsgSize = sizeof(kOpenErrorFormat) + strlen(csvParser->filePath_) + strlen(errStr);
            char *errMsg = (char*)malloc(errMsgSize);
            if (errMsg == NULL) {
                _CsvParser_setErrorMessage(csvParser, "Error opening CSV file for reading");
                return NULL;
            }
            snprintf(errMsg, errMsgSize, kOpenErrorFormat, csvParser->filePath_, errStr);
            _CsvParser_setErrorMessage(csvParser, errMsg);
            free(errMsg);
            return NULL;
        }
        for (int remaining = csvParser->iSkipLine; remaining > 0; remaining--) {
            _CsvParser_skipLine(csvParser);
        }
    }

    CsvRow *csvRow = (CsvRow*)malloc(sizeof(CsvRow));
    if (csvRow == NULL) {
        return _CsvParser_abortRow(csvParser, NULL, NULL, kOutOfMemory);
    }
    csvRow->numOfFields_ = 0;
    csvRow->fields_ = (char**)malloc(acceptedFields * sizeof(char*));
    char *currField = (char*)malloc(acceptedCharsInField);
    if (csvRow->fields_ == NULL || currField == NULL) {
        return _CsvParser_abortRow(csvParser, csvRow, currField, kOutOfMemory);
    }

    int fieldIter = 0;
    int inside_complex_field = 0;   // currently inside a quoted field
    int currFieldCharIter = 0;      // number of characters stored in currField
    int seriesOfQuotesLength = 0;   // consecutive quotes seen inside a field
    int lastCharIsQuote = 0;
    int isEndOfFile = 0;

    while (1) {
        char currChar;
        int endOfFileIndicator;
        if (csvParser->fromString_) {
            currChar = csvParser->csvString_[csvParser->csvStringIter_];
            endOfFileIndicator = (currChar == '\0');
            if (!endOfFileIndicator) {
                // Never step past the terminator: a later call must see the
                // same '\0' again instead of reading beyond the buffer.
                csvParser->csvStringIter_++;
            }
        } else {
            // Compare the int result with EOF so that read errors also end
            // the row; feof() alone stays false on an error and would loop
            // forever.
            const int ch = fgetc(csvParser->fileHandler_);
            endOfFileIndicator = (ch == EOF);
            currChar = (char)ch;
        }

        if (endOfFileIndicator) {
            if (currFieldCharIter == 0 && fieldIter == 0) {
                return _CsvParser_abortRow(csvParser, csvRow, currField, "Reached EOF");
            }
            // Treat end of input as the end of the last line.
            currChar = '\n';
            isEndOfFile = 1;
        }

        if (currChar == '\r') {
            continue;
        }

        if (currFieldCharIter == 0 && !lastCharIsQuote) {
            if (currChar == '\"') {
                // Opening quote of a quoted field; it is not stored.
                inside_complex_field = 1;
                lastCharIsQuote = 1;
                continue;
            }
        } else if (currChar == '\"') {
            seriesOfQuotesLength++;
            inside_complex_field = (seriesOfQuotesLength % 2 == 0);
            if (inside_complex_field) {
                // Second quote of an escaped pair: overwrite the first one.
                currFieldCharIter--;
            }
        } else {
            seriesOfQuotesLength = 0;
        }

        if (isEndOfFile || ((currChar == csvParser->delimiter_ || currChar == '\n') && !inside_complex_field)) {
            // A trailing stored quote is the closing quote and is dropped. The
            // > 0 guard covers a lone opening quote at end of input, where
            // nothing was stored and the index would otherwise be -1.
            const int terminatorPos = (lastCharIsQuote && currFieldCharIter > 0)
                                          ? currFieldCharIter - 1
                                          : currFieldCharIter;
            currField[terminatorPos] = '\0';

            char *finished = (char*)malloc(currFieldCharIter + 1);
            if (finished == NULL) {
                return _CsvParser_abortRow(csvParser, csvRow, currField, kOutOfMemory);
            }
            strcpy(finished, currField);
            csvRow->fields_[fieldIter] = finished;
            csvRow->numOfFields_++;
            free(currField);
            currField = NULL;

            if (currChar == '\n') {
                return csvRow;
            }

            if (csvRow->numOfFields_ % acceptedFields == 0) {
                char **grown = (char**)realloc(csvRow->fields_, ((numRowRealloc + 2) * acceptedFields) * sizeof(char*));
                if (grown == NULL) {
                    return _CsvParser_abortRow(csvParser, csvRow, NULL, kOutOfMemory);
                }
                csvRow->fields_ = grown;
                numRowRealloc++;
            }

            acceptedCharsInField = 64;
            currField = (char*)malloc(acceptedCharsInField);
            if (currField == NULL) {
                return _CsvParser_abortRow(csvParser, csvRow, NULL, kOutOfMemory);
            }
            currFieldCharIter = 0;
            fieldIter++;
            inside_complex_field = 0;
        } else {
            currField[currFieldCharIter] = currChar;
            currFieldCharIter++;
            // Grow one slot early so the terminator always fits.
            if (currFieldCharIter == acceptedCharsInField - 1) {
                acceptedCharsInField *= 2;
                char *grown = (char*)realloc(currField, acceptedCharsInField);
                if (grown == NULL) {
                    return _CsvParser_abortRow(csvParser, csvRow, currField, kOutOfMemory);
                }
                currField = grown;
            }
        }

        lastCharIsQuote = (currChar == '\"') ? 1 : 0;
    }
}

 

/**
 * Tells whether the character delimiter points at can be used as a field
 * separator. Line breaks, the NUL character and the double quote are
 * rejected.
 *
 * @return 1 when accepted, 0 when rejected.
 */
int _CsvParser_delimiterIsAccepted(const char *delimiter) {
    switch (*delimiter) {
        case '\n':
        case '\r':
        case '\0':
        case '\"':
            return 0;
        default:
            return 1;
    }
}

 

/**
 * Replaces the parser's stored error text with a private copy of
 * errorMessage.
 *
 * The copy is made before the previous text is freed, so errorMessage may
 * point at the parser's current message. A NULL errorMessage, or a failed
 * allocation, leaves the parser with no message (errMsg_ == NULL).
 */
void _CsvParser_setErrorMessage(CsvParser *csvParser, const char *errorMessage) {
    char *copy = NULL;
    if (errorMessage != NULL) {
        const size_t errMsgLen = strlen(errorMessage);
        copy = (char*)malloc(errMsgLen + 1);
        if (copy != NULL) {
            memcpy(copy, errorMessage, errMsgLen + 1);
        }
    }
    free(csvParser->errMsg_);
    csvParser->errMsg_ = copy;
}

 

/**
 * Returns the last error text recorded on the parser, or NULL when none has
 * been recorded. The string stays owned by the parser.
 */
const char *CsvParser_getErrorMessage(CsvParser *csvParser) {
    const char *lastError = csvParser->errMsg_;
    return lastError;
}

 

 

////////////////////////////////////////////////////////////////////////

void CCsvRow::Print()

{

cout << left;

if (m_vtField.size() > 0)

{

for (auto it : m_vtField)

{

cout << setw((it.first.length()>=it.second.length()? it.first.length(): it.second.length()) + 10) << it.first;

// cout << it.first << "\t";

}

cout << endl;

cout << left;

for (auto it : m_vtField)

{

cout << setw((it.first.length()>=it.second.length() ? it.first.length() : it.second.length()) + 10) << it.second;

// cout << it.second << "\t";

}

cout << endl;

}

cout << endl;

}

/**
 * Appends one (column name, value) field per column to the row.
 *
 * @param vtHead column names
 * @param vtData values, in the same order as vtHead
 * @throws const char* when the two vectors differ in size
 */
void CCsvRow::SetData(vector<string> vtHead, vector<string> vtData)
{
    if (vtHead.size() != vtData.size())
    {
        throw ("col name size is not equal to row col size!!!");
    }

    vector<string>::const_iterator name = vtHead.begin();
    vector<string>::const_iterator value = vtData.begin();
    for (; name != vtHead.end(); ++name, ++value)
    {
        m_vtField.push_back(CCsvField(*name, *value));
    }
}

/**
 * Returns the value of the first field whose column name equals sName, or
 * an empty string when the row has no such column.
 */
string CCsvRow::GetFieldByName(string sName)
{
    for (vector<CCsvField>::const_iterator field = m_vtField.begin(); field != m_vtField.end(); ++field)
    {
        if (field->first == sName)
        {
            return field->second;
        }
    }
    return string();
}

/**
 * Returns the value of the field at the given zero-based position.
 *
 * @throws const char* when index is negative or not less than the number of
 *         fields (index == size is rejected; it used to read past the end
 *         of the vector)
 */
string CCsvRow::GetFieldByIndex(int index)
{
    if (index < 0 || index >= (int)m_vtField.size())
    {
        throw ("invalid index ");
    }
    return m_vtField[index].second;
}

 

 

/**
 * Prints the header line followed by one line per row to std::cout, every
 * cell left-aligned in a 20-character column. The header line is printed
 * (possibly empty) even when no header is set.
 */
void CCsvResultSet::Print()
{
    const int columnWidth = 20;

    cout << left;
    for (const auto &title : m_vtHead)
    {
        cout << setw(columnWidth) << title;
    }
    cout << endl;

    for (const auto &record : m_vtRow)
    {
        for (const auto &cell : record)
        {
            cout << setw(columnWidth) << cell;
        }
        cout << endl;
    }
}

/**
 * Reports whether the cursor currently points at an existing row.
 * The cursor itself is not moved; FetchRow() advances it.
 */
bool CCsvResultSet::Next()
{
    const int rowCount = (int)m_vtRow.size();
    return m_iPos >= 0 && m_iPos < rowCount;
}

/**
 * Returns the row under the cursor and advances the cursor by one.
 * Once the cursor is past the last row an empty CCsvRow is returned.
 */
CCsvRow CCsvResultSet::FetchRow()
{
    CCsvRow current;
    if (!Next())
    {
        return current;
    }
    current.SetData(m_vtHead, m_vtRow[m_iPos]);
    ++m_iPos;
    return current;
}

 

/** Replaces the stored column names with vtHead. */
void CCsvResultSet::SetHeader(vector<string> vtHead)
{
    // vtHead is a by-value copy, so swapping it in gives the same result as assigning.
    m_vtHead.swap(vtHead);
}

/** Appends vtRow as a new last row of the result set. */
void CCsvResultSet::SetRow(vector<string> vtRow)
{
    m_vtRow.insert(m_vtRow.end(), vtRow);
}

/** Returns the number of data rows stored (the header is not counted). */
int CCsvResultSet::GetRowCount()
{
    const int rowCount = static_cast<int>(m_vtRow.size());
    return rowCount;
}

/**
 * Returns the row at the given zero-based index without moving the cursor.
 *
 * @throws const char* when index is outside [0, GetRowCount()), or when the
 *         row's field count differs from the header's (raised by SetData)
 */
CCsvRow CCsvResultSet::GetRowByIndex(int index)
{
    if (index < 0 || index >= (int)m_vtRow.size())
    {
        // The message used to read "valid index!!!", the opposite of what happened.
        throw ("invalid index!!!");
    }

    CCsvRow row;
    row.SetData(m_vtHead, m_vtRow[index]);
    return row;
}

 

 

/**
 * Parses a comma-separated file and appends its rows to the helper's result set.
 *
 * @param sFileName      path of the CSV file
 * @param iSkipLine      number of leading lines to skip before parsing
 * @param bFirstLineHead true when the first parsed line is the table header
 * @return true when the file was read to its end; false when the parser could
 *         not be created, the header could not be read, or reading stopped
 *         for a reason other than end of file (the reason is printed to stdout)
 */
bool CCsvHelper::LoadFrom(string sFileName, int iSkipLine, bool bFirstLineHead)
{
    m_sFileName = sFileName;
    m_bHeader = bFirstLineHead;

    CsvParser *csvparser = CsvParser_new(sFileName.c_str(), ",", bFirstLineHead, iSkipLine);
    if (csvparser == NULL) {
        printf("%s\n", "Failed to create CSV parser");
        return false;
    }

    if (bFirstLineHead) { // read the table header
        const CsvRow *header = CsvParser_getHeader(csvparser);
        if (header == NULL) {
            const char *reason = CsvParser_getErrorMessage(csvparser);
            printf("%s\n", reason != NULL ? reason : "Unknown error");
            CsvParser_destroy(csvparser);  // this path used to leak the parser
            return false;
        }

        const char **headerFields = CsvParser_getFields(header);
        const int headerCount = CsvParser_getNumFields(header);
        vector<string> vtHead;
        vtHead.reserve(headerCount);
        for (int i = 0; i < headerCount; i++) {
            vtHead.push_back(headerFields[i]);
        }
        m_rsData.SetHeader(vtHead);
    }

    CsvRow *row = nullptr;
    while ((row = CsvParser_getRow(csvparser)) != nullptr) {
        const char **rowFields = CsvParser_getFields(row);
        const int fieldCount = CsvParser_getNumFields(row);
        vector<string> vtRow;
        vtRow.reserve(fieldCount);
        for (int i = 0; i < fieldCount; i++) {
            vtRow.push_back(rowFields[i]);
        }
        // The fields are copied, so the C row can be released right away.
        CsvParser_destroy_row(row);
        m_rsData.SetRow(vtRow);
    }

    // The parser returns NULL both at end of file and on errors such as a file
    // that cannot be opened; only the stored message tells them apart. Without
    // this check a missing file loaded "successfully" when no header was
    // requested.
    bool ok = true;
    const char *reason = CsvParser_getErrorMessage(csvparser);
    if (reason != NULL && strcmp(reason, "Reached EOF") != 0) {
        printf("%s\n", reason);
        ok = false;
    }

    CsvParser_destroy(csvparser);
    return ok;
}

 

 

 

/**
 * Returns a copy of the rows loaded so far, including the copy's own read
 * cursor; the helper's stored result set is not modified.
 */
CCsvResultSet CCsvHelper::GetResultSet()
{
    CCsvResultSet snapshot(m_rsData);
    return snapshot;
}

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

猜你喜欢

转载自www.cnblogs.com/skiing886/p/9165900.html