CSV文件内容读取

CSV(逗号分隔值文件格式)

         逗号分隔值(Comma-Separated Values,CSV,有时也称为字符分隔值,因为分隔字符也可以不是逗号),其文件以纯文本形式存储表格数据(数字和文本)。纯文本意味着该文件是一个字符序列,不含必须像二进制数字那样被解读的数据。CSV文件由任意数目的记录组成,记录间以某种换行符分隔;每条记录由字段组成,字段间的分隔符是其它字符或字符串,最常见的是逗号或制表符。通常,所有记录都有完全相同的字段序列。

CSV文件格式的通用标准并不存在,但是在RFC 4180中有基础性的描述。使用的字符编码同样没有被指定,但是7-bit ASCII是最基本的通用编码。

这种文件格式经常用来作为不同程序之间的数据交互的格式。

具体文件格式：每条记录占一行；以逗号为分隔符；逗号前后的空格会被忽略；字段中包含有逗号时，该字段必须用双引号括起来；字段中包含有换行符时，该字段必须用双引号括起来；字段前后包含有空格时，该字段必须用双引号括起来；字段中的双引号用两个双引号表示；字段中如果有双引号，该字段必须用双引号括起来；第一条记录可以是字段名。

John,Doe,120 jefferson st.,Riverside, NJ, 08075
Jack,McGinnis,220 hobo Av.,Phila, PA,09119
"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075
Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234
,Blankman,,SomeTown, SD, 00298
"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123

#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

ifstream file(m_strFilePath);
    std::string row;
    vector<string> infRow;
    getline(file, row);//读取第一行
    MySplit(row, infRow); //获取每列的内容

    while (file.good())
    {
        //读取每一行
        getline(file, row);
        CStringA strRow = row.c_str();
        strRow.Replace("\\", "\\\\");
        strRow.Replace("'", "\\'");
        row = strRow;
        infRow.clear();
        MySplit(row, infRow);
                //...
    }




// Scratch buffer that PushInTemp() fills with a NUL-terminated copy of the
// current field; it is allocated with new[] on demand.
char *m_pcTemp;
// Capacity, in bytes, used when allocating m_pcTemp; PushInTemp() raises it
// when a requested length does not fit.
int m_iMaxTempLength;

// Initial values for the two members above (presumably part of the owning
// class's constructor initializer list): no buffer yet, 10240-byte capacity.
m_pcTemp(NULL), m_iMaxTempLength(10240)

//csv
// Copies iLen bytes starting at pcCursor into the scratch buffer m_pcTemp and
// NUL-terminates the copy, growing the buffer when necessary.
//
// pcCursor  start of the bytes to copy; a null pointer is treated as empty.
// iLen      number of bytes to copy; negative values are treated as 0.
//
// After the call m_pcTemp always points to a valid NUL-terminated string, so
// callers can safely build a std::string from it even for an empty field.
void PushInTemp(const char *pcCursor, int iLen)
{
    // A negative length would otherwise write the terminator before the start
    // of the buffer.
    if (iLen < 0 || !pcCursor)
    {
        iLen = 0;
    }

    // (Re)allocate when there is no buffer yet or the copy plus its
    // terminator would not fit.
    if (!m_pcTemp || iLen >= m_iMaxTempLength)
    {
        int iNewCapacity = m_iMaxTempLength;
        if (iLen >= iNewCapacity)
        {
            // Double the requested size; the +1 guarantees room for the
            // terminator even when iLen is 0.
            iNewCapacity = iLen * 2 + 1;
        }

        // Allocate first so that a throwing new[] leaves the old buffer intact.
        char *pcNew = new char[iNewCapacity];
        delete[] m_pcTemp;
        m_pcTemp = pcNew;
        m_iMaxTempLength = iNewCapacity;
    }

    if (iLen > 0)
    {
        std::memcpy(m_pcTemp, pcCursor, static_cast<std::size_t>(iLen));
    }
    m_pcTemp[iLen] = '\0';
}

//csv
// Splits one CSV record into its fields and appends them to infRow.
//
// row     the record to split; it is only read. Scanning stops at the first
//         NUL character.
// infRow  receives the fields in order; existing elements are kept.
//
// Rules implemented:
//  - Fields are separated by ','.
//  - A field may be enclosed in double quotes; inside quotes a comma is
//    literal and "" stands for one double quote.
//  - Text between the cursor and an opening quote is discarded.
//  - An empty row produces no fields.
//  - If a quoted field is never closed, parsing stops and that field is not
//    emitted.
//  - NOTE: an empty field after a trailing comma ("a,b,") is not emitted;
//    this matches the long-standing behaviour callers may rely on.
void MySplit(std::string &row, std::vector<std::string> &infRow)
{
    if (row.empty())
    {
        return;
    }

    const char *pcCursor = row.c_str();

    for (;;)
    {
        const char *pcComma = std::strchr(pcCursor, ',');
        const char *pcQuot = std::strchr(pcCursor, '"');

        // Neither separator nor quote left: the remainder is the last field.
        if (!pcComma && !pcQuot)
        {
            infRow.push_back(pcCursor);
            break;
        }

        if (!pcQuot || (pcComma && pcComma < pcQuot))
        {
            // Plain field terminated by the next comma. Pointer subtraction
            // replaces casting the pointers to int, which truncates on 64-bit.
            infRow.emplace_back(pcCursor, static_cast<std::size_t>(pcComma - pcCursor));
            pcCursor = pcComma + 1;
        }
        else if (!pcComma)
        {
            // Quoted field with no comma after the cursor: it is the last
            // field and spans from the first quote to the last quote.
            const char *pcLastQuot = std::strrchr(pcCursor, '"');
            if (pcLastQuot == pcQuot)
            {
                // Only one quote: the field is unterminated. Stop, as the
                // quoted-field branch below does.
                break;
            }

            std::string strQuotData;
            for (const char *pc = pcQuot + 1; pc < pcLastQuot; ++pc)
            {
                strQuotData += *pc;
                // Collapse an escaped quote ("") into a single quote.
                if (*pc == '"' && pc + 1 < pcLastQuot && pc[1] == '"')
                {
                    ++pc;
                }
            }
            infRow.push_back(strQuotData);
            break;
        }
        else
        {
            // Quoted field that contains, or is followed by, a comma.
            std::string strQuotData;
            pcCursor = pcQuot + 1;

            for (;;)
            {
                const char *pcNextQuot = std::strchr(pcCursor, '"');
                if (!pcNextQuot)
                {
                    // Unterminated quoted field: give up on the rest.
                    return;
                }

                strQuotData.append(pcCursor, static_cast<std::size_t>(pcNextQuot - pcCursor));

                if (pcNextQuot[1] == '"')
                {
                    // Escaped quote: keep one and continue inside the field.
                    strQuotData += '"';
                    pcCursor = pcNextQuot + 2;
                    continue;
                }

                // Closing quote: emit the field and step over the separator.
                infRow.push_back(strQuotData);
                pcCursor = pcNextQuot + 1;
                if (*pcCursor == ',')
                {
                    ++pcCursor;
                }
                break;
            }
        }

        if (*pcCursor == '\0')
        {
            break;
        }
    }
}

猜你喜欢

转载自www.cnblogs.com/pjl1119/p/8931615.html