Reading the contents of a CSV file

CSV (Comma-Separated Values file format)

Comma-separated values (CSV; sometimes also called character-separated values, because the separating character does not have to be a comma) files store tabular data (numbers and text) in plain text. Plain text means that the file is a sequence of characters and contains no data that must be interpreted as binary numbers. A CSV file consists of any number of records separated by some kind of newline character; each record consists of fields separated by some other character or string, most commonly a comma or a tab. Typically, all records have the exact same sequence of fields.

There is no single, universally followed standard for the CSV file format, but its basic form is described in RFC 4180. The character encoding is not specified either, but 7-bit ASCII is the most basic universal encoding.

This file format is often used as a format for data interaction between different programs.

Specific file format: Each record occupies one line, with a comma as the delimiter. Spaces before and after a comma are ignored. If a field contains commas, it must be enclosed in double quotation marks. If a field contains newline characters, it must be enclosed in double quotation marks. If a field has leading or trailing spaces, it must be enclosed in double quotation marks. A double quotation mark inside a field is represented by two double quotation marks. If a field contains double quotation marks, it must be enclosed in double quotation marks. The first record may be a row of field names.

John,Doe,120 jefferson st.,Riverside, NJ, 08075
Jack,McGinnis,220 hobo Av.,Phila, PA,09119
"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075
Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234
,Blankman,,SomeTown, SD, 00298
"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123

 

#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

ifstream file(m_strFilePath);
    std::string row;
    vector<string> infRow;
    getline(file, row); // Read the first row 
    MySplit(row, infRow); // Get the content of each column

    while (file.good())
    {
        // Read each line 
        getline(file, row);
        CStringA strRow = row.c_str();
        strRow.Replace("\\", "\\\\");
        strRow.Replace("'", "\\'");
        row = strRow;
        infRow.clear();
        MySplit(row, infRow);
                //...
    }




// Scratch buffer that PushInTemp() fills; PushInTemp() allocates it with new[]
// on demand and replaces it when a longer string has to fit.
char *m_pcTemp;
// Number of chars PushInTemp() requests when it allocates m_pcTemp.
int m_iMaxTempLength;

// Member-initializer fragment: the buffer starts out unallocated and the
// allocation size starts at 10240.
m_pcTemp(NULL), m_iMaxTempLength(10240)

//csv
// Copies the first iLen characters at pcCursor into the scratch buffer
// m_pcTemp and NUL-terminates the copy, growing the buffer when needed.
//
// pcCursor  start of the characters to copy; a null pointer is treated as an
//           empty string.
// iLen      number of characters to copy; negative values are treated as 0.
//
// After the call m_pcTemp always points to a valid, NUL-terminated C string,
// including for empty input, so callers may construct a std::string from it.
void PushInTemp(const char *pcCursor, int iLen)
{
    // A negative length would otherwise write the terminator in front of the
    // buffer; a null source cannot be copied from.
    if (iLen < 0 || !pcCursor)
    {
        iLen = 0;
    }

    if (iLen >= m_iMaxTempLength)
    {
        // Grow with headroom; the "+ 1" guarantees room for the terminator
        // even when iLen is 0. Allocate before releasing the old buffer so a
        // failed allocation leaves the members consistent.
        const int iNewLength = iLen * 2 + 1;
        char *pcNew = new char[iNewLength];
        delete[] m_pcTemp;
        m_pcTemp = pcNew;
        m_iMaxTempLength = iNewLength;
    }
    else if (!m_pcTemp)
    {
        // First use: iLen < m_iMaxTempLength here, so the terminator fits.
        m_pcTemp = new char[m_iMaxTempLength];
    }

    if (iLen > 0)
    {
        memcpy(m_pcTemp, pcCursor, static_cast<size_t>(iLen));
    }

    // Must be the single character '\0'; a multi-character literal such as
    // ' \0 ' does not terminate the string.
    m_pcTemp[iLen] = '\0';
}

//csv
// Splits one CSV record into its fields and appends them to infRow.
//
// row     one line of the CSV file, without the line terminator.
// infRow  receives the fields in order; existing elements are kept.
//
// Rules:
//  - Fields are separated by ','. Unquoted fields are copied verbatim
//    (surrounding spaces are NOT trimmed).
//  - A field whose first '"' comes before the next ',' is a quoted field: any
//    text in front of the opening quote is discarded, commas inside the quotes
//    belong to the field, and a doubled quote ("") stands for one literal '"'.
//    This applies to every quoted field, including the last one in the row.
//  - A comma at the very end of the row produces a final empty field.
//  - An empty row produces no fields.
//  - If a quoted field is never closed, parsing stops and that field is
//    dropped; the fields found before it stay in infRow.
//
// All positions are std::string indices, so nothing depends on converting
// pointers to int.
void MySplit(std::string &row, std::vector<std::string> &infRow)
{
    typedef std::string::size_type Pos;
    const Pos kNone = std::string::npos;
    const Pos nLen = row.size();

    if (0 == nLen)
    {
        return;
    }

    Pos nCursor = 0; // start of the field being parsed; always < nLen at loop top

    for (;;)
    {
        const Pos nComma = row.find(',', nCursor);
        const Pos nQuot = row.find('"', nCursor);

        if (kNone == nQuot || (kNone != nComma && nComma < nQuot))
        {
            // Unquoted field.
            if (kNone == nComma)
            {
                // Last field: everything up to the end of the row.
                infRow.push_back(row.substr(nCursor));
                break;
            }
            infRow.push_back(row.substr(nCursor, nComma - nCursor));
            nCursor = nComma + 1;
        }
        else
        {
            // Quoted field: start right after the opening quote.
            std::string strQuotData;
            nCursor = nQuot + 1;

            for (;;)
            {
                const Pos nNextQuot = row.find('"', nCursor);
                if (kNone == nNextQuot)
                {
                    // Unterminated quoted field: give up on this row.
                    return;
                }

                strQuotData.append(row, nCursor, nNextQuot - nCursor);

                if (nNextQuot + 1 < nLen && '"' == row[nNextQuot + 1])
                {
                    // Escaped quote: keep one '"' and continue after the pair.
                    strQuotData += '"';
                    nCursor = nNextQuot + 2;
                    continue;
                }

                // Closing quote.
                nCursor = nNextQuot + 1;
                break;
            }

            infRow.push_back(strQuotData);

            if (nCursor == nLen)
            {
                // The row ended right after the closing quote.
                break;
            }
            if (',' != row[nCursor])
            {
                // Stray text after the closing quote is parsed as the start
                // of the next field.
                continue;
            }
            ++nCursor; // skip the delimiter that follows the closing quote
        }

        if (nCursor == nLen)
        {
            // The row ended with a delimiter: the last field is empty.
            infRow.push_back(std::string());
            break;
        }
    }
}

 

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=324779963&siteId=291194637