大文件内存映射问题

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/nk_wang/article/details/51514404

最近一段时间接到朋友提出的一个关于大文件分解的问题:文件大小不确定,从几百M到几个G都有。对于这样的问题,采用内存映射来处理是最为高效的手段。

需求是这样的:文件开头是136个字节的文件头,后面依次存放一条一条的数据包。每个数据包以12个字节的包头开始:数据包头标识4个字节(0xFFFFFFFF)、文件序号4个字节、数据包大小4个字节(大端序,该大小包含这12个字节的包头)。要求将大文件分解为若干个100M左右的小文件(下面的代码按每个小文件1500个数据包来切分)。根据文件结构,代码实现如下:
//FileInfoDef.h
#ifndef __INCLUDE_FILEINFODEF_H__
#define __INCLUDE_FILEINFODEF_H__ 
#include <iostream>

using namespace  std;
// Fixed-width integer aliases used by the packet parsing code.
using int8   = char;
using uint8  = unsigned char;

using int16  = short;
using uint16 = unsigned short;

using int32  = int;
using uint32 = unsigned int;

// Number of packets written to one output file before the next file is started.
const size_t PACKAGE_NUM = 1500;
// Initial capacity, in bytes, of the buffer that stages one packet read from the big file.
const size_t MAX_PACKAGE_SIZE = 20 * 1024 * 1024;
// Largest input file size intended to be supported (not referenced by the code visible here).
const size_t MAX_FILE_SIZE = 20000 * 65536;

typedef struct FilePos
{
    uint32 NextFileFistCount;   //记录一个数据包映射到另外一个试图的数据大小
    int32  NextPackageHeadCnt;     //记录一个数据包包头映射到另外一个试图长度
    char   PackageHead[12];
    FilePos()
    {
        memset(this,0,sizeof(FilePos));
    }
}FILE_POS_STRU;

#endif
//ByteReader.h
#ifndef __INCLUDE_BYTEREADER_H__
#define __INCLUDE_BYTEREADER_H__
#include "FileInfoDef.h"
// Cursor-style reader over a byte buffer it does not own: the constructor only
// stores the pointer and the destructor never frees it.
// Every accessor first checks how many bytes are left; when too few remain it
// prints a message and returns 0 (or leaves the output array untouched)
// without moving the cursor.
class CByteReader
{
public:
    // Returns the next byte and advances the cursor by 1.
    char GetByte();
    // Reads the next 2 bytes as one value, first byte in the high-order position.
    uint16 GetShort();
    // Reads the next 4 bytes as one value, first byte in the high-order position.
    uint32 GetInt();
    // Advances the cursor by n bytes without reading them.
    void Skip(size_t n);
    // Fills p[0..n) with n consecutive GetInt() results.
    void GetNInt(uint32 *p, size_t n);
    // Fills p[0..n) with n consecutive GetShort() results.
    void GetNShort(uint16 *p, size_t n);
    // Copies the next n raw bytes into p.
    void GetNByte(char *p, size_t n);
public:
    // buffer: start of the data to read; maxNum: number of readable bytes.
    CByteReader(const char *buffer,size_t maxNum);
    virtual ~CByteReader(void);
private:
    char * m_buffer;   // current read position inside the caller's buffer
    size_t m_curByte;  // number of bytes consumed so far
    size_t m_maxByte;  // total number of readable bytes
};
#endif
//ByteReader.cpp
#include "ByteReader.h"
#include <iostream>
using namespace std;


// Wraps an existing buffer of maxNum bytes; the data is neither copied nor owned.
CByteReader::CByteReader( const char *buffer ,size_t maxNum)
    : m_buffer(const_cast<char*>(buffer))
    , m_curByte(0)
    , m_maxByte(maxNum)
{
}

// Nothing is released here (the buffer belongs to the caller); the cursor
// state is simply reset.
CByteReader::~CByteReader(void)
{
    m_maxByte = 0;
    m_curByte = 0;
    m_buffer = NULL;
}

// Reads the next 4 bytes as a big-endian unsigned 32-bit value and advances
// the cursor by 4.
// Returns 0 (after printing a message, cursor unchanged) when fewer than
// 4 bytes remain.
uint32 CByteReader::GetInt()
{
    // Compare against the remaining length so the check itself cannot wrap.
    if (m_maxByte - m_curByte < 4)
    {
        printf("Current Byte is Larger than Max Num!!!");
        return 0;
    }
    uint32 ret = 0;
    for (size_t i = 0; i < 4; i++)
    {
        // Widen to uint32 BEFORE shifting: an unsigned char is promoted to a
        // signed int, and shifting a value >= 0x80 left by 24 would overflow it.
        const uint32 byteValue = static_cast<unsigned char>(*m_buffer);
        ret = (ret << 8) | byteValue;
        m_buffer++;
    }
    m_curByte += 4;
    return ret;
}

// Reads the next 2 bytes as a big-endian unsigned 16-bit value and advances
// the cursor by 2.
// Returns 0 (after printing a message, cursor unchanged) when fewer than
// 2 bytes remain.
uint16 CByteReader::GetShort()
{
    // Compare against the remaining length so the check itself cannot wrap.
    if (m_maxByte - m_curByte < 2)
    {
        printf("Current Byte is Larger than Max Num!!!");
        return 0;
    }
    uint16 ret = 0;
    for (size_t i = 0; i < 2; i++)
    {
        // Go through unsigned char first: converting a (possibly signed) char
        // directly to uint16 sign-extends bytes >= 0x80 to 0xFFxx, which then
        // overwrote the high byte of the result.
        const uint16 byteValue = static_cast<unsigned char>(*m_buffer);
        ret = static_cast<uint16>((ret << 8) | byteValue);
        m_buffer++;
    }
    m_curByte += 2;
    return ret;
}

// Returns the next byte and moves the cursor forward by one.
// When the buffer is exhausted a message is printed and 0 is returned.
char CByteReader::GetByte()
{
    const bool exhausted = (m_curByte + 1 > m_maxByte);
    if (!exhausted)
    {
        const char value = *m_buffer++;
        ++m_curByte;
        return value;
    }
    printf("Current Byte is Larger than Max Num!!!");
    return 0;
}

// Advances the cursor by n bytes without reading them.
// When fewer than n bytes remain a message is printed and nothing moves.
void CByteReader::Skip( size_t n )
{
    // "m_curByte + n" could wrap around for a huge n and wrongly pass the
    // check, so compare n against the number of bytes that are left instead.
    if (n > m_maxByte - m_curByte)
    {
        printf("Current Byte is Larger than Max Num!!!");
        return;
    }
    m_buffer +=  n;
    m_curByte += n;
}

// Reads n consecutive 32-bit values (via GetInt) into p[0..n).
// When fewer than n*4 bytes remain a message is printed and p is left untouched.
void CByteReader::GetNInt(uint32 *p, size_t n )
{
    // Divide instead of multiplying: "n*4" can wrap around for a huge n and
    // wrongly pass the bounds check.
    if (n > (m_maxByte - m_curByte) / 4)
    {
        printf("Current Byte is Larger than Max Num!!!");
        return;
    }
    for(size_t i = 0; i < n; i++)
    {
        p[i] = GetInt();
    }
}

// Reads n consecutive 16-bit values (via GetShort) into p[0..n).
// When fewer than n*2 bytes remain a message is printed and p is left untouched.
void CByteReader::GetNShort( uint16 *p, size_t n )
{
    // Divide instead of multiplying: "n*2" can wrap around for a huge n and
    // wrongly pass the bounds check.
    if (n > (m_maxByte - m_curByte) / 2)
    {
        printf("Current Byte is Larger than Max Num!!!");
        return;
    }
    for(size_t i = 0; i < n; i++)
    {
        p[i] = GetShort();
    }
}

// Copies the next n raw bytes into p and advances the cursor by n.
// When fewer than n bytes remain a message is printed and p is left untouched.
void CByteReader::GetNByte( char *p, size_t n )
{
    // Compare against the remaining length: "m_curByte + n" could wrap around.
    if (n > m_maxByte - m_curByte)
    {
        printf("Current Byte is Larger than Max Num!!!");
        return;
    }
    if (0 == n)
    {
        // Nothing to copy; also avoids calling memcpy with a possibly NULL p.
        return;
    }
    memcpy(p, m_buffer,n);
    m_curByte += n;
    m_buffer += n;
}
//ByteExChange.h
#ifndef __INCLUDE_BYTEEXCHANGE_H__
#define __INCLUDE_BYTEEXCHANGE_H__
// Helpers that convert between wide (wchar_t) strings and multi-byte (char)
// strings; both conversions are exposed as static functions.
class CByteExChange
{
public:
    CByteExChange(void);
    ~CByteExChange(void);
public:
    // NOTE: both functions return a buffer allocated with new[]; the caller
    // must release it with delete[] after use. They return NULL on failure.
    static char* UnicodeToAnsi( const wchar_t*szStr );
    static wchar_t* AnsiToUnicode( const char* szStr );
};
#endif
//ByteExChange.cpp
#include "ByteExChange.h"
#include <Windows.h>

// The class only offers static helpers, so there is nothing to set up.
CByteExChange::CByteExChange() {}


// No state is held, so there is nothing to tear down.
CByteExChange::~CByteExChange() {}
// Converts a NUL-terminated wide (wchar_t) string to a multi-byte char string
// in the system ANSI code page (CP_ACP).
// Returns a buffer allocated with new[] that the caller must delete[], or
// NULL when szStr is NULL or the conversion fails.
char* CByteExChange::UnicodeToAnsi( const wchar_t*szStr )
{
    if (NULL == szStr)
    {
        return NULL;
    }

    // First call: ask for the required size in bytes (terminating NUL included).
    const int nLen = WideCharToMultiByte( CP_ACP, 0, szStr, -1, NULL, 0, NULL, NULL );
    if (nLen <= 0)
    {
        return NULL;
    }

    char* pResult = new char[nLen];

    // Second call: do the conversion. It can still fail, in which case the
    // buffer holds garbage and must not be handed to the caller.
    if (0 == WideCharToMultiByte( CP_ACP, 0, szStr, -1, pResult, nLen, NULL, NULL ))
    {
        delete [] pResult;
        return NULL;
    }

    return pResult;
}

// Converts a NUL-terminated multi-byte string in the system ANSI code page
// (CP_ACP) to a wide (wchar_t) string.
// Returns a buffer allocated with new[] that the caller must delete[], or
// NULL when szStr is NULL or the conversion fails.
wchar_t* CByteExChange::AnsiToUnicode( const char* szStr )
{
    if (NULL == szStr)
    {
        return NULL;
    }

    // First call: ask for the required size in wide characters (NUL included).
    const int nLen = MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, szStr, -1, NULL, 0 );
    if (nLen <= 0)
    {
        return NULL;
    }

    wchar_t* pResult = new wchar_t[nLen];

    // Second call: do the conversion. It can still fail, in which case the
    // buffer holds garbage and must not be handed to the caller.
    if (0 == MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, szStr, -1, pResult, nLen ))
    {
        delete [] pResult;
        return NULL;
    }

    return pResult;
}
//FileProcss.h
#ifndef __INCLUDE_PARSEFILE_H__
#define __INCLUDE_PARSEFILE_H__

#include <string>
#include <list>
#include <fstream>
#include <Windows.h>

#include "FileInfoDef.h"
using namespace std;
// Splits one large packet file into numbered smaller files by walking through
// it with successive memory-mapped views.
class  CFileProcess
{
public:
    CFileProcess(void);
    virtual ~CFileProcess(void);
public:
    // Entry point: splits the file strFileName; output names are built from strDes.
    void SplitFile(string strFileName,string strDes);
private:
    // Builds the output base name: strDes plus the input file name without ".bin".
    string GetFileNameNoSuffix( string &strFileName, string strDes );
    // Consumes the packets of the currently mapped view.
    void ProcessFile( DWORD dwBlockBytes, string strFileName);
    // Declared only; no definition is visible in this file.
    void GetFileList(string filePath,list<string> &fileList,string strType);
    // Checks for the 4-byte 0xFF packet marker at m_buf and advances m_buf by 4.
    bool JudgePackageHeader();
    // Returns "<base>_NNNN.bin" when a new output file is due, otherwise "".
    string GetSmallFileName(string & strFileName );
    // Writes CountUint bytes of m_packageArray to the current output file.
    void WriteContentToFile(string & strFileName,size_t  CountUint,bool bFlag = false);
    // Grows m_packageArray so that it can hold at least Count bytes.
    void ResizeBuffer(size_t Count);
private:
    char *m_buf;               // read cursor inside the currently mapped view
    char *m_pBuf;              // start address of the currently mapped view
    char *m_packageArray;      // staging buffer holding the packet content
    __int64 m_FileSize;        // size in bytes of the currently mapped view
    __int64 m_dwBlockSize;     // bytes of the mapped view still to be processed
    size_t m_counter;          // number of packets written to the current output file
    size_t m_fileNo;           // sequence number of the output file
    bool m_isChangeFile;       // true when a new output file has to be opened
    char *m_fileHeader;        // copy of the input file's header
    FILE_POS_STRU m_stfilepos; // carry-over state for a packet split across two views
    size_t m_bufSize;          // capacity of m_packageArray in bytes
private:
    fstream m_fout;            // output stream of the current small file
};


#endif
//FileProcess.cpp
#include "FileProcess.h"
#include "ByteReader.h"
#include "ByteExChange.h"

// Exported C entry point: splits the file strFileName into smaller files whose
// names are built from the directory strDes.
// Both arguments must be NUL-terminated strings. Failures are reported on
// stdout; nothing is returned and no C++ exception leaves this function.
extern "C"  __declspec(dllexport) void __stdcall  SplitFile(char * strFileName,char* strDes)
{
    // Converting a NULL char* to std::string is undefined behaviour.
    if (NULL == strFileName || NULL == strDes)
    {
        printf("SplitFile: file name and destination path must not be NULL\n");
        return;
    }
    try
    {
        CFileProcess file;
        file.SplitFile(strFileName,strDes);
    }
    catch (...)
    {
        // A C++ exception (e.g. out of memory) must not cross the
        // extern "C" / DLL boundary into a caller that may not be C++.
        printf("SplitFile: aborted by an unexpected C++ exception\n");
    }
}
// Puts every member into a defined state and allocates the packet staging
// buffer (MAX_PACKAGE_SIZE bytes) and the 136-byte file-header buffer.
// The constructor never throws: if an allocation fails, both buffers are left
// NULL and a message is printed.
CFileProcess::CFileProcess(void)
    : m_buf(NULL)
    , m_pBuf(NULL)
    , m_packageArray(NULL)
    , m_FileSize(0)
    , m_dwBlockSize(0)
    , m_counter(0)
    , m_fileNo(0)
    , m_isChangeFile(true)
    , m_fileHeader(NULL)
    , m_bufSize(0)
{
    try
    {
        m_packageArray = new char[MAX_PACKAGE_SIZE];
        m_bufSize = MAX_PACKAGE_SIZE;
        m_fileHeader = new char[136];
        memset(m_fileHeader,0,136);
    }
    catch (...)
    {
        // Roll back to the "no buffers" state so that the destructor never
        // sees a half-initialised or dangling pointer.
        delete [] m_packageArray;
        m_packageArray = NULL;
        m_bufSize = 0;
        delete [] m_fileHeader;
        m_fileHeader = NULL;
        printf("CFileProcess: failed to allocate working buffers\n");
    }
}


// Closes the output stream if it is still open and frees both heap buffers.
CFileProcess::~CFileProcess(void)
{
    // Output stream of the current small file.
    if (m_fout.is_open())
    {
        m_fout.close();
    }

    // File-header buffer (delete[] on a NULL pointer is a no-op).
    delete [] m_fileHeader;
    m_fileHeader = NULL;

    // Packet staging buffer.
    delete [] m_packageArray;
    m_packageArray = NULL;
}
// Splits the file strFileName into smaller files.
//
// The input is opened read-only and walked through with memory-mapped views of
// at most 5000 * (system allocation granularity) bytes each. The first 136
// bytes of the file are kept as the file header; the rest of every view is
// handed to ProcessFile().
//
// strFileName: path of the input file (ANSI code page).
// strDes:      output directory; output names are built by GetFileNameNoSuffix().
//
// Errors are reported on stdout and end the split early. Handles and mapped
// views are released on every exit path.
void CFileProcess::SplitFile(string strFileName,string strDes)
{
    // Closes a Win32 handle when the scope is left.
    struct HandleGuard
    {
        HANDLE h;
        explicit HandleGuard(HANDLE handle) : h(handle) {}
        ~HandleGuard()
        {
            if (NULL != h && INVALID_HANDLE_VALUE != h)
            {
                ::CloseHandle(h);
            }
        }
    };
    // Unmaps a view when the scope is left. It stores the BASE address returned
    // by MapViewOfFile, which is the only address UnmapViewOfFile accepts.
    struct ViewGuard
    {
        const void *base;
        explicit ViewGuard(const void *p) : base(p) {}
        ~ViewGuard()
        {
            if (NULL != base)
            {
                ::UnmapViewOfFile(base);
            }
        }
    };

    if (NULL == m_packageArray || NULL == m_fileHeader)
    {
        printf("SplitFile: working buffers are not allocated\n");
        return;
    }

    // Convert the file name and open the input file.
    wchar_t * wText = CByteExChange::AnsiToUnicode(strFileName.c_str());
    if (NULL == wText)
    {
        printf("SplitFile: cannot convert the input file name\n");
        return;
    }
    HANDLE hFile = ::CreateFileW(wText,GENERIC_READ,FILE_SHARE_READ,
        NULL,OPEN_EXISTING,FILE_FLAG_RANDOM_ACCESS,NULL);
    // Read the error code before anything else can overwrite it.
    const DWORD dwOpenError = GetLastError();
    delete [] wText;

    if (INVALID_HANDLE_VALUE == hFile)
    {
        printf("CreateFile Failed,ErrorCode is %lu",dwOpenError);
        return;
    }
    HandleGuard fileGuard(hFile);

    // Get the file size (64-bit).
    DWORD dwFileSizeHigh = 0;
    const DWORD dwFileSizeLow = GetFileSize(hFile, &dwFileSizeHigh);
    if (INVALID_FILE_SIZE == dwFileSizeLow && NO_ERROR != GetLastError())
    {
        printf("GetFileSize Failed,ErrorCode is %lu",GetLastError());
        return;
    }
    __int64 qwFileSize = (static_cast<__int64>(dwFileSizeHigh) << 32) | dwFileSizeLow;
    if (qwFileSize < 136)
    {
        // Copying the file header below would read past the end of the mapping.
        printf("SplitFile: file is smaller than the 136-byte file header\n");
        return;
    }

    // Create the file mapping object.
    HANDLE hFileMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
    if (NULL == hFileMap)
    {
        printf("CreateFileMapping Failed,ErrorCode is %lu",GetLastError());
        return;
    }
    HandleGuard mapGuard(hFileMap);

    string strFileNameNoSuffix = GetFileNameNoSuffix(strFileName, strDes);

    // View offsets must be multiples of the allocation granularity, so the
    // view size is chosen as a multiple of it.
    SYSTEM_INFO SysInfo;
    GetSystemInfo(&SysInfo);
    const DWORD dwGran = SysInfo.dwAllocationGranularity;
    DWORD dwBlockBytes = 5000 * dwGran;
    if (qwFileSize < dwBlockBytes)
    {
        dwBlockBytes = static_cast<DWORD>(qwFileSize);
    }

    __int64 qwFileOffset = 0;
    bool isFirstView = true;
    // Map and process one view after another.
    while (qwFileSize > 0)
    {
        m_buf = (char *)MapViewOfFile(hFileMap,FILE_MAP_READ,
            (DWORD)(qwFileOffset>>32),(DWORD)(qwFileOffset&0xFFFFFFFF),dwBlockBytes);
        if (NULL == m_buf)
        {
            printf("Create MapView failed, ErrorCode is %lu\n",GetLastError());
            return;
        }
        // ProcessFile() advances m_buf, so the guard keeps the base address.
        ViewGuard viewGuard(m_buf);
        m_pBuf = m_buf;
        m_FileSize = dwBlockBytes;
        m_dwBlockSize = dwBlockBytes;

        if (isFirstView)
        {
            // The first view starts with the file header: keep a copy and skip it.
            memcpy(m_fileHeader, m_buf, 136);
            m_buf += 136;
            m_dwBlockSize -= 136;
            isFirstView = false;
        }

        ProcessFile(dwBlockBytes, strFileNameNoSuffix);

        // Move on to the next view; the last one may be shorter.
        qwFileOffset += dwBlockBytes;
        qwFileSize -= dwBlockBytes;
        if (qwFileSize > 0 && qwFileSize < dwBlockBytes)
        {
            dwBlockBytes = static_cast<DWORD>(qwFileSize);
        }
    }

    // No view is mapped any more; do not keep pointers into it.
    m_buf = NULL;
    m_pBuf = NULL;
}
// Builds the base name for the output files: strDes followed by the last path
// component of strFileName (path separator included) with ".bin" removed.
// Example: ("F:\\in\\test4.bin", "F:\\out") -> "F:\\out\\test4".
string CFileProcess::GetFileNameNoSuffix( string &strFileName, string strDes ) 
{
    string baseName;
    const size_t sepPos = strFileName.find_last_of("\\/");
    if (string::npos == sepPos)
    {
        // A bare file name: substr(npos) would throw std::out_of_range, so
        // supply the separator that normally comes with the path component.
        baseName = "\\" + strFileName;
    }
    else
    {
        baseName = strFileName.substr(sepPos);
    }

    // Cut at the LAST ".bin" so a name such as "x.binary.bin" keeps "x.binary".
    const size_t extPos = baseName.rfind(".bin");
    if (string::npos != extPos)
    {
        baseName.erase(extPos);
    }

    return strDes + baseName;
}
// Consumes the packets of the currently mapped view.
//
// On entry m_buf points at the first unprocessed byte, m_pBuf at the start of
// the view, m_FileSize is the view size and m_dwBlockSize the number of bytes
// still to be processed. Each packet starts with a 12-byte header: a 4-byte
// 0xFF marker, a 4-byte sequence number and a 4-byte big-endian size that
// counts the whole packet including the header.
//
// A packet that straddles two views is assembled in m_packageArray and written
// only once it is complete, so it always lands in a single output file:
//  * fewer than 12 header bytes left in a view -> they are saved in
//    m_stfilepos.PackageHead and NextPackageHeadCnt records how many are missing;
//  * header complete but body cut off -> the available part is copied to
//    m_packageArray and NextFileFistCount records how many bytes are missing.
// A packet that is never completed (truncated input) is not written.
//
// dwBlockBytes: size of the mapped view; unused here because the view size is
//               already tracked in m_FileSize / m_dwBlockSize.
// strFileName:  base name of the output files (no number, no extension).
void CFileProcess::ProcessFile( DWORD dwBlockBytes, string strFileName ) 
{
    (void)dwBlockBytes;
    const uint32 HEAD_SIZE = 12;

    // 1) Finish a packet whose HEADER was cut off by the previous view.
    if (m_stfilepos.NextPackageHeadCnt > 0)
    {
        const uint32 missing = static_cast<uint32>(m_stfilepos.NextPackageHeadCnt);
        m_stfilepos.NextPackageHeadCnt = 0;
        if (missing > HEAD_SIZE || m_dwBlockSize < missing)
        {
            printf("ProcessFile: cannot complete a packet header split across views\n");
            return;
        }
        // The bytes saved earlier occupy the front of PackageHead; append the rest.
        memcpy(m_stfilepos.PackageHead + (HEAD_SIZE - missing), m_buf, missing);

        CByteReader rReader(m_stfilepos.PackageHead, HEAD_SIZE);
        rReader.Skip(8);   // skip marker and sequence number
        const uint32 CountUint = rReader.GetInt();
        if (CountUint < HEAD_SIZE ||
            static_cast<__int64>(CountUint - HEAD_SIZE) > m_dwBlockSize - missing)
        {
            printf("ProcessFile: invalid packet size %u\n", CountUint);
            return;
        }
        const uint32 bodySize = CountUint - HEAD_SIZE;

        ResizeBuffer(CountUint);
        memcpy(m_packageArray, m_stfilepos.PackageHead, HEAD_SIZE);
        memcpy(m_packageArray + HEAD_SIZE, m_buf + missing, bodySize);
        m_buf += missing + bodySize;
        m_dwBlockSize -= missing + bodySize;

        string strName = GetSmallFileName(strFileName);
        WriteContentToFile(strName, CountUint);
    }

    // 2) Finish a packet whose BODY was cut off by the previous view. Its first
    //    part, header included, is still at the front of m_packageArray.
    if (m_stfilepos.NextFileFistCount > 0)
    {
        const uint32 remaining = m_stfilepos.NextFileFistCount;
        m_stfilepos.NextFileFistCount = 0;

        CByteReader rBuffered(m_packageArray, HEAD_SIZE);
        rBuffered.Skip(8);
        const uint32 CountUint = rBuffered.GetInt();
        if (remaining >= CountUint || remaining > m_dwBlockSize)
        {
            printf("ProcessFile: cannot complete a packet split across views\n");
            return;
        }
        memcpy(m_packageArray + (CountUint - remaining), m_buf, remaining);
        m_buf += remaining;
        m_dwBlockSize -= remaining;

        string strName = GetSmallFileName(strFileName);
        WriteContentToFile(strName, CountUint);
    }

    // 3) Packets that start inside this view.
    while (m_buf < m_pBuf + m_FileSize)
    {
        if (m_dwBlockSize <= 0)
        {
            break;
        }
        if (m_dwBlockSize < HEAD_SIZE)
        {
            // Not even a full header left: save what is here for the next view.
            memcpy(m_stfilepos.PackageHead, m_buf, static_cast<size_t>(m_dwBlockSize));
            m_stfilepos.NextPackageHeadCnt = static_cast<int32>(HEAD_SIZE - m_dwBlockSize);
            break;
        }

        // Stop at the first position that does not carry the packet marker.
        if (!JudgePackageHeader())
        {
            break;
        }
        // JudgePackageHeader() consumed the marker; go back to the packet start
        // so that the whole packet, header included, is copied.
        m_buf -= 4;

        CByteReader rHeader(m_buf, HEAD_SIZE);
        rHeader.Skip(8);   // skip marker and sequence number
        const uint32 CountUint = rHeader.GetInt();
        if (CountUint < HEAD_SIZE)
        {
            // A size below the header size would never advance the cursor.
            printf("ProcessFile: invalid packet size %u\n", CountUint);
            break;
        }

        ResizeBuffer(CountUint);
        if (CountUint > m_dwBlockSize)
        {
            // The body continues in the next view: keep the first part buffered.
            memcpy(m_packageArray, m_buf, static_cast<size_t>(m_dwBlockSize));
            m_stfilepos.NextFileFistCount = static_cast<uint32>(CountUint - m_dwBlockSize);
            break;
        }

        memcpy(m_packageArray, m_buf, CountUint);
        m_buf += CountUint;
        m_dwBlockSize -= CountUint;
        string strName = GetSmallFileName(strFileName);
        WriteContentToFile(strName, CountUint);
    }
}
// Tests whether the 4 bytes at m_buf are all 0xFF (the packet marker).
// m_buf is advanced by 4 in every case, whether the marker matches or not.
bool CFileProcess::JudgePackageHeader()
{
    const unsigned char *marker = reinterpret_cast<const unsigned char *>(m_buf);
    m_buf += 4;
    for (int idx = 0; idx < 4; ++idx)
    {
        if (marker[idx] != 0xFF)
        {
            return false;
        }
    }
    return true;
}
// Appends CountUint bytes from m_packageArray to the current output file.
//
// When no packet has been counted for the current file yet, the file
// strFileName is opened first and the 136-byte file header is written to it.
// After PACKAGE_NUM counted packets the file is closed and the next call
// starts a new one.
//
// strFileName: name of the output file; only used when a file must be opened.
// CountUint:   number of bytes to write.
// bFlag:       true when the data must not be counted as a new packet.
void CFileProcess::WriteContentToFile(string & strFileName,size_t  CountUint,bool bFlag)
{
    // Start a new output file and write the file header.
    if (0 == m_counter && !m_fout.is_open())
    {
        // Drop error flags left over from the previous file.
        m_fout.clear();
        m_fout.open(strFileName.c_str(), ios::out|ios::binary);
        // A failed open() sets failbit, not badbit, so bad() cannot detect it.
        if (!m_fout.is_open())
        {
            printf("Open output file failed: %s\n", strFileName.c_str());
            // Ask GetSmallFileName() for the name again so that the next
            // packet retries the open instead of using an empty name.
            m_isChangeFile = true;
            return;
        }
        m_fout.write(m_fileHeader,136);
    }

    m_fout.write(m_packageArray,static_cast<std::streamsize>(CountUint));
    m_fout.flush();
    if (m_fout.fail())
    {
        printf("Write to output file failed\n");
    }
    if (!bFlag)
    {
        m_counter += 1;
    }
    if (PACKAGE_NUM == m_counter)
    {
        // The file is full: close it and switch to the next file number.
        m_counter = 0;
        m_fileNo += 1;
        m_isChangeFile = true;
        m_fout.close();
    }
}

// Returns the name of the next output file, "<strFileName>_NNNN.bin" with NNNN
// the zero-padded file number, but only when a new file is due
// (m_isChangeFile is set); the flag is cleared afterwards.
// Returns an empty string when the current output file is still in use.
string CFileProcess::GetSmallFileName(string & strFileName ) 
{
    string strName("");
    if (m_isChangeFile)
    {
        // m_fileNo is a size_t: convert it explicitly to the type named by the
        // format specifier. The buffer holds '_' plus 10 digits plus the NUL.
        char temp[16] = {'\0'};
        sprintf_s(temp, sizeof(temp), "_%04u", static_cast<unsigned int>(m_fileNo));
        strName = strFileName;
        strName += temp;
        strName += ".bin";
        m_isChangeFile = false;
    }   
    return strName;
}

// Makes sure m_packageArray can hold at least Count bytes.
// The buffer only ever grows. When it is reallocated its previous content is
// NOT preserved, so callers must fill it after calling this function.
void CFileProcess::ResizeBuffer( size_t Count )
{
    if (Count <= m_bufSize)
    {
        return;
    }
    // Allocate first: if new[] throws, the old buffer and its recorded size
    // are still valid instead of leaving a dangling pointer behind.
    char *bigger = new char[Count];
    delete [] m_packageArray;
    m_packageArray = bigger;
    m_bufSize = Count;
}
//Interface.h
# ifndef __INCLUDE_INTERFACE_H__
# define __INCLUDE_INTERFACE_H__
#include "FileProcess.h"
/************************************************************************/
/* Purpose: split one large file into several smaller files.
/* strFileName[in]: input file name, e.g. F:\\project\\test4\\test4.bin
/* strDes[in]: directory that receives the split files, e.g. F:\\project
/************************************************************************/
extern "C"  __declspec(dllexport) void __stdcall  SplitFile(char * strFileName,char* strDes);

# endif

猜你喜欢

转载自blog.csdn.net/nk_wang/article/details/51514404