Linux learning summary (13) process exercise: large file multi-process copy

Implementing a multi-process file copy

      Suppose we need to copy a very large file. To improve efficiency, the copy can be carried out by several processes working in parallel. If the file size is len and n processes copy the file, each process should copy len/n bytes. Because len is not necessarily divisible by n, the last process is also made responsible for the remaining bytes. The size of that remainder is len % n (equivalently, len - n*(len/n)).
    To keep the implementation simple, mmap can be used to map both the source file and the target file into memory. Pointers into these mappings then determine the start and end position of the region each process copies. The target mapping uses the MAP_SHARED option so that changes made in memory are written back to the file on disk.
Insert picture description here

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/wait.h>

/*
 * Check the integer result of a system call.
 *
 * ret: value returned by the call; -1 is treated as failure.
 * err: prefix handed to perror() when reporting the failure.
 *
 * Returns normally for any value other than -1. On -1 the message is
 * printed with perror() and the process terminates with exit status 1.
 */
void err_int(int ret, const char *err)
{
    if (ret != -1) {
        return;
    }

    perror(err);
    exit(1);
}

/*
 * Check the pointer result of a mapping call.
 *
 * ret: pointer to test; MAP_FAILED is treated as failure.
 * err: prefix handed to perror() when reporting the failure.
 *
 * Returns normally for any pointer other than MAP_FAILED. On MAP_FAILED
 * the message is printed with perror() and the process terminates with
 * exit status 1.
 */
void err_str(char *ret, const char *err)
{
    if (ret != MAP_FAILED) {
        return;
    }

    perror(err);
    exit(1);
}

/*
 * Copy a file with several child processes working in parallel.
 *
 * Usage: ./a.out file_src file_dst [process number]
 *
 * argv[1] is the source file, argv[2] the destination (created if missing,
 * truncated otherwise) and the optional argv[3] the number of child
 * processes (default 5, must be a positive integer).
 *
 * Both files are mapped with mmap(); child i copies bytes
 * [i*bs, (i+1)*bs) where bs = len / n, and the last child additionally
 * copies the len % n bytes left over by the even split.
 *
 * Returns 0 when the whole file was copied, 1 when a child did not finish
 * successfully. Argument and system-call errors terminate the process with
 * exit status 1.
 */
int main(int argc, char *argv[])
{
    int fd_src, fd_dst, ret, status;
    int failed = 0;
    long i, n, started;
    char *mp_src, *mp_dst;
    pid_t pid;
    struct stat sbuf;
    size_t len, bs, mod;

    if (argc < 3 || argc > 4) {
        printf("Enter like this please: ./a.out file_src file_dst [process number]\n");
        exit(1);
    }

    n = 5;                      /* default when the user gives no count */
    if (argc == 4) {
        char *end;

        /* strtol instead of atoi so that garbage, 0 and negative values
         * are rejected rather than leading to a division by zero below. */
        n = strtol(argv[3], &end, 10);
        if (end == argv[3] || *end != '\0' || n < 1) {
            fprintf(stderr, "process number must be a positive integer: %s\n", argv[3]);
            exit(1);
        }
    }

    /* Open the source file; report failures under the real path. */
    fd_src = open(argv[1], O_RDONLY);
    err_int(fd_src, argv[1]);
    /* Open the destination file, creating it if it does not exist. */
    fd_dst = open(argv[2], O_RDWR | O_CREAT | O_TRUNC, 0664);
    err_int(fd_dst, argv[2]);
    /* Fetch the size of the source file. */
    ret = fstat(fd_src, &sbuf);
    err_int(ret, "fstat err");

    /* Keep the size in size_t; an int would overflow for large files.
     * The round-trip cast detects sizes that do not fit the address space. */
    if (sbuf.st_size < 0 || (off_t)(size_t)sbuf.st_size != sbuf.st_size) {
        fprintf(stderr, "%s: file too large to map\n", argv[1]);
        exit(1);
    }
    len = (size_t)sbuf.st_size;

    /* An empty source needs no copying, and mmap() rejects a zero length.
     * The destination was already truncated to zero bytes by open(). */
    if (len == 0) {
        close(fd_src);
        close(fd_dst);
        return 0;
    }

    /* Never use more processes than there are bytes to copy. */
    if (len < (size_t)n) {
        n = (long)len;
    }

    /* Grow the destination file to the size of the source. */
    ret = ftruncate(fd_dst, sbuf.st_size);
    err_int(ret, "truncate fd_dst err");
    /* Map the source file. */
    mp_src = mmap(NULL, len, PROT_READ, MAP_SHARED, fd_src, 0);
    err_str(mp_src, "mmap src err");
    /* Map the destination file; MAP_SHARED carries the writes to the file. */
    mp_dst = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd_dst, 0);
    err_str(mp_dst, "mmap dst err");

    /* The mappings stay valid after the descriptors are closed. */
    close(fd_src);
    close(fd_dst);

    bs = len / (size_t)n;       /* bytes copied by every child */
    mod = len % (size_t)n;      /* leftover bytes, handled by the last child */

    /* Create the n children; each one copies its own slice and exits. */
    started = 0;
    for (i = 0; i < n; i++) {
        size_t off = (size_t)i * bs;
        size_t cnt = (i == n - 1) ? bs + mod : bs;

        pid = fork();
        if (pid == 0) {
            memcpy(mp_dst + off, mp_src + off, cnt);
            _exit(0);
        }
        if (pid == -1) {
            /* No more processes available: finish everything that has not
             * been handed to a child in the parent instead of leaving a
             * hole in the destination. */
            perror("fork err, copying the rest in the parent");
            memcpy(mp_dst + off, mp_src + off, len - off);
            break;
        }
        started++;
    }

    /* Parent: reap every child that was started and check how it ended. */
    while (started-- > 0) {
        if (wait(&status) == -1) {
            perror("wait err");
            failed = 1;
            break;
        }
        if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
            failed = 1;
        }
    }

    munmap(mp_src, len);
    munmap(mp_dst, len);

    if (failed) {
        fprintf(stderr, "copy failed: a child process did not finish successfully\n");
        return 1;
    }

    return 0;
}

Guess you like

Origin blog.csdn.net/bureau123/article/details/112346380