APUE (Advanced Programming in the UNIX Environment) exercise 4.6: implementing a program similar to cp(1) that preserves the holes in a file

1 Problem statement

Write a program similar to cp(1) that copies a file containing holes, without writing the 0 bytes of the holes to the output file.

2 basic idea

  • First we must find out how holes behave, so that we can determine whether a file has holes and where the holes are located
  • Once the hole positions are known, whenever a hole portion is read from the source file, lseek the destination file forward by the corresponding length instead of writing

3 Creating files with holes and exploring how holes behave

Alternate `lseek` and `write` calls, increasing the interval length from one file to the next, then compare each file's size with the number of blocks it actually occupies.

  • Test source
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>

/* Run lengths to test, in bytes; main() generates one output file per entry,
 * alternating a seek of this length with a write of this length. */
int holesize[]={1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32*1024};
/* Length, in bytes, that main() grows every generated file to (64 KiB). */
int filesize = 64*1024;

/*
 * Generate one test file per entry of holesize[].
 *
 * Each file is named "holesize<N>" and is built by repeating
 * "seek forward N bytes, then write N bytes of 0x01" until filesize bytes
 * have been covered, so seek gaps and data runs of equal length alternate.
 *
 * Returns 0 on success, -1 as soon as any open/lseek/write fails.
 */
int main()
{
    char filename[32];
    unsigned char buf[32*1024];
    size_t idx;

    /* Non-zero fill so the written data can never be mistaken for a hole. */
    memset(buf, 1, sizeof buf);

    for (idx = 0; idx < sizeof(holesize) / sizeof(holesize[0]); ++idx) {
        int run = holesize[idx];
        int count = 0;
        int fd;

        snprintf(filename, sizeof filename, "%s%d", "holesize", run);
        fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
        if (fd < 0) {
            printf("open file fail\n");
            return -1;
        }

        while (count < filesize) {
            int remain = run;

            /* Test the off_t result directly instead of narrowing it to int. */
            if (lseek(fd, run, SEEK_CUR) < 0) {
                printf("lseek fail\n");
                close(fd);
                return -1;
            }

            /* write() may be partial; keep going until the whole run is out. */
            while (remain) {
                ssize_t ret = write(fd, buf, remain);
                if (ret <= 0) {
                    /* ret == 0 would otherwise make this loop spin forever. */
                    if (ret < 0)
                        perror("write fail");
                    else
                        fprintf(stderr, "write fail: no bytes written\n");
                    close(fd);
                    return -1;
                }
                remain -= (int)ret;
            }
            count += run * 2;
        }
        close(fd);
    }
    return 0;
}
  • MAC OSX 10.1.4.6 Test Results
^_^$ ll -s
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize1
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize1024
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize128
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize16
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize16384
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize2
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize2048
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize256
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize32
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize32768
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize4
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize4096
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize512
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize64
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize8
128 -rw-r--r--   1 chenzf  staff  65536 12 28 20:08 holesize8192

Files with holes cannot be created on Mac OS X, because the default file system, HFS+, does not support sparse files.

  • Ubuntu18 4.15.0-60-generic test results
^_^$ ll -s
64 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize1
64 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize1024
64 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize128
64 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize16
32 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize16384
64 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize2
64 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize2048
64 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize256
64 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize32
32 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize32768
64 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize4
32 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize4096
64 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize512
64 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize64
64 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize8
32 -rw-r--r-- 1 chen chen 65536 12月 25 00:08 holesize8192

Holes are actually created only when the hole is 4 KB or larger.
This is because the Linux file system allocates disk space in clusters, the smallest physical allocation unit. (Even if a file's data does not fill a whole cluster, the rest of that cluster is still reserved for the file.)

So this property can be used to determine whether a file contains holes: for a file with holes, the number of clusters computed from the file size is at least one greater than the number of clusters the file actually occupies.

How to obtain the cluster size portably:

pagesize = sysconf(_SC_PAGESIZE);

Initial implementation

  • Source
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/errno.h>

/*
 * Write exactly len bytes starting at data to fd, retrying partial writes.
 * Returns 0 on success, -1 if write() fails or makes no progress.
 */
static int write_all(int fd, const unsigned char *data, size_t len)
{
    while (len > 0) {
        ssize_t n = write(fd, data, len);
        if (n <= 0) {
            /* A zero-byte result would spin forever; report it as an error. */
            if (n == 0)
                errno = ENOSPC;
            return -1;
        }
        data += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Copy file `from` to `to`, trying to keep holes as holes.
 *
 * The source is considered sparse when it is a regular file whose size,
 * rounded up to a multiple of the page size, exceeds 512 * st_blocks. For a
 * sparse source every run of zero bytes is skipped in the destination with
 * lseek() and every run of non-zero bytes is written; a non-sparse source is
 * copied verbatim. Diagnostic information is printed to stdout.
 *
 * Returns 0 on success, -1 on any failure (a message is printed via perror).
 */
int my_cp(const char *from, const char *to)
{
    int fd1 = -1, fd2 = -1;
    int rev = -1;
    unsigned char *buffer = NULL;
    long pagesize = 0;
    long long blocks, blksize, size;
    ssize_t read_num;
    int have_holes = 0;
    int pending_skip = 0; /* last operation on fd2 was a seek not yet followed by a write */
    struct stat st;

    fd1 = open(from, O_RDONLY);
    if (-1 == fd1) {
        perror("open file1 faild");
        goto err;
    }

    if (fstat(fd1, &st) != 0) {
        perror("fstat: ");
        goto err;
    }

#ifdef _SC_PAGESIZE
    /* sysconf() reports "no limit" as -1 with errno untouched, so errno must
     * be cleared first for the checks below to mean anything. */
    errno = 0;
    pagesize = sysconf(_SC_PAGESIZE);
    if (pagesize < 0) {
        if (errno == 0) {
            fputs(" (no limit)\n", stdout);
            pagesize = st.st_blksize;
        } else if (errno == EINVAL) {
            fputs(" (not supported)\n", stdout);
            pagesize = st.st_blksize;
        } else {
            perror("sysconf error");
            goto err;
        }
    }
    printf("pagesize: %ld\n", pagesize);
#else
    pagesize = st.st_blksize;
#endif
    if (pagesize <= 0) {
        /* Used as divisor and buffer size below; refuse nonsense values. */
        fputs("invalid page size\n", stderr);
        goto err;
    }

    blocks = st.st_blocks;
    blksize = st.st_blksize;
    size = st.st_size;
    printf("st.st_blocks: %lld\n", blocks);
    printf("st.st_blksize: %lld\n", blksize);
    printf("st.st_size: %lld\n", size);
    /* st_blocks is taken to be in 512-byte units; this may not be portable
     * to every platform. */
    if (S_ISREG(st.st_mode) && (size / pagesize + (size % pagesize ? 1 : 0)) * pagesize > 512 * blocks) {
        have_holes = 1;
        printf("%s is a sparse-block file!\n", from);
    } else {
        have_holes = 0;
        printf("%s is not a sparse-block file!\n", from);
    }

    fd2 = open(to, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
    if (-1 == fd2) {
        perror("open file2 faild");
        goto err;
    }

    buffer = malloc((size_t)pagesize);
    if (buffer == NULL) {
        perror("malloc fail");
        goto err;
    }

    while ((read_num = read(fd1, buffer, (size_t)pagesize)) > 0) {
        size_t got = (size_t)read_num;
        size_t pos = 0;

        if (!have_holes) {
            /* Source has no holes: plain copy. */
            if (write_all(fd2, buffer, got) != 0) {
                perror("write file2 error");
                goto err;
            }
            continue;
        }

        /* Source has holes: split the chunk into maximal zero / non-zero
         * runs (byte-by-byte scan, so this is slow). */
        while (pos < got) {
            size_t run_start = pos;
            int zero_run = (buffer[pos] == 0);

            while (pos < got && (buffer[pos] == 0) == zero_run)
                pos++;

            if (zero_run) {
                if (-1 == lseek(fd2, (long)(pos - run_start), SEEK_CUR)) {
                    perror("lseek file2 fail");
                    goto err;
                }
                pending_skip = 1;
            } else {
                if (write_all(fd2, buffer + run_start, pos - run_start) != 0) {
                    perror("write file2 error");
                    goto err;
                }
                pending_skip = 0;
            }
        }
    }
    if (-1 == read_num) {
        perror("read file1 error");
        goto err;
    }

    if (pending_skip) {
        /* lseek() by itself never extends a file, so a source that ends in
         * zeros would come out too short. Step back one byte and write the
         * final zero explicitly so the destination gets the full length. */
        static const unsigned char zero = 0;
        if (-1 == lseek(fd2, -1, SEEK_CUR) || write_all(fd2, &zero, 1) != 0) {
            perror("write file2 error");
            goto err;
        }
    }
    rev = 0;
err:
    free(buffer);
    if (fd1 != -1)
        close(fd1);
    /* A failing close() on the written file means data may not have landed. */
    if (fd2 != -1 && close(fd2) == -1 && rev == 0) {
        perror("close file2 error");
        rev = -1;
    }
    return rev;
}

/*
 * Entry point: copy argv[1] to argv[2] with my_cp().
 *
 * Returns -1 after printing a usage line when fewer than two paths are
 * given, 0 when the copy succeeds, and 1 when my_cp() reports failure.
 */
int main(int argc, char *argv[])
{
    if(argc < 3) {
        printf("Usage: %s file1 file2\n", argv[0]);
        return -1;
    }
    /* Propagate a failed copy to the caller instead of always exiting 0. */
    return (my_cp(argv[1], argv[2]) == 0) ? 0 : 1;
}
  • Test Results
^_^$ ./my_cp holesize2048 holesize2048.cp
pagesize: 4096
st.st_blocks: 128
st.st_blksize: 4096
st.st_size: 65536
holesize2048 is not a sparse-block file!
chen@ubuntu18:~/study/apue.3e/exercises/4
^_^$ ./my_cp holesize4096 holesize4096.cp
pagesize: 4096
st.st_blocks: 72
st.st_blksize: 4096
st.st_size: 65536
holesize4096 is a sparse-block file!

^_^$ ll -s
total 1708
64 -rw-r--r-- 1 chen chen  65536 1月   6 17:27 holesize2048
64 -rw-r--r-- 1 chen chen  65536 1月   6 17:27 holesize2048.cp
36 -rw-r--r-- 1 chen chen  65536 1月   6 17:27 holesize4096
32 -rw-r--r-- 1 chen chen  65536 1月   6 17:27 holesize4096.cp

Files with holes are copied correctly

Attempting an optimization

The program above uses the minimum hole size only when deciding whether the file contains holes. The actual reading and writing does not take advantage of this property.

As a result, even short runs of 0 bytes are treated as holes, which increases the number of system calls and degrades performance.

To optimize performance, we must explore the properties of holes further. Under what circumstances is a real hole (one that occupies no disk blocks) actually created?

  • Test program source code

This program creates three files:

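- File 1: `write` 1K of non-zero data, then `lseek` forward 7K-1 bytes; repeat twice.
- File 2: `write` 1K of non-zero data, then `lseek` forward 7K bytes; repeat twice.
- File 3: `write` 1K of non-zero data, then `lseek` forward 7K+1 bytes; repeat twice.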
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>

/* Base size(s), in bytes, to experiment with; for each entry main() writes
 * data runs of 1/4 of this size separated by seeks of about 7/4 of it. */
int holesize[]={4096};
/* Not referenced by the main() of this test program. */
int filesize = 64*1024;

/*
 * Create (or truncate) file `name` and perform `rounds` iterations of:
 * write `data_len` bytes taken from `fill`, then seek forward `gap` bytes.
 * `fill` holds `fill_len` bytes and is reused when data_len is larger.
 *
 * Returns 0 on success, -1 on failure; the descriptor is closed either way.
 */
static int make_gap_file(const char *name, const unsigned char *fill, size_t fill_len,
                         int data_len, int gap, int rounds)
{
    int round;
    int fd = open(name, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);

    if (fd < 0) {
        printf("open file fail\n");
        return -1;
    }
    for (round = 0; round < rounds; ++round) {
        size_t remain = (size_t)data_len;

        while (remain) {
            /* Never hand write() more than the fill buffer actually holds. */
            size_t chunk = remain < fill_len ? remain : fill_len;
            ssize_t ret = write(fd, fill, chunk);
            if (ret <= 0) {
                /* ret == 0 would otherwise make this loop spin forever. */
                if (ret < 0)
                    perror("write fail");
                else
                    fprintf(stderr, "write fail: no bytes written\n");
                close(fd);
                return -1;
            }
            remain -= (size_t)ret;
        }
        if (lseek(fd, gap, SEEK_CUR) < 0) {
            printf("lseek fail\n");
            close(fd);
            return -1;
        }
    }
    close(fd);
    return 0;
}

/*
 * For every entry N of holesize[], create three files "holesize<N>-1",
 * "holesize<N>-2" and "holesize<N>-3". Each gets two rounds of "write N/4
 * bytes of 0x01, then seek forward"; the seek distance is N*7/4 - 1,
 * N*7/4 and N*7/4 + 1 bytes respectively.
 *
 * Returns 0 on success, -1 as soon as any file cannot be produced.
 */
int main()
{
    static const int gap_delta[] = { -1, 0, 1 };
    unsigned char buf[32*1024];
    char filename[32];
    size_t i, k;

    /* Non-zero fill so the written data can never be mistaken for a hole. */
    memset(buf, 1, sizeof buf);

    for (i = 0; i < sizeof(holesize) / sizeof(holesize[0]); ++i) {
        for (k = 0; k < sizeof(gap_delta) / sizeof(gap_delta[0]); ++k) {
            snprintf(filename, sizeof filename, "%s%d-%d", "holesize", holesize[i], (int)k + 1);
            if (make_gap_file(filename, buf, sizeof buf,
                              holesize[i] * 1 / 4,
                              holesize[i] * 7 / 4 + gap_delta[k],
                              2) != 0) {
                return -1;
            }
        }
    }
    return 0;
}
  • Test Results
^_^$ ll -s
12 -rw-r--r-- 1 chen chen  9215 1月   6 15:07 holesize4096-1
 8 -rw-r--r-- 1 chen chen  9216 1月   6 15:07 holesize4096-2
 8 -rw-r--r-- 1 chen chen  9217 1月   6 15:07 holesize4096-3

This shows that a hole occupies no actual disk space only when, measured from a block-aligned starting position, it is at least pagesize bytes long

Optimized program

  • Source
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/errno.h>

/*
 * Read up to nbyte bytes from fd into buf, calling read() repeatedly until
 * the request is satisfied or read() reports end-of-file.
 *
 * Returns the number of bytes stored in buf (less than nbyte only when
 * end-of-file was reached first), or -1 if any read() call returns -1.
 */
ssize_t read_ex(int fd, void *buf, size_t nbyte){
    unsigned char *cursor = (unsigned char*)buf;
    size_t left = nbyte;
    ssize_t done = 0;

    for (;;) {
        ssize_t got;

        if (left == 0)
            return done;            /* request fully satisfied */

        got = read(fd, cursor, left);
        if (got == -1)
            return -1;              /* bytes already read are not reported */
        if (got == 0)
            return done;            /* end-of-file */

        cursor += got;
        left -= got;
        done += got;
    }
}

/*
 * Write exactly nbyte bytes from buf to fd, calling write() repeatedly
 * until everything has been written.
 *
 * Returns nbyte on success. Returns -1 if write() fails, or if it reports
 * zero bytes written for a non-empty request (errno is then set to ENOSPC)
 * so the loop cannot spin forever without making progress.
 */
ssize_t write_ex(int fd, const void *buf, size_t nbyte){
    /* Keep the const qualifier: the data is only read, never modified. */
    const unsigned char *write_start = (const unsigned char*)buf;
    size_t write_remain = nbyte;
    ssize_t total_num = 0;

    while(write_remain) {
        ssize_t write_num = write(fd, write_start, write_remain);
        if(-1 == write_num){
            return -1;
        }
        if(0 == write_num){
            errno = ENOSPC;
            return -1;
        }
        write_remain -= (size_t)write_num;
        write_start += write_num;
        total_num += write_num;
    }
    return total_num;
}
/*
 * Copy file `from` to `to`, trying to keep holes as holes.
 *
 * The source is considered sparse when it is a regular file whose size,
 * rounded up to a multiple of the page size, exceeds 512 * st_blocks. The
 * file is then processed one page at a time: a page consisting only of zero
 * bytes is skipped in the destination with lseek(), any other page is
 * written with write_ex(). A non-sparse source is copied verbatim.
 * Diagnostic information is printed to stdout.
 *
 * Returns 0 on success, -1 on any failure (a message is printed via perror).
 */
int my_cp(const char *from, const char *to)
{
    int fd1 = -1, fd2 = -1;
    int rev = -1;
    unsigned char *buffer = NULL, *buffer_zero = NULL;
    long pagesize = 0;
    long long blocks, blksize, size;
    ssize_t read_num;
    int have_holes = 0;
    int pending_skip = 0; /* last operation on fd2 was a seek not yet followed by a write */
    struct stat st;

    fd1 = open(from, O_RDONLY);
    if(-1 == fd1){
        perror("open file1 faild");
        goto err;
    }

    if(fstat(fd1, &st) != 0) {
        perror("fstat: ");
        goto err;
    }

#ifdef _SC_PAGESIZE
    /* sysconf() reports "no limit" as -1 with errno untouched, so errno must
     * be cleared first for the checks below to mean anything. */
    errno = 0;
    pagesize = sysconf(_SC_PAGESIZE);
    if (pagesize < 0) {
        if (errno == 0) {
            fputs(" (no limit)\n", stdout);
            pagesize = st.st_blksize;
        } else if (errno == EINVAL) {
            fputs(" (not supported)\n", stdout);
            pagesize = st.st_blksize;
        } else {
            perror("sysconf error");
            goto err;
        }
    }
    printf("pagesize: %ld\n", pagesize);
#else
    pagesize = st.st_blksize;
#endif
    if (pagesize <= 0) {
        /* Used as divisor and buffer size below; refuse nonsense values. */
        fputs("invalid page size\n", stderr);
        goto err;
    }

    blocks = st.st_blocks;
    blksize = st.st_blksize;
    size = st.st_size;
    printf("st.st_blocks: %lld\n", blocks);
    printf("st.st_blksize: %lld\n", blksize);
    printf("st.st_size: %lld\n", size);
    /* st_blocks is taken to be in 512-byte units; this may not be portable
     * to every platform. */
    if(S_ISREG(st.st_mode) && (size / pagesize + (size%pagesize?1:0)) * pagesize > 512 * blocks) {
        have_holes = 1;
        printf("%s is a sparse-block file!\n", from);
    } else{
        have_holes = 0;
        printf("%s is not a sparse-block file!\n", from);
    }

    buffer = malloc((size_t)pagesize);
    /* All-zero reference page that each page read is compared against. */
    buffer_zero = calloc(1, (size_t)pagesize);
    if(buffer == NULL || buffer_zero == NULL) {
        perror ("malloc fail");
        goto err;
    }

    fd2 = open(to, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
    if (-1 == fd2) {
        perror ("open file2 faild");
        goto err;
    }

    while((read_num = read_ex(fd1, buffer, (size_t)pagesize)) > 0) {
        if(have_holes && !memcmp(buffer_zero, buffer, (size_t)read_num)){
            /* The page is all zeros: leave a gap instead of writing it. */
            if(-1 == lseek(fd2, (long)read_num, SEEK_CUR)){
                perror("lseek file2 fail");
                goto err;
            }
            pending_skip = 1;
        }
        else{
            /* The page carries data: copy it as-is. */
            if (-1 == write_ex(fd2, buffer, (size_t)read_num)){
                perror( "write file2 error");
                goto err;
            }
            pending_skip = 0;
        }
    }
    if(-1 == read_num){
        perror("read file1 error");
        goto err;
    }

    if(pending_skip){
        /* lseek() by itself never extends a file, so a source that ends in
         * an all-zero page would come out too short. Step back one byte and
         * write the final zero explicitly so the destination gets the full
         * length. */
        if(-1 == lseek(fd2, -1, SEEK_CUR) || -1 == write_ex(fd2, buffer_zero, 1)){
            perror( "write file2 error");
            goto err;
        }
    }
    rev = 0;
err:
    free(buffer);
    free(buffer_zero);
    if(fd1 != -1)
        close(fd1);
    /* A failing close() on the written file means data may not have landed. */
    if(fd2 != -1 && close(fd2) == -1 && rev == 0){
        perror("close file2 error");
        rev = -1;
    }
    return rev;
}

/*
 * Entry point: copy argv[1] to argv[2] with my_cp().
 *
 * Returns -1 after printing a usage line when fewer than two paths are
 * given, 0 when the copy succeeds, and 1 when my_cp() reports failure.
 */
int main(int argc, char *argv[])
{
    if(argc < 3) {
        printf("Usage: %s file1 file2\n", argv[0]);
        return -1;
    }
    /* Propagate a failed copy to the caller instead of always exiting 0. */
    return (my_cp(argv[1], argv[2]) == 0) ? 0 : 1;
}
  • Comparison test

Construct a file that, apart from a hole at the beginning, consists of 100,000 repetitions of the bytes 0x00, 0x01

Copying this file 10,000 times with the program before optimization takes about 2000 s

Copying this file 10,000 times with the program after optimization takes about 30 s

Guess you like

Origin www.cnblogs.com/logchen/p/12157828.html