Linux system programming (2): files and directories

References

1. File storage

  • A file mainly consists of two parts, dentry (directory entry) and Inode
  • The so-called deletion of a file only removes its directory entry (the Inode is released once its link count drops to 0); the data itself actually stays on the hard disk until it is overwritten later.

Insert image description here

1.1 Inode

  • Its essence is a structure that stores the attribute information of the file. Such as: permissions, type, size, time, user, disk location
  • Inode is also called the file attribute management structure. Most Inodes are stored on the disk.
  • A small number of commonly used and recently used Inodes will be cached in memory.

1.2 Directory entry (dentry)

  • A directory entry is, in essence, also a structure. It has two important members {file name, Inode number, ...}; the file content (data) is stored in disk blocks.

2. File system

2.1 Functions stat, fstat, fstatat and lstat

#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

int stat(const char *pathname, struct stat *buf);
int fstat(int fd, struct stat *buf);
int lstat(const char *pathname, struct stat *buf);
int fstatat(int fd, const char *pathname, struct stat *buf, int flag);
  • function return value

    • Return 0 on success
    • Return -1 on error
  • Once pathname is given

    • The stat function will return an information structure related to this named file
    • The fstat function obtains information about files that have been opened on descriptor fd
    • The lstat function is similar to stat, but when the named file is a symbolic link, lstat returns information about the symbolic link rather than the file referenced by the symbolic link.
      • stat follows a symbolic link and returns the attributes of the file or directory it points to. If you do not want symbolic links to be followed, use lstat.
    • The fstatat function returns file statistics for a pathname relative to the currently open directory pointed to by the fd parameter.
      • The flag parameter controls whether a symbolic link is followed. When the AT_SYMLINK_NOFOLLOW flag is set, fstatat does not follow the symbolic link, but returns information about the symbolic link itself; otherwise, the default is to return information about the actual file pointed to by the symbolic link.
      • If the value of the fd parameter is AT_FDCWD and the pathname parameter is a relative pathname, fstatat will evaluate the pathname parameter relative to the current directory; if pathname is an absolute path, the fd parameter will be ignored
  • The second parameter buf is a pointer to a struct stat, which the function fills in with the file's attributes (most of them taken from the Inode). Its basic form is as follows

    struct stat {
          
          
        dev_t     st_dev;         /* ID of device containing file */
        ino_t     st_ino;         /* Inode number */
        mode_t    st_mode;        /* File type and mode */
        nlink_t   st_nlink;       /* Number of hard links */
        uid_t     st_uid;         /* User ID of owner */
        gid_t     st_gid;         /* Group ID of owner */
        dev_t     st_rdev;        /* Device ID (if special file) */
        off_t     st_size;        /* Total size, in bytes */
        blksize_t st_blksize;     /* Block size for filesystem I/O */
        blkcnt_t  st_blocks;      /* Number of 512B blocks allocated */
        struct timespec st_atim;  /* Time of last access */
        struct timespec st_mtim;  /* Time of last modification */
        struct timespec st_ctim;  /* Time of last status change */
    };
    

Case

  • Get file size: st_size

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <sys/stat.h>
    
    /*
     * Print the size, in bytes, of the file named by argv[1].
     *
     * Exits with status 1 if no pathname is supplied or if stat() fails
     * (the reason is reported on stderr); returns 0 after printing the size.
     */
    int main(int argc, char* argv[]) {
        struct stat sbuf;

        /* argv[1] is NULL when no argument is given, so reject that up front. */
        if (argc != 2) {
            fprintf(stderr, "usage: %s <pathname>\n", argc > 0 ? argv[0] : "a.out");
            exit(1);
        }

        if (stat(argv[1], &sbuf) == -1) {
            perror("stat error");
            exit(1);
        }

        /* off_t is not guaranteed to be long; widen it to match the format. */
        printf("file size: %lld\n", (long long)sbuf.st_size);

        return 0;
    }
    
  • file permission bits
    Insert image description here

2.2 File types

  • Most files on a UNIX system are ordinary files or directories, but there are also other file types. File types include the following
    • regular file
      • Contains some form of data. Whether this data is text or binary data makes no difference to the UNIX kernel
    • directory file
      • Contains the names of other files and pointers to information about those files
      • Any process with read permissions on a directory file can read the contents of the directory, but only the kernel can write to the directory file directly.
    • block special file
      • Provides buffered access to a device (such as a disk) with a fixed length per access
    • character special file
      • Provides unbuffered access to the device, with each access being of variable length
      • All devices in the system are either character special files or block special files
    • Pipe FIFO
      • Used for inter-process communication, sometimes called named pipes
    • socket
      • Used for network communication between processes , and can also be used for non-network communication between processes on a host machine
    • symbolic link
      • This type of file points to another file

Insert image description here

Case

  • Get file type/permissions: st_mode
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <sys/stat.h>
    
    /*
     * Print the type of the file named by argv[1].
     *
     * Exits with status 1 if no pathname is supplied or if lstat() fails;
     * returns 0 after printing one line describing the file type.
     */
    int main(int argc, char* argv[]) {
        struct stat sbuf;

        /* argv[1] is NULL when no argument is given, so reject that up front. */
        if (argc != 2) {
            fprintf(stderr, "usage: %s <pathname>\n", argc > 0 ? argv[0] : "a.out");
            exit(1);
        }

        // stat follows symbolic links, so it could never report a symbolic
        // link itself; lstat examines the link instead.
        if (lstat(argv[1], &sbuf) == -1) {
            perror("lstat error");
            exit(1);
        }

        if (S_ISREG(sbuf.st_mode)) {
            printf("It's a regular\n");
        } else if (S_ISDIR(sbuf.st_mode)) {
            printf("It's a dir\n");
        } else if (S_ISCHR(sbuf.st_mode)) {
            printf("It's a character special file\n");
        } else if (S_ISBLK(sbuf.st_mode)) {
            printf("It's a block special file\n");
        } else if (S_ISFIFO(sbuf.st_mode)) {
            printf("It's a pipe\n");
        } else if (S_ISLNK(sbuf.st_mode)) {
            printf("It's a sym link\n");
        } else if (S_ISSOCK(sbuf.st_mode)) {
            printf("It's a socket\n");
        } else {
            /* Never stay silent: report types this program does not know. */
            printf("It's an unknown file type\n");
        }

        return 0;
    }
    

The ls -l command does not follow symbolic links; the cat and vim commands do.

2.3 Set user ID and set group ID

  • There are 6 or more IDs associated with a process
    • The real user ID and real group ID identify who we are. These two fields are taken from the login entries in the password file at login time. Normally, these values do not change during a login session, but a superuser process has ways to change them.
    • The effective user ID, effective group ID, and supplementary group IDs determine file access permissions
    • The saved set-user-ID and saved set-group-ID contain copies of the effective user ID and effective group ID when a program is executed

Insert image description here

  • Typically, the effective user ID is equal to the real user ID, and the effective group ID is equal to the real group ID.
  • Each file has an owner and group owner
    • The owner is specified by st_uid in the stat structure
    • The group owner is specified by st_gid

2.4 File access permissions

  • The st_mode value also contains the access rights bits to the file. All file types (directories, character special files, etc.) have access rights

  • Each file has 9 access rights bits

    • Use u to represent the user (owner), g to represent the group, and o to represent others.

Insert image description here

  • File access rules
    • When opening a file of any type with a name, you should have execute permission for every directory contained in the name (including the implicit current working directory)
      • For example, to open the file /usr/include/stdio.h, you need execute permission on the directories /, /usr, and /usr/include
    • The read permission for a file determines whether an existing file can be opened for reading.
      • This is related to the O_RDONLY and O_RDWR flags of the open function
    • Write permissions for a file determine whether an existing file can be opened for writing
      • This is related to the O_WRONLY and O_RDWR flags of the open function
    • In order to specify the O_TRUNC flag on a file in the open function , you must have write permission on the file
    • In order to create a new file in a directory , you must have write and execute permissions on the directory
    • In order to delete an existing file , you must have write and execute permissions on the directory containing the file
      • You do not need to have read or write permissions on the file itself.

Every time a process opens, creates, or deletes a file, the kernel performs a file access test. This test may involve the owner of the file (st_uid and st_gid), the effective IDs of the process (effective user ID and effective group ID), and the process's supplementary group IDs (if supported). The two owner IDs are properties of the file, while the two effective IDs and the supplementary group IDs are properties of the process

2.5 Ownership of new files and directories

  • The user ID of the new file is set to the effective user ID of the process. Regarding group IDs, POSIX.1 allows implementations to select one of the following as the group ID for new files
    • The group ID of the new file can be the effective group ID of the process
    • The group ID of the new file can be the group ID of the directory in which it is located.

2.6 Function access and faccessat

#include <fcntl.h>
#include <unistd.h>

int access(const char *pathname, int mode);
int faccessat(int fd, const char *pathname, int mode, int flag);
  • Even though a process may already be running with superuser privileges through the set-user-ID mechanism, it may still want to verify that its real user can access a given file

  • function return value

    • If successful, return 0

    • If an error occurs, -1 is returned

    • The access and faccessat functions test access against the real user ID and real group ID

  • To test only whether the file exists, mode is F_OK; otherwise mode is the bitwise OR of the constants listed in the figure below.

Insert image description here

  • The faccessat function is identical to the access function in the following two cases. Otherwise, faccessat calculates the pathname relative to the open directory pointed to by the fd parameter.

    • One is that the pathname parameter is an absolute path
    • The other is that the value of the fd parameter is AT_FDCWD and the pathname parameter is a relative path.
  • The flag parameter can be used to change the behavior of faccessat

    • If the flag is set to AT_EACCESS, the access check uses the effective user ID and effective group ID of the calling process instead of the real user ID and real group ID.

Case

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>

/*
 * Check whether the file named by argv[1] is readable, first with access()
 * and then by actually opening it read-only.
 *
 * Exits with status 1 on a usage error or as soon as either check fails;
 * returns 0 when both checks succeed.
 */
int main(int argc, char* argv[]) {
    int fd;

    if (argc != 2) {
        /* A usage error sets no errno, so perror() would append an
           unrelated system message; print the plain usage line instead. */
        fprintf(stderr, "usage: access <pathname>\n");
        exit(1);
    }

    if (access(argv[1], R_OK) < 0) {
        perror("access error");
        exit(1);
    }
    printf("read access OK\n");

    fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        perror("open error");
        exit(1);
    }
    printf("open for reading OK\n");

    /* The descriptor was only needed to prove the open succeeds. */
    close(fd);

    return 0;
}
$ gcc access.c -o access
$ ./access fcntl.c
read access OK
open for reading OK

2.7 Function umask

#include <sys/types.h>
#include <sys/stat.h>

// See the figure in section 2.4 for the values mask can take
mode_t umask(mode_t mask);
  • The umask function sets the file mode creation mask for the process and returns the previous value. It is one of the few functions that has no error return.

  • function return value

    • The previous file mode creation mask
  • Whenever a process creates a new file or directory, the file mode creation mask is always applied.

    • Both the open and creat functions have a parameter mode, which specifies the access permission bits of the new file.

Case

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>

#define RWRWRW (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)

/*
 * Create "foo" and "bar" with the same requested mode (RWRWRW) under two
 * different file mode creation masks. A failed creat() is reported with
 * perror() but does not stop the program; the return value is always 0.
 */
int main(int argc, char* argv[]) {
    const mode_t group_other_rw = S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
    int created;

    /* Empty mask: none of the requested bits are masked off for "foo". */
    umask(0);
    created = creat("foo", RWRWRW);
    if (created < 0)
        perror("creat error for foo");

    /* Mask off group and other read/write before creating "bar". */
    umask(group_other_rw);
    created = creat("bar", RWRWRW);
    if (created < 0)
        perror("creat error for bar");

    return 0;
}
$ umask      # first print the current file mode creation mask
0002
$ gcc umask.c -o umask
$ ./umask
$ ls -l foo bar
-rw------- 1 yxd yxd 0 9月  14 08:53 bar
-rw-rw-rw- 1 yxd yxd 0 9月  14 08:53 foo
$ umask
0002
$ umask -S   # check whether the file mode creation mask has changed
u=rwx,g=rwx,o=rx
$ umask 027  # change the file mode creation mask
$ umask -S
u=rwx,g=rx,o=
  • When writing a program that creates a new file, if you want to ensure that the specified access permission bit is activated, you must modify the umask value while the process is running. For example, if you want to ensure that any user can read the file, you should set umask to 0 . Otherwise, when the process is running, a valid umask value may turn off the permission bit
  • Changing the file mode creation mask of a process does not affect the mask of its parent process (usually the shell)
  • The user can set the umask value to control the default permissions for created files. The value is expressed as an octal number. One bit represents a permission to be blocked. As shown in the figure below, after the corresponding bit is set, the corresponding permission will be denied.
    • Commonly used umask values are 002, 022 and 027
      • 002 Prevent other users from writing to your files
      • 022 Prevent group members and other users from writing to your files
      • 027 Prevent members of the same group from writing to your files and other users from reading, writing, or executing your files

Insert image description here

2.8 Functions chmod, fchmod and fchmodat (change existing file access permissions)

#include <fcntl.h>
#include <sys/stat.h>

int chmod(const char *pathname, mode_t mode);
int fchmod(int fd, mode_t mode);
int fchmodat(int fd, const char *pathname, mode_t mode, int flag);
  • function return value

    • If successful, return 0
    • If an error occurs, -1 is returned
  • The chmod function operates on the specified file, while the fchmod function operates on the opened file.

  • To change the permission bits of a file

    • The effective user ID of the process must equal the owner ID of the file
    • Or the process must have superuser privileges
  • The parameter mode is the bitwise OR of the constants shown in the figure below

Insert image description here

Case

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>

#define RWRWRW (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)

/*
 * Change the permissions of "foo" relative to its current mode and set the
 * permissions of "bar" to an absolute value.
 *
 * Exits with status 1 if stat() or chmod() fails for "foo"; a chmod()
 * failure for "bar" is only reported. Otherwise returns 0.
 */
int main(int argc, char* argv[]) {
    struct stat foo_info;
    mode_t foo_mode;
    const mode_t bar_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;

    // Relative change: fetch the current mode of foo with stat, then
    // modify it.
    if (stat("foo", &foo_info) < 0) {
        perror("stat error for foo");
        exit(1);
    }

    // Explicitly turn the group-execute bit off and the set-group-ID bit on.
    foo_mode = foo_info.st_mode & ~S_IXGRP;
    foo_mode |= S_ISGID;
    if (chmod("foo", foo_mode) < 0) {
        perror("chmod error for foo");
        exit(1);
    }

    // Absolute change: whatever bar's current permission bits are, set
    // them to rw-r--r--.
    if (chmod("bar", bar_mode) < 0)
        perror("chmod error for bar");

    return 0;
}
$ gcc chmod.c -o chmod
$ ./chmod
$ ls -l foo bar
-rw-r--r-- 1 yxd yxd 0 9月  14 08:53 bar
-rw-rwSrw- 1 yxd yxd 0 9月  14 08:53 foo

2.9 Sticky bit

  • S_ISVTX is called the sticky bit
    • If this bit is set for an executable program file, then the first time the program is executed, a copy of the program's text (its machine instructions) is kept in the swap area when the program terminates, which allows the program to be loaded into memory more quickly the next time it is executed
    • The directories /tmp and /var/tmp are typical candidates for setting the sticky bit : any user can create files in these two directories. The permissions for any user (user, group, and others) on these two directories are usually read, write, and execute. But users should not be able to delete or rename files belonging to other people, for which the sticky bit is set in the file mode of both directories

2.10 Functions chown, fchown, fchownat and lchown

  • The following chown functions can be used to change the user ID and group ID of a file

    #include <fcntl.h>
    #include <unistd.h>
    
    // If either of the two arguments owner or group is -1, the corresponding ID is left unchanged
    int chown(const char *pathname, uid_t owner, gid_t group);
    int fchown(int fd, uid_t owner, gid_t group);
    int lchown(const char *pathname, uid_t owner, gid_t group);
    
    int fchownat(int fd, const char *pathname, uid_t owner, gid_t group, int flag);
    
  • function return value

    • If successful, return 0
    • If an error occurs, -1 is returned
  • The four functions operate similarly, except when the referenced file is a symbolic link.

    • In the case of symbolic links, lchown and fchownat (with the AT_SYMLINK_NOFOLLOW flag set) change the owner of the symbolic link itself, not the owner of the file that the symbolic link points to.
    • The fchown function changes the owner of the open file pointed to by the fd parameter. Since it operates on an already open file, it cannot be used to change the owner of a symbolic link.

2.11 File length

  • The stat structure member st_size represents the length of the file in bytes.

    • This field is only meaningful for ordinary files, directory files and symbolic links
  • For ordinary files, the file length can be 0. When you start reading such a file, you will get an end-of-file indication.

  • For directories, the file length is usually an integral multiple of a number (such as 16 or 512)

  • For symbolic links, the file length is the actual number of bytes in the file name the link contains (for example, 6 for a link whose content is ../foo)

    • Because the symbolic link file length is always indicated by st_size, it does not contain the null byte that is usually used by the C language to terminate the name.
  • File hole

    • Holes are caused by setting an offset beyond the end of the file and writing some data.

2.12 File truncation

  • Sometimes it is necessary to truncate some data at the end of the file to shorten the file
    • Truncating the length of a file to 0 is a special case and can be done using the O_TRUNC flag when opening the file.
    • To truncate the file you can call the functions truncate and ftruncate
#include <unistd.h>
#include <sys/types.h>

int truncate(const char *path, off_t length);
int ftruncate(int fd, off_t length);
  • return value

    • If successful, return 0
    • If an error occurs, -1 is returned
  • These two functions truncate the length of an existing file to length

    • If the previous length of the file is greater than length, data beyond length cannot be accessed.
    • If the previous length was less than length, the file length will be increased and the data between the previous end of the file and the new end of the file will read as 0 (i.e. a hole may be created in the file )

2.13 File system

  • A disk can be divided into one or more partitions. Each partition can contain a file system, and inodes are fixed-length record entries that contain most of the information about the file.

Insert image description here

  • The i-node and data block portion of a cylinder group
    • There is a link count in each i-node, whose value is the number of directory entries pointing to that i-node. Only when the link count is reduced to 0 can the file be deleted (that is, the data blocks occupied by the file can be released)
      • This is why "unlinking a file" does not always mean "freeing the disk blocks occupied by the file"
      • This is why the function to delete a directory entry is called unlink instead of delete.
    • Another type of link is called a symbolic link. The actual contents of the symbolic link file (in the data block) contain the name of the file pointed to by the symbolic link
    • The i node contains all information about the file: file type, file access permission bits, file length and pointers to file data blocks, etc. Most of the information in the stat structure is taken from i nodes. Only two important pieces of data are stored in directory entries: file names and i-node numbers.
    • Because the i-node number in a directory entry points to the corresponding i-node in the same file system, a directory entry cannot point to an i-node in another file system.
    • When you rename a file without changing the file system, the actual contents of the file are not moved; only a new directory entry is constructed pointing to the existing i-node and the old directory entry is deleted . Link count will not change
      • Example: Rename the file /usr/lib/foo to /usr/foo. If /usr/lib and /usr are in the same file system, the contents of the file foo do not need to be moved.

Insert image description here

2.14 Functions link, linkat, unlink, unlinkat and remove

Why should directory entries be kept outside the inode and file names stored separately? What are the benefits of this storage method?

  • Its purpose is to achieve file sharing . Linux allows multiple directory entries to share an inode, that is, shared disk block (data)
  • Different file names are interpreted as two files in human eyes, but they are the same file in the eyes of the kernel.

2.14.1 Function link, linkat

  • The way to create a link to an existing file is to use the link function or the linkat function
    #include <fcntl.h>
    #include <unistd.h>
    
    int link(const char *oldpath, const char *newpath);
    int linkat(int oldfd, const char *oldpath, int newfd, const char *newpath, int flag);
    
  • These two functions create a new directory entry newpath which references the existing file oldpath
    • If newpath already exists, an error is returned. Only create the last component in newpath, the rest of the path should already exist
  • return value
    • If successful, return 0
    • If an error occurs, -1 is returned
  • When the existing file is a symbolic link, the flag parameter controls whether the linkat function creates a link to the existing symbolic link or to the file pointed to by the existing symbolic link.

Case

  • Implement mv command
    #include <stdio.h>
    #include <string.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <fcntl.h>
    #include <sys/stat.h>
    
    /*
     * Minimal mv: give the file argv[1] the additional name argv[2] with
     * link(), then remove the old name with unlink().
     *
     * Exits with status 1 on a usage error or if either call fails (the
     * reason is reported on stderr); returns 0 on success.
     */
    int main(int argc, char* argv[]) {
        if (argc != 3) {
            fprintf(stderr, "usage: %s <oldpath> <newpath>\n",
                    argc > 0 ? argv[0] : "a.out");
            exit(1);
        }

        /* Create the new name first. If this fails the old name must be
           kept: unlinking it anyway could remove the last remaining link
           to the data. */
        if (link(argv[1], argv[2]) < 0) {
            perror("link error");
            exit(1);
        }

        if (unlink(argv[1]) < 0) {
            perror("unlink error");
            exit(1);
        }

        return 0;
    }
    

2.14.2 Functions unlink and unlinkat

  • To delete an existing directory entry, call the unlink function
#include <fcntl.h>
#include <unistd.h>

int unlink(const char *pathname);
int unlinkat(int fd, const char *pathname, int flag);
  • These two functions delete the directory entry and decrement the link count of the file referenced by pathname by 1
    • If there are other links to the file, the file's data can still be accessed through the other links
    • If an error occurs, no changes are made to the file
  • return value
    • If successful, return 0
    • If an error occurs, -1 is returned
  • The flag parameter provides a way for the calling process to change the default behavior of the unlinkat function.
    • When the AT_REMOVEDIR flag is set, the unlinkat function can delete a directory similar to rmdir.
    • If this flag is cleared, unlinkat performs the same operation as unlink

Only when the link count reaches 0 can the contents of the file be deleted . Another condition also prevents deletion of the file's contents: as long as a process has the file open, its contents cannot be deleted . When closing a file, the kernel first checks the number of processes that have the file open. If this count reaches 0, the kernel then checks its link count. If the count is also 0, then the contents of the file are deleted.

  • This feature of unlink is often used by programs to ensure that even if the program crashes, the temporary files it creates will not be left behind.
    • The process creates a file with open or creat and then immediately calls unlink. Because the file is still open, its contents will not be deleted. The contents of the file are deleted only when the process closes the file or terminates (in which case the kernel closes all files opened by the process)
    • Deleting a file, in a sense, just makes the file ready for release
    • Characteristics of the unlink function: when a file is removed and its hard link count reaches 0, it no longer has a corresponding dentry, but the file is not released immediately. The kernel waits until every process that has the file open has closed it, and only then releases the file.

Implicit recycling : When a process ends, all files opened by the process will be closed and the memory space requested will be released. This feature of the system is called implicit recycling of system resources.

Case

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>

/*
 * Demonstrates unlinking a file while it is still open: creates (or
 * truncates) lseek.txt, removes its name with unlink() right away, then
 * writes two strings through the still-open descriptor.
 *
 * Exits with status 1 if open() or unlink() fails; write() failures are
 * only reported with perror().
 */
int main(int argc, char* argv[]) {
    
    
    int fd, ret;
    char* p = "test of unlink\n";
    char* p2 = "after write something.\n";

    fd = open("lseek.txt", O_RDWR | O_CREAT | O_TRUNC, 0644);
    if (fd < 0) {
    
    
        perror("open temp error");
        exit(1);
    }

    // Remove the name immediately; the descriptor fd stays open and is
    // still used by the write() calls below.
    ret = unlink("lseek.txt");
    if (ret < 0) {
    
    
        perror("unlink error");
        exit(1);
    }

    // The write here actually places the content in a buffer rather than on the disk
    ret = write(fd, p, strlen(p));
    if (ret == -1) {
    
    
        perror("-----write error");
    }
    printf("hi! I'm printf\n");

    ret = write(fd, p2, strlen(p2));
    if (ret == -1) {
    
    
        perror("-----write error");
    }
    // Pause until the user presses a key.
    printf("Enter anykey continue\n");
    getchar();

    // NOTE(review): p points to a string literal, so this store is undefined
    // behavior and typically crashes the process. Presumably deliberate, to
    // show that the unlinked file leaves nothing behind even when the
    // program dies before close() - confirm.
    p[3] = 'H';

    close(fd);

    return 0;
}

2.14.3 Function remove

#include <stdio.h>

int remove(const char* pathname);
  • You can use the remove function to unlink a file or directory.
    • For files, remove has the same function as unlink
    • For directories, remove has the same function as rmdir

2.15 Functions rename and renameat

  • Files or directories can be renamed using the rename function or the renameat function.
#include <fcntl.h> 
#include <stdio.h>

int rename(const char *oldpath, const char *newpath);
int renameat(int oldfd, const char *oldpath, int newfd, const char *newpath);
  • If oldname refers to a file, rename the file or symbolic link

    • newname cannot reference a directory if it already exists
    • If newname already exists and is not a directory, delete the directory entry and then rename oldname to newname.
    • The calling process must have write permissions on the directory containing oldname and on the directory containing newname because changes will be made to both directories
  • If oldname refers to a directory, rename the directory

    • If newname already exists, it must refer to a directory, and the directory should be an empty directory (an empty directory means that there are only . and .. entries in the directory)
    • If newname exists (and is an empty directory), delete it first, and then rename oldname to newname
    • When renaming a directory, newname cannot contain oldname as its path prefix
      • Example: /usr/foo cannot be renamed to /usr/foo/testdir because the old name (/usr/foo) is the path prefix of the new name and cannot be deleted.
  • . and .. cannot be renamed. More precisely, neither . nor .. can appear as the last component of oldname or newname.

  • As a special case, if oldname and newname refer to the same file, the function returns successfully without any changes.

2.16 Symbolic links

  • A symbolic link is an indirect pointer to a file. It is different from the hard link described in the previous section. The hard link directly points to the i node of the file.

  • The reason for introducing symbolic links is to avoid some limitations of hard links

    • Hard links usually require that the link and file are on the same file system
    • Only superuser can create hard links to directories (if the underlying file system supports it)
  • There are no file system restrictions on symbolic links and what objects they point to. Any user can create a symbolic link to a directory. Symbolic links are typically used to move a file or an entire directory structure to another location on the system.

Case

  • Using symbolic links may introduce cycles in the file system . Most functions that look up pathnames will return an error with an errno value of ELOOP when this occurs. Consider the following sequence of commands:
    $ mkdir foo                 # create a new directory
    $ touch foo/a               # create a zero-length file
    $ ln -s ../foo foo/testdir  # create a symbolic link
    $ ls -l foo
    total 0
    -rw-rw-r-- 1 yxd yxd 0 9月  14 15:28 a
    lrwxrwxrwx 1 yxd yxd 6 9月  14 15:28 testdir -> ../foo
    
  • The above command creates a directory foo, which contains a file named a and a symbolic link pointing to foo
    • Symbolic link testdir that forms a loop
  • Such a cycle is easily eliminated
    • Because unlink does not follow symbolic links, you can unlink the file foo/testdir
  • But if a hard link is created that forms such a loop, then it will be difficult to eliminate it
    • This is why the link function does not allow the construction of hard links to directories (unless the process has superuser privileges)

Insert image description here

  • When opening a file with open, if the pathname passed to the open function specifies a symbolic link, open follows the link to the file it points to. If the file pointed to by the symbolic link does not exist, open returns an error, indicating that it cannot open the file.
    $ ln -s /no/such/file myfile   # create a symbolic link
    $ ls myfile
    myfile
    $ cat myfile                   # try to view the file
    cat: myfile: No such file or directory
    $ ls -l myfile
    lrwxrwxrwx 1 yxd yxd 13 9月  14 15:37 myfile -> /no/such/file
    
    • The file myfile exists, but cat says it does not exist. The reason is that myfile is a symbolic link and the file pointed to by the symbolic link does not exist.
    • The -l option of the ls command has two prompts
      • The first character is l, which indicates that this is a symbolic link, and -> also indicates that this is a symbolic link
    • The ls command has another option -F
      • It appends an @ symbol to the end of the symbolic link's file name, which can help identify the symbolic link when the -l option is not used.

2.17 Creating and reading symbolic links

  • A symbolic link can be created using the symlink or symlinkat function
    #include <fcntl.h>
    #include <unistd.h>
    
    int symlink(const char *target, const char *linkpath);
    int symlinkat(const char *target, int newdirfd, const char *linkpath);
    
  • return value
    • If successful, return 0
    • If an error occurs, -1 is returned
  • Because the open function follows a symbolic link, there needs to be a way to open the link itself and read the names in the link . The readlink and readlinkat functions provide this functionality.
    #include <fcntl.h>
    #include <unistd.h>
    
    ssize_t readlink(const char *pathname, char *buf, size_t bufsiz);
    ssize_t readlinkat(int fd, const char *pathname, char *buf, size_t bufsiz);
    

2.18 Functions mkdir, mkdirat and rmdir

2.18.1 Functions mkdir, mkdirat (create directory)

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>

int mkdir(const char *pathname, mode_t mode);
int mkdirat(int fd, const char *pathname, mode_t mode);
  • return value
    • If successful, return 0
    • If an error occurs, -1 is returned
  • These two functions create a new empty directory
    • Among them, the . and .. directory entries are automatically created. The specified file access permission mode is modified by the process's file mode creation mask.
    • A common mistake is to specify the same mode as the file (only read and write permissions are specified). However, directories usually have at least one execute permission bit set to allow access to file names in the directory.

2.18.2 Function rmdir (delete directory)

#include <unistd.h>

int rmdir(const char *pathname);
  • return value

    • If successful, return 0
    • If an error occurs, -1 is returned
  • Use the rmdir function to delete an empty directory

    • An empty directory is a directory containing only the . and .. entries
  • If this function is called so that the directory's link count becomes 0, and no other process has this directory open, the space occupied by this directory is released.

  • If one or more processes have this directory open when the link count reaches 0, the last link and the . and .. entries are deleted before this function returns.

  • Additionally, no new files can be created in this directory. But this directory is not released until the last process closes it

2.18.3 File and directory permissions

  • A directory file is also a "file": its content is the set of directory entries (dentry) for all the files in that directory. You can try to open a directory with vim

Insert image description here

2.19 Reading directory

  • Any user with access to a directory can read the directory, but to prevent file system clutter, only the kernel can write to the directory.
    #include <sys/types.h>
    #include <dirent.h>
    
    // 1. Open a directory
    // On success, returns a pointer to a directory stream; on error, returns NULL
    // DIR* is analogous to FILE*
    DIR *opendir(const char *name);
    DIR *fdopendir(int fd);
    
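    // 2. Read a directory
    // On success, returns a pointer to a directory-entry structure; at end of directory or on error, returns NULL (errno is set only on error)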
    struct dirent *readdir(DIR *dirp);
    
    // 3. Close a directory
    // On success, returns 0; on error, returns -1 and sets errno accordingly
    int closedir(DIR *dirp);
    
  • The dirent structure defined in the header file <dirent.h> is implementation dependent. The implementation's definition of this structure contains at least the following two members
    ino_t d_ino;        // inode number
    char d_name[256];   // file name
    

Case

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <dirent.h>

// Minimal "ls": print the names found in one directory, tab separated, on a
// single line.
//
//   argv[1] - directory to list; when no argument is given the current
//             directory (".") is listed instead of passing a null pointer
//             to opendir.
//
// Only the "." entry is filtered out; ".." and hidden names are printed.
// Exits with status 1 if the directory cannot be opened, returns 0 otherwise.
int main(int argc, char* argv[]) {
    const char* target = (argc > 1) ? argv[1] : ".";
    DIR* dp;
    struct dirent* sdp;

    dp = opendir(target);
    if (dp == NULL) {
        perror("opendir error");
        exit(1);
    }

    // readdir returns NULL once every entry has been delivered.
    while ((sdp = readdir(dp)) != NULL) {
        if (strcmp(sdp->d_name, ".") == 0) {
            continue;
        }
        printf("%s\t", sdp->d_name);
    }
    printf("\n");

    closedir(dp);

    return 0;
}
$ gcc myls.c -o myls
$ ./myls ..   # similar to the ls .. command

2.20 Functions chdir, fchdir and getcwd

2.20.1 Function chdir, fchdir

  • Each process has a current working directory, which is the starting point for searching all relative pathnames (pathnames that do not start with a slash are relative pathnames). The current working directory is an attribute of the process, and the starting directory is an attribute of the login name.

  • The process calls the chdir or fchdir function to change the current working directory

    #include <unistd.h>
    
    int chdir(const char *path);
    int fchdir(int fd);
    
  • return value

    • If successful, return 0
    • If an error occurs, -1 is returned
  • Because the current working directory is an attribute of the process, it only affects the process itself that calls chdir and does not affect other processes.

2.20.2 Function getcwd

  • How the getcwd function works

    • Starting from the current working directory (.), use .. to find its upper-level directory, and then read that directory's entries until the i-node number in a directory entry is the same as the i-node number of the working directory, thus finding its corresponding file name
    • According to this method, move up layer by layer until you encounter the root. In this way, you will get the complete absolute path name of the current working directory.
    #include <unistd.h>
    
    char* getcwd(char* buf, size_t size);
    
  • return value

    • If successful, return buf
    • If an error occurs, NULL is returned
  • Two parameters must be passed to this function, one is the buffer address buf, and the other is the length of the buffer size (in bytes). The buffer must be long enough to hold the absolute pathname plus a terminating null byte, otherwise an error is returned

  • The getcwd function is useful when an application needs to return to the starting point of its work in the file system

    • Before changing the working directory, you can call the getcwd function to save it first. After completing the processing, the saved original working directory path name can be passed to chdir as a calling parameter, thus returning to the starting point in the file system

2.21 Device special files

  • st_dev and st_rdev are two fields that often cause confusion
    • The storage device on which each file system resides is represented by its major and minor device numbers.
      • The data type used for the device number is the basic system data type dev_t
      • The major device number identifies the device driver
      • The minor device number identifies a specific subdevice
    • You can usually use two macros: major and minor to access the major and minor device numbers.
    • The st_dev value associated with each file name in the system is the device number of the file system that contains the file name and its corresponding i-node
    • Only character special files and block special files have the st_rdev value, which contains the device number of the actual device.

Recursive directory traversal case

1. Idea analysis

Insert image description here

2. Code implementation

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/stat.h>
#include <dirent.h>

void isFile(char* name);

// Open a directory, walk its entries, and pass the path of each entry to a
// callback.
//
//   dir  - path of the directory to read
//   func - called once per entry with the string "dir/entry-name". The string
//          lives in a local buffer that is overwritten on the next iteration,
//          so the callback must not keep the pointer after it returns.
//
// The "." and ".." entries are skipped. If the directory cannot be opened,
// the error is reported with perror and the callback is never invoked.
// An entry whose joined path does not fit in the buffer is reported on
// stderr and skipped rather than written past the end of the buffer.
void read_dir(char* dir, void (*func)(char*)) {
    enum { PATH_BUF_SIZE = 1024 };
    char path[PATH_BUF_SIZE];
    DIR* dp;
    struct dirent* sdp;

    dp = opendir(dir);
    if (dp == NULL) {
        perror("opendir error");
        return;
    }

    // Read the directory entries one at a time.
    while ((sdp = readdir(dp)) != NULL) {
        if (strcmp(sdp->d_name, ".") == 0 || strcmp(sdp->d_name, "..") == 0) {
            continue;
        }

        // A bare entry name is relative to dir, so build "dir/entry".
        // snprintf never writes more than sizeof(path) bytes; a return value
        // at or above the buffer size means the path was truncated.
        int len = snprintf(path, sizeof(path), "%s/%s", dir, sdp->d_name);
        if (len < 0 || (size_t)len >= sizeof(path)) {
            fprintf(stderr, "path too long, skipped: %s/%s\n", dir, sdp->d_name);
            continue;
        }

        // Let the callback decide what to do with this path.
        (*func)(path);
    }
    closedir(dp);
}

// Report one path: print its name and size, descending into it first when it
// is a directory.
//
//   name - path to inspect
//
// The attributes are fetched with stat; on failure the error is reported with
// perror and nothing is printed. For a directory, read_dir is called with this
// function as the callback, so the contents are printed before the directory's
// own line.
void isFile(char* name) {
    struct stat sub;

    // Fetch the file attributes to learn the type and the size.
    if (stat(name, &sub) == -1) {
        perror("stat error");
        return;
    }

    // Directory: recurse into its entries before printing its own line.
    if (S_ISDIR(sub.st_mode)) {
        read_dir(name, isFile);
    }

    // st_size is an off_t, whose width varies between platforms; cast it so
    // the argument always matches the %lld conversion.
    printf("%10s\t\t%lld\n", name, (long long)sub.st_size);
}

// Entry point of the recursive listing example.
//
//   ./ls-R            -> argc == 1, start from the current directory "."
//   ./ls-R /home/test -> start from argv[1]
//
// Always returns 0.
int main(int argc, char* argv[]) {
    // Pick the starting path from the command line, falling back to ".".
    char* start = (argc == 1) ? "." : argv[1];

    isFile(start);

    return 0;
}
$ gcc ls-R.c -o ls-R
$ ./ls-R 
   ./fcntl		8384
 ./mycat.c		262
  ./ls-R.c		943
  ./fcntl2		8432
    ./ls-R		8768
         .		4096

Guess you like

Origin blog.csdn.net/qq_42994487/article/details/132910626