linux内核源码研究--文件系统VFS层实现

文件系统的分类:

  • 磁盘文件系统
  • 内存虚拟文件系统
  • 网络文件系统

VFS作为通用的模型,对上(用户)解决了各种不同文件系统的统一接口问题,同时对下(具体的文件系统)兼容各种新的文件系统,让Linux变得更加强大灵活。

                                                                                      VFS的架构图

实现思路:

  1. 定义一个最小的通用模型
  2. 定义一个强大的模型,具体的文件系统可以选择性地实现其中的功能(ext2->VFS)

下面是通过inode查找一个文件的过程(实际的实现过程中会有缓存来加速文件查找)

 链接

  1. 软链接:有一个目录项,指向独立INODE,INODE不存文件数据,存指向真实文件的路径,可以在不同分区
  2. 硬链接:有一个目录项,没有独立INODE,指向原始INODE,并且原始INODE的硬链接计数(i_nlink)加1,不能在不同分区

编程接口

  1. 文件描述符(系统提供)
  2. 流操作(C库)

万物皆文件

  1. 字符和块设备文件
  2. 管道文件
  3. 用于所有网络协议的套接字(网络设备例外)
  4. 终端

VFS的主要数据结构

 

INODE操作:用来操作文件元数据和文件管理,创建文件,删除文件,重命名文件,创建链接

文件操作:用来操作数据的,如读写文件,操作文件位置,创建内存映射

 

/*
 * VFS inode: the metadata of one file, independent of any name (dentry)
 * that refers to it. The trailing notes are reviewer annotations on the
 * kernel definition.
 */
struct inode {
        umode_t                i_mode;                   // access permissions (mode bits)
        unsigned short                i_opflags;
        kuid_t                        i_uid;                      // UID of the file's owner
        kgid_t                        i_gid;                      // GID of the file's owner
        unsigned int                i_flags;

#ifdef CONFIG_FS_POSIX_ACL
        struct posix_acl        *i_acl;
        struct posix_acl        *i_default_acl;
#endif

const struct inode_operations        *i_op;                                    // inode operations: metadata and file management (create, delete, rename, link); a method table, like C++ object methods
        struct super_block        *i_sb;                                                  // super block of the filesystem (partition) this file lives on
        struct address_space        *i_mapping;                          // address space used for the file's memory mapping

#ifdef CONFIG_SECURITY
        void                        *i_security;
#endif

/* Stat data, not accessed from path walking */
        unsigned long                i_ino;                                            // inode number
        /*
         * Filesystems may only read i_nlink directly.  They shall use the
         * following functions for modification:
         *
         *    (set|clear|inc|drop)_nlink
         *    inode_(inc|dec)_link_count
         */
        union {
                const unsigned int i_nlink;                            // number of hard links to the file

                unsigned int __i_nlink;
        };
        dev_t                        i_rdev;                                        // device number (dev_t) of the real device; NOTE(review): believed to be meaningful only for device special files - confirm
        loff_t                        i_size;                                        // file size in bytes
        struct timespec                i_atime;                        // last access time
        struct timespec                i_mtime;                        // last modification time of the file data
        struct timespec                i_ctime;                        // last inode (status) change time - not the creation time
        spinlock_t                i_lock;        /* i_blocks, i_bytes, maybe i_size */
        unsigned short        i_bytes;

        unsigned int                i_blkbits;
        blkcnt_t                i_blocks;                                      // file size in blocks; NOTE(review): stat(2) reports this in 512-byte units rather than the block size chosen at mkfs time - confirm

#ifdef __NEED_I_SIZE_ORDERED
        seqcount_t                i_size_seqcount;
#endif

/* Misc */
        unsigned long                i_state;
        struct mutex                i_mutex;

        unsigned long                dirtied_when;        /* jiffies of first dirtying */
        unsigned long                dirtied_time_when;

        struct hlist_node        i_hash;                                                  // link in the global inode_hashtable
        struct list_head        i_wb_list;        /* backing dev IO list */
        struct list_head        i_lru;                /* inode LRU list */
        struct list_head        i_sb_list;
        union {
                struct hlist_head        i_dentry;                               // dentries that refer to this inode (see dentry d_alias); NOTE(review): not the entries contained in a directory - verify
                struct rcu_head                i_rcu;
        };
        u64                        i_version;
        atomic_t                i_count;                                                // reference count: number of current users of this inode

        atomic_t                i_dio_count;
        atomic_t                i_writecount;
#ifdef CONFIG_IMA
        atomic_t                i_readcount; /* struct files open RO */
#endif
        const struct file_operations        *i_fop;        /* former ->i_op->default_file_ops */  // file operations: act on file data (read, write, seek, mmap); a method table, like C++ object methods
        struct file_lock_context        *i_flctx;
        struct address_space        i_data;
        struct list_head        i_devices;                                               // used when one device is represented by several inodes at once (e.g. under chroot)
        union {
                struct pipe_inode_info        *i_pipe;                      // pipe-specific data structure
                struct block_device        *i_bdev;                                     // block-device-specific data structure
                struct cdev                *i_cdev;                                     // character-device-specific data structure
        };

__u32                        i_generation;

#ifdef CONFIG_FSNOTIFY
        __u32                        i_fsnotify_mask; /* all events this inode cares about */
        struct hlist_head        i_fsnotify_marks;
#endif

void                        *i_private; /* fs or device private pointer */       // points to a filesystem- or driver-defined structure (used very often)
};

 

/*
 * Directory entry: binds a name (d_name) inside a parent directory
 * (d_parent) to an inode (d_inode).
 */
struct dentry {
        /* RCU lookup touched fields */
        unsigned int d_flags;                /* protected by d_lock */                                               // flags, e.g. whether the entry is a directory or a file
        seqcount_t d_seq;                /* per dentry seqlock */
        struct hlist_bl_node d_hash;        /* lookup hash list */                                                     // chain link for collisions in the global dentry_hashtable
        struct dentry *d_parent;        /* parent directory */                                                                   // parent directory
        struct qstr d_name;                                                                                                                     // name of the entry (used for long names)
        struct inode *d_inode;                /* Where the name belongs to - NULL is
                                         * negative; this is the file's inode */
        unsigned char d_iname[DNAME_INLINE_LEN];        /* small names */                                   // inline storage for short file names

/* Ref lookup also touches following */
        struct lockref d_lockref;        /* per-dentry lock and refcount */
        const struct dentry_operations *d_op;                                                                             // method table for this dentry
        struct super_block *d_sb;        /* The root of the dentry tree */                                // super block instance of the owning filesystem
        unsigned long d_time;                /* used by d_revalidate */
        void *d_fsdata;                        /* fs-specific data */

        struct list_head d_lru;                /* LRU list */                                                // least-recently-used list
        struct list_head d_child;        /* child of parent list */                                             // link among siblings in the parent's child list
        struct list_head d_subdirs;        /* our children */                                                             // head of this dentry's list of children
        /*
         * d_alias and d_rcu can share memory
         */
        union {
                struct hlist_node d_alias;        /* inode alias list */
                 struct rcu_head d_rcu;
        } d_u;
};

 

 

/* Method table a filesystem may install on its dentries (see dentry d_op). */
struct dentry_operations {
        int (*d_revalidate)(struct dentry *, unsigned int);                   // very useful for network filesystems: checks whether a cached dentry is still valid on the remote side
        int (*d_weak_revalidate)(struct dentry *, unsigned int);
        int (*d_hash)(const struct dentry *, struct qstr *);                     // compute the name hash
        int (*d_compare)(const struct dentry *, const struct dentry *,     // compare file names

                        unsigned int, const char *, const struct qstr *);
        int (*d_delete)(const struct dentry *);                    // called when the reference count drops to zero

        void (*d_release)(struct dentry *);                         // NOTE(review): per the kernel VFS docs this runs when the dentry is really deallocated, not on every reference drop - confirm
        void (*d_prune)(struct dentry *);
        void (*d_iput)(struct dentry *, struct inode *);
        char *(*d_dname)(struct dentry *, char *, int);
        struct vfsmount *(*d_automount)(struct path *);
        int (*d_manage)(struct dentry *, bool);
} ____cacheline_aligned;

 

 

/*
 * Super block: one instance per mounted filesystem, holding filesystem-wide
 * state and the super_operations method table.
 */
struct super_block {
        struct list_head        s_list;                /* Keep this first */        // NOTE(review): believed to be the link in the global list of super blocks, not a list of open files - verify

        dev_t                        s_dev;                /* search index; _not_ kdev_t */     // identifier of the underlying real device
        unsigned char                s_blocksize_bits;                                            // block size expressed as a power of two (log2)
        unsigned long                s_blocksize;                                                     // block size in bytes

        loff_t                        s_maxbytes;        /* Max file size */          // maximum file length

        struct file_system_type        *s_type;                                              // filesystem type object this instance belongs to
        const struct super_operations        *s_op;                                   // method table for this super block
        
const struct dquot_operations        *dq_op;
        const struct quotactl_ops        *s_qcop;
        const struct export_operations *s_export_op;
        unsigned long                s_flags;
        unsigned long                s_magic;                                                           // filesystem magic number
        struct dentry                *s_root;                                                           // root directory dentry; NOTE(review): said to be NULL for in-memory pseudo filesystems - confirm
        
struct rw_semaphore        s_umount;
        int                        s_count;
        atomic_t                s_active;
#ifdef CONFIG_SECURITY
        void                    *s_security;
#endif
        const struct xattr_handler **s_xattr;

        struct list_head        s_inodes;        /* all inodes */                      // all inodes managed by this super block

        struct hlist_bl_head        s_anon;                /* anonymous dentries for (nfs) exporting */
        struct list_head        s_mounts;        /* list of mounts; _not_ for fs use */
        struct block_device        *s_bdev;                         // block device structure
        
struct backing_dev_info *s_bdi;
        struct mtd_info                *s_mtd;
        struct hlist_node        s_instances;                 // link in file_system_type.fs_supers, chaining all instances of the same filesystem type
        
unsigned int                s_quota_types;        /* Bitmask of supported quota types */
        struct quota_info        s_dquot;        /* Diskquota specific options */

        struct sb_writers        s_writers;

        char s_id[32];                                /* Informational name */
        u8 s_uuid[16];                                /* UUID */

        void                         *s_fs_info;        /* Filesystem private info */     // filesystem-private data
        
unsigned int                s_max_links;
        fmode_t                        s_mode;

/* Granularity of c/m/atime in ns.
           Cannot be worse than a second */
        u32                   s_time_gran;

/*
         * The next field is for VFS *only*. No filesystems have any business
         * even looking at it. You had been warned.
         */
        struct mutex s_vfs_rename_mutex;        /* Kludge */

/*
         * Filesystem subtype.  If non-empty the filesystem type field
         * in /proc/mounts will be "type.subtype"
         */
        char *s_subtype;

/*
         * Saved mount options for lazy filesystems using
         * generic_show_options()
         */
        char __rcu *s_options;
        const struct dentry_operations *s_d_op; /* default d_op for dentries */

/*
         * Saved pool identifier for cleancache (-1 means none)
         */
        int cleancache_poolid;

              struct shrinker s_shrink;        /* per-sb shrinker handle */

/* Number of inodes with nlink == 0 but still referenced */
        atomic_long_t s_remove_count;

/* Being remounted read-only */
        int s_readonly_remount;

/* AIO completions deferred from interrupt context */
        struct workqueue_struct *s_dio_done_wq;
        struct hlist_head s_pins;

/*
         * Keep the lru lists last in the structure so they always sit on their
         * own individual cachelines.
         */
        struct list_lru                s_dentry_lru ____cacheline_aligned_in_smp;
        struct list_lru                s_inode_lru ____cacheline_aligned_in_smp;
        struct rcu_head                rcu;

/*
         * Indicates how deep in a filesystem stack this SB is
         */
        int s_stack_depth;
};

/* A location in the mounted namespace: a mount plus a dentry within it. */
struct path {
        struct vfsmount *mnt;                 // mount point
        struct dentry *dentry;                  // mapping between the file name and its inode
};

/*
 * An open file: ties a path/inode to the open flags, access mode and
 * current position of one open instance.
 */
struct file {
        union {
                struct llist_node        fu_llist;                           // NOTE(review): the article ties this to a per-super-block list of all open files; in this kernel generation it may instead serve deferred fput - confirm
                struct rcu_head         fu_rcuhead;
        } f_u;
        struct path                f_path;                                             // mount point plus the name-to-inode mapping (dentry)
        struct inode                *f_inode;        /* cached value */
        const struct file_operations        *f_op;                  // method table for file operations

/*
         * Protects f_ep_links, f_flags.
         * Must not be taken from IRQ context.
         */
        spinlock_t                f_lock;
        atomic_long_t                f_count;
        unsigned int                 f_flags;                                             // flags passed to the open system call
        fmode_t                        f_mode;                                          // access mode of the open file
        struct mutex                f_pos_lock;
        loff_t                        f_pos;                                                  // current file position
        struct fown_struct        f_owner;                                         // process to notify with SIGIO; used to implement asynchronous I/O notification
        const struct cred        *f_cred;
        struct file_ra_state        f_ra;

        u64                        f_version;
#ifdef CONFIG_SECURITY
        void                        *f_security;
#endif
        /* needed for tty driver, and maybe others */
        void                        *private_data;

#ifdef CONFIG_EPOLL
        /* Used by fs/eventpoll.c to link all the hooks to this file */
        struct list_head        f_ep_links;
        struct list_head        f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
        struct address_space        *f_mapping;                          // address space mapping of the inode instance
} __attribute__((aligned(4)));        /* lest something weird decides that 2 is OK */

 

 

对如下的操作方法的理解需要很熟悉文件相关的系统调用

/*
 * Method table for operations on file data (read/write, positioning, memory
 * mapping), as opposed to inode_operations which manage metadata. Most
 * members back a file-related system call.
 */
struct file_operations {
        struct module *owner;
        loff_t (*llseek) (struct file *, loff_t, int);
        ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);                     // read file contents
        ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);           // write file contents

        ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);    // asynchronous read
        ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);    // asynchronous write
        ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);                 // vectored read (scatter-gather across multiple buffers)

        ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
        int (*iterate) (struct file *, struct dir_context *);
        unsigned int (*poll) (struct file *, struct poll_table_struct *);           // backs the poll and select system calls (reads that can wait with a timeout)

        long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
        long (*compat_ioctl) (struct file *, unsigned int, unsigned long);               // ioctl handlers; normally only device files need them
        int (*mmap) (struct file *, struct vm_area_struct *);                                    // implements file memory mapping (backs the mmap system call)

         int (*mremap)(struct file *, struct vm_area_struct *);
        int (*open) (struct inode *, struct file *);                                                          // open the file, i.e. associate the struct file with its inode

        int (*flush) (struct file *, fl_owner_t id);       // called when a file descriptor is closed (one reference goes away)
        int (*release) (struct inode *, struct file *);    // NOTE(review): per the kernel VFS docs, called when the last reference to the open file is dropped; frees its resources - confirm
        int (*fsync) (struct file *, loff_t, loff_t, int datasync);   // backs the fsync and fdatasync system calls: syncs data to the device
        int (*aio_fsync) (struct kiocb *, int datasync);
        int (*fasync) (int, struct file *, int);
        int (*lock) (struct file *, int, struct file_lock *);    // file locking support
        ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
        unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
        int (*check_flags)(int);
        int (*flock) (struct file *, int, struct file_lock *);
        ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);  // supports the splice call (file <=> pipe)
        ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
        int (*setlease)(struct file *, long, struct file_lock **, void **);
        long (*fallocate)(struct file *file, int mode, loff_t offset,
                          loff_t len);
        void (*show_fdinfo)(struct seq_file *m, struct file *f);
#ifndef CONFIG_MMU
        unsigned (*mmap_capabilities)(struct file *);
#endif
};

/*
 * Method table for operations on inodes: file metadata and file management
 * such as creating, deleting and renaming files and creating links (as
 * opposed to file_operations, which act on file data).
 */
struct inode_operations {
        struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
        void * (*follow_link) (struct dentry *, struct nameidata *);
        int (*permission) (struct inode *, int);
        struct posix_acl * (*get_acl)(struct inode *, int);

int (*readlink) (struct dentry *, char __user *,int);
        void (*put_link) (struct dentry *, struct nameidata *, void *);

/* creation, removal and renaming of directory entries */
int (*create) (struct inode *,struct dentry *, umode_t, bool);
        int (*link) (struct dentry *,struct inode *,struct dentry *);
        int (*unlink) (struct inode *,struct dentry *);
        int (*symlink) (struct inode *,struct dentry *,const char *);
        int (*mkdir) (struct inode *,struct dentry *,umode_t);
        int (*rmdir) (struct inode *,struct dentry *);
        int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
        int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
        int (*rename2) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *, unsigned int);
        /* attributes and extended attributes */
        int (*setattr) (struct dentry *, struct iattr *);
        int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
        int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
        ssize_t (*listxattr) (struct dentry *, char *, size_t);
        int (*removexattr) (struct dentry *, const char *);
        int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
                      u64 len);
        int (*update_time)(struct inode *, struct timespec *, int);
        int (*atomic_open)(struct inode *, struct dentry *,
                           struct file *, unsigned open_flag,
                           umode_t create_mode, int *opened);
        int (*tmpfile) (struct inode *, struct dentry *, umode_t);
        int (*set_acl)(struct inode *, struct posix_acl *, int);

/* WARNING: probably going away soon, do not use! */
        int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
} ____cacheline_aligned;

 

// Filesystem information kept in task_struct (per-process filesystem state)

struct fs_struct {

int users;

spinlock_t lock;

seqcount_t seq;

int umask;                                        // default permission mask for files newly created by the process

int in_exec;

struct path root, pwd;               // root and current working directory, each stored together with its mount point

};

/* A mount namespace: the set of mounts visible to the processes using it. */
struct mnt_namespace {

atomic_t                count;      // number of processes using this namespace

struct ns_common        ns;

struct mount *        root;                      // mount of the root directory

struct list_head        list;        // list head chaining together all mounts in this namespace

struct user_namespace        *user_ns;

u64                        seq;        /* Sequence number to prevent loops */

wait_queue_head_t poll;

u64 event;

};

 

/*
 * Describes one kind of filesystem (a "class" of which each mounted
 * super_block is an instance); passed to register_filesystem().
 */
struct file_system_type {
        const char *name;                                // name of the filesystem

         int fs_flags;
#define FS_REQUIRES_DEV                1
#define FS_BINARY_MOUNTDATA        2
#define FS_HAS_SUBTYPE                4
#define FS_USERNS_MOUNT                8        /* Can be mounted by userns root */
#define FS_USERNS_DEV_MOUNT        16 /* A userns mount does not imply MNT_NODEV */
#define FS_RENAME_DOES_D_MOVE        32768        /* FS will handle d_move() during rename() internally. */
        struct dentry *(*mount) (struct file_system_type *, int,   // mount operation
                       const char *, void *);
        void (*kill_sb) (struct super_block *);
        struct module *owner;                                      // owning kernel module
        struct file_system_type * next;                         // next entry in the system-wide list of filesystem types
        struct hlist_head fs_supers;                              // chains together the super_block instances of this filesystem type

struct lock_class_key s_lock_key;
        struct lock_class_key s_umount_key;
        struct lock_class_key s_vfs_rename_key;
        struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];

struct lock_class_key i_lock_key;
        struct lock_class_key i_mutex_key;
        struct lock_class_key i_mutex_dir_key;
};

注册一个新文件系统

/* Registers a new filesystem type, described by the given file_system_type. */
int register_filesystem(struct file_system_type *);

 

 

 

/* One mounted filesystem instance as seen by the VFS. */
struct vfsmount {
        struct dentry *mnt_root;        /* root of the mounted tree */   // root dentry of this mount
        struct super_block *mnt_sb;        /* pointer to superblock */        // pointer to the super block
        int mnt_flags;

};

struct super_operations {

struct inode *(*alloc_inode)(struct super_block *sb);    //分配新的inode

void (*destroy_inode)(struct inode *);

void (*dirty_inode) (struct inode *, int flags);              //标记为脏节点

int (*write_inode) (struct inode *, struct writeback_control *wbc);  //

int (*drop_inode) (struct inode *);

void (*evict_inode) (struct inode *);

void (*put_super) (struct super_block *);

int (*sync_fs)(struct super_block *sb, int wait);                将文件系统数据与设备同步

int (*freeze_super) (struct super_block *);

int (*freeze_fs) (struct super_block *);

int (*thaw_super) (struct super_block *);

int (*unfreeze_fs) (struct super_block *);

int (*statfs) (struct dentry *, struct kstatfs *);              //支持statfs系统调用

int (*remount_fs) (struct super_block *, int *, char *);   //重新装载文件系统

void (*umount_begin) (struct super_block *);                           //用于网络文件系统

int (*show_options)(struct seq_file *, struct dentry *);        //用于proc文件系统

int (*show_devname)(struct seq_file *, struct dentry *);

int (*show_path)(struct seq_file *, struct dentry *);

int (*show_stats)(struct seq_file *, struct dentry *);             //用于proc文件系统

#ifdef CONFIG_QUOTA

ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);

ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);

struct dquot **(*get_dquots)(struct inode *);

#endif

int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);

long (*nr_cached_objects)(struct super_block *, struct shrink_control *);

long (*free_cached_objects)(struct super_block *, struct shrink_control *);

}; 

/fs/namespace.c

long do_mount(const char *dev_name, const char __user *dir_name, const char *type_page, unsigned long flags, void *data_page)

上面的 do_mount() 实现了文件系统的mount(挂载)操作

伪文件系统不可以被用户装载(mount),它专门供内核内部使用,用来管理一些特殊文件的inode

猜你喜欢

转载自blog.csdn.net/softgmx/article/details/82259567