Linux VFS: the mount() system call

1. We know that before a file can be opened, read, or written, the filesystem containing it must be mounted. So how does the kernel mount a filesystem; in other words, what exactly does the kernel do during a mount? That is the topic of this article. Before a filesystem can be mounted, the device must be formatted with a filesystem type, which the mkfs command does. In Linux, one filesystem type can cover many filesystems: /dev/sda and /dev/sdb can both be formatted as ext3, in which case both belong to the ext3 type. Every filesystem has one superblock, represented by a super_block object. A directory can have several different filesystems mounted on it in succession; /mnt, for example, can be mounted with /dev/sda and then /dev/sdb. Parent-child relationships therefore arise between these mounts, and those relationships form the mount tree. The d_mounted field of a dentry records how many filesystems are mounted on that dentry. As we have stressed before, a dentry object is not a directory: files have dentries just as directories do, and they serve mainly for path lookup inside the kernel. Conversely, a single filesystem can be mounted at several directories, yet it has only one superblock; all of those mounts share the same super_block object.
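
Before diving into the kernel, it helps to see the user-space side of the call we are about to trace. Below is a minimal sketch (the device and mount-point paths are only examples, and the program needs CAP_SYS_ADMIN to succeed):

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* equivalent to: mount -t ext3 -o ro /dev/sdb /mnt */
	if (mount("/dev/sdb", "/mnt", "ext3", MS_RDONLY, NULL) < 0) {
		perror("mount");
		return 1;
	}
	return 0;
}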

2. The entry point is sys_mount, in fs/namespace.c:

/**
dev_name: name of a device file holding the filesystem, e.g. /dev/sda
dir_name: the mount-point directory
type: a registered filesystem type
flags: mount flags
data: filesystem-specific data, may be NULL
**/
asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
			  char __user * type, unsigned long flags,
			  void __user * data)
{
	int retval;
	unsigned long data_page;
	unsigned long type_page;
	unsigned long dev_page;
	char *dir_page;

	retval = copy_mount_options(type, &type_page);	/* copy the type into a fresh page, zero-padded if shorter than a page */
	if (retval < 0)
		return retval;

	dir_page = getname(dir_name);	/* copy the pathname from user space into a kernel page */
	retval = PTR_ERR(dir_page);
	if (IS_ERR(dir_page))
		goto out1;

	retval = copy_mount_options(dev_name, &dev_page);	/* copy the device name from user space */
	if (retval < 0)
		goto out2;

	retval = copy_mount_options(data, &data_page);
	if (retval < 0)
		goto out3;
	/* take the big kernel lock */
	lock_kernel();
	/* mount the filesystem */
	retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
			  flags, (void *)data_page);
	unlock_kernel();
	free_page(data_page);

out3:
	free_page(dev_page);
out2:
	putname(dir_page);
out1:
	free_page(type_page);
	return retval;
}

sys_mount first copies the user-supplied arguments into kernel memory: the device name dev_name, the filesystem type type, and data go into the kernel pages dev_page, type_page, and data_page, while the directory name goes through getname into dir_page. It then takes the big kernel lock and hands the real work to do_mount.
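
copy_mount_options is worth a quick look. The sketch below is condensed from the fs/namespace.c of this kernel era (exact_copy_from_user is its internal helper that tolerates faults near the end of the user buffer): grab a free page, copy at most PAGE_SIZE bytes from user space, and zero-fill the tail, which is exactly the zero-padding noted above:

static long copy_mount_options(const void __user *data, unsigned long *where)
{
	unsigned long page;
	unsigned long size;
	int i;

	*where = 0;
	if (!data)
		return 0;		/* a NULL argument is allowed */

	if (!(page = __get_free_page(GFP_KERNEL)))
		return -ENOMEM;

	/* copy_from_user cannot cross TASK_SIZE */
	size = TASK_SIZE - (unsigned long)data;
	if (size > PAGE_SIZE)
		size = PAGE_SIZE;

	i = size - exact_copy_from_user((void *)page, data, size);
	if (!i) {			/* nothing copied: bad user pointer */
		free_page(page);
		return -EFAULT;
	}
	if (i != PAGE_SIZE)		/* zero-pad the rest of the page */
		memset((char *)page + i, 0, PAGE_SIZE - i);
	*where = page;
	return 0;
}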

/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * information (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.
 * When the flags word was introduced its top half was required
 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */

long do_mount(char *dev_name, char *dir_name, char *type_page,
		  unsigned long flags, void *data_page)
{
	struct nameidata nd;
	int retval = 0;
	int mnt_flags = 0;

	/* Discard magic */
	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
		flags &= ~MS_MGC_MSK;

	/* Basic sanity checks: the directory name must be non-empty and
	   NUL-terminated within a page; likewise the device name */
	if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
		return -EINVAL;
	if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
		return -EINVAL;

	if (data_page)
		((char *)data_page)[PAGE_SIZE - 1] = 0;
	/* Separate the per-mountpoint flags */
	if (flags & MS_NOSUID)
		mnt_flags |= MNT_NOSUID;	/* ignore setuid/setgid bits */
	if (flags & MS_NODEV)
		mnt_flags |= MNT_NODEV;		/* disallow access to device files */
	if (flags & MS_NOEXEC)
		mnt_flags |= MNT_NOEXEC;	/* disallow program execution */
	if (flags & MS_NOATIME)
		mnt_flags |= MNT_NOATIME;	/* do not update file access times */
	if (flags & MS_NODIRATIME)
		mnt_flags |= MNT_NODIRATIME;	/* do not update directory access times */
	if (flags & MS_RELATIME)
		mnt_flags |= MNT_RELATIME;
	/* these have been absorbed into mnt_flags; clear them */
	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME);

	/* ... and get the mountpoint: resolve the path; the resulting
	   mount-point dentry and vfsmount are stored in nd */
	retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
	if (retval)
		return retval;

	retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
	if (retval)
		goto dput_out;
	/* remount: typically used to change mount flags, e.g. to turn a
	   read-only filesystem read-write; the mount point itself usually
	   does not change */
	if (flags & MS_REMOUNT)
		retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
			    data_page);
	/* bind mount: make part of a filesystem visible at a second place,
	   so the directory can be reached from both */
	else if (flags & MS_BIND)
		retval = do_loopback(&nd, dev_name, flags & MS_REC);
	/* change the propagation type of the mount point */
	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		retval = do_change_type(&nd, flags);
	/* move an already mounted filesystem to a new mount point,
	   i.e. move a subtree of the mount tree */
	else if (flags & MS_MOVE)
		retval = do_move_mount(&nd, dev_name);
	else	/* the common case: establish a brand-new mount */
		retval = do_new_mount(&nd, type_page, flags, mnt_flags,
				      dev_name, data_page);
dput_out:
	path_release(&nd);
	return retval;
}

do_mount first performs the basic sanity checks (the directory name must be non-empty, the device name NUL-terminated, and so on), then derives the per-mountpoint flags mnt_flags from flags. path_lookup resolves the directory name to a dentry and stores both that dentry and the vfsmount of the mount point in the nameidata structure. Based on the remaining flags it then dispatches: remount an existing filesystem (do_remount), bind-mount part of a tree (do_loopback), change the propagation type (do_change_type), move a mounted filesystem (do_move_mount), or, in the common case we care about, mount a new filesystem with do_new_mount.
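
Each branch corresponds to a familiar mount(8) invocation. As an illustration (the paths are made up and error handling is omitted), the user-space calls map onto the branches like this:

#include <sys/mount.h>

int main(void)
{
	/* do_remount:    mount -o remount,ro /mnt */
	mount(NULL, "/mnt", NULL, MS_REMOUNT | MS_RDONLY, NULL);
	/* do_loopback:   mount --bind /src /dst */
	mount("/src", "/dst", NULL, MS_BIND, NULL);
	/* do_move_mount: mount --move /old /new */
	mount("/old", "/new", NULL, MS_MOVE, NULL);
	/* do_new_mount:  mount -t ext3 /dev/sdb /mnt */
	return mount("/dev/sdb", "/mnt", "ext3", 0, NULL) ? 1 : 0;
}

Now let's follow do_new_mount: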

/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
/**
nd: nameidata holding the mount-point dentry and vfsmount
type: filesystem type name
flags: the original mount flags
mnt_flags: the per-mountpoint flags derived above
name: device name
data: filesystem-private data
**/
static int do_new_mount(struct nameidata *nd, char *type, int flags,
			int mnt_flags, char *name, void *data)
{
	struct vfsmount *mnt;	/* the mount object to be created */

	if (!type || !memchr(type, 0, PAGE_SIZE))
		return -EINVAL;

	/* we need capabilities... check that the caller may mount */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	/* build a new mount object, including a new superblock */
	mnt = do_kern_mount(type, flags, name, data);
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);
	/* insert the mount into the mount tree, the hash table,
	   and the parent mount's child list */
	return do_add_mount(mnt, nd, mnt_flags, NULL);
}

This function does two big things. First, it builds a new mount object and a superblock object and links the two together. Second, it inserts the mount object into the mount tree. Let's look at both functions, starting with do_kern_mount:

/**
fstype: the filesystem type to mount, e.g. ext3
flags: mount flags
name: block-device pathname, e.g. /dev/sda
data: pointer to additional data, handed down to the superblock-filling routine
returns: a vfsmount pointer
**/

struct vfsmount *
do_kern_mount(const char *fstype, int flags, const char *name, void *data)
{
	/* look up a registered filesystem type on the global list */
	struct file_system_type *type = get_fs_type(fstype);
	struct vfsmount *mnt;
	if (!type)
		return ERR_PTR(-ENODEV);
	/* returns the mount object */
	mnt = vfs_kern_mount(type, flags, name, data);
	put_filesystem(type);
	return mnt;
}

do_kern_mount first looks up the registered filesystem type on the global file_systems list (registration was covered in the section on filesystem types). It then declares a mnt pointer and calls vfs_kern_mount.
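
For context, this is roughly how ext3 registers its type at module-init time, which is what makes get_fs_type("ext3") succeed here (condensed from the fs/ext3/super.c of the same era):

static struct file_system_type ext3_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ext3",
	.get_sb		= ext3_get_sb,		/* invoked below as type->get_sb */
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV,	/* needs a block device */
};

static int __init init_ext3_fs(void)
{
	/* journal and inode-cache initialization elided */
	return register_filesystem(&ext3_fs_type);
}

With the type in hand, vfs_kern_mount does the heavy lifting: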

/**
type: filesystem type
flags: mount flags, e.g. MS_BIND
name: device pathname
data: private additional data for the superblock-filling routine
returns: a vfsmount already associated with its superblock
**/
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct vfsmount *mnt;
	char *secdata = NULL;
	int error;

	if (!type)
		return ERR_PTR(-ENODEV);

	error = -ENOMEM;
	/* allocate and initialize the mount object */
	mnt = alloc_vfsmnt(name);
	if (!mnt)
		goto out;

	if (data) {
		secdata = alloc_secdata();
		if (!secdata)
			goto out_mnt;

		error = security_sb_copy_data(type, data, secdata);
		if (error)
			goto out_free_secdata;
	}
	/* filesystem-specific: allocate the superblock, fill it in,
	   and tie it to the vfsmount */
	error = type->get_sb(type, flags, name, data, mnt);
	if (error < 0)
		goto out_free_secdata;

 	error = security_sb_kern_mount(mnt->mnt_sb, secdata);
 	if (error)
 		goto out_sb;
	/* provisionally point the mount-point dentry and the parent mount at
	   the mount itself; graft_tree will set the real values when the
	   mount is attached to the tree */
	mnt->mnt_mountpoint = mnt->mnt_root;
	mnt->mnt_parent = mnt;
	up_write(&mnt->mnt_sb->s_umount);
	free_secdata(secdata);
	return mnt;
out_sb:
	dput(mnt->mnt_root);
	up_write(&mnt->mnt_sb->s_umount);
	deactivate_super(mnt->mnt_sb);
out_free_secdata:
	free_secdata(secdata);
out_mnt:
	free_vfsmnt(mnt);
out:
	return ERR_PTR(error);
}

This function is fairly involved and has several key parts. It first calls alloc_vfsmnt to allocate and initialize the mount object. Then type->get_sb allocates and fills in the superblock and associates it with mnt; this step is filesystem-specific and is analyzed in a moment. Finally it gives the mount provisional values for its mount-point dentry and parent mount. First, alloc_vfsmnt:

/* allocate and initialize a vfsmount object */
struct vfsmount *alloc_vfsmnt(const char *name)
{	/* allocate a struct vfsmount from the slab cache */
	struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
	if (mnt) {
		memset(mnt, 0, sizeof(struct vfsmount));
		atomic_set(&mnt->mnt_count, 1);
		/* link in the global mount hash table */
		INIT_LIST_HEAD(&mnt->mnt_hash);
		/* link in the parent mount's child list */
		INIT_LIST_HEAD(&mnt->mnt_child);
		/* head of this mount's own list of children */
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		/* link in the namespace's list of mounts */
		INIT_LIST_HEAD(&mnt->mnt_list);
		/* expiry list */
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
		if (name) {
			int size = strlen(name) + 1;
			/* allocate memory for the device name */
			char *newname = kmalloc(size, GFP_KERNEL);
			if (newname) {
				memcpy(newname, name, size);
				/* attach the device name to the mount */
				mnt->mnt_devname = newname;
			}
		}
	}
	return mnt;
}


This function is a relief: it is simple. It allocates a vfsmount from the slab cache, initializes its list heads, and finally attaches the device name to the mount object.

Now for type->get_sb. The get_sb routine was registered along with the filesystem type, so it is type-specific. For ext3 it is ext3_get_sb:

static int ext3_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
	return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);
}

It simply calls get_sb_bdev, passing ext3_fill_super as the callback that will fill in the superblock later on.
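
Not every filesystem lives on a block device. For contrast, a memory-backed filesystem such as ramfs plugs get_sb_nodev into the same slot (shown approximately as in the fs/ramfs of this era): no device is opened, a fresh superblock is simply allocated and filled:

static int ramfs_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
	return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt);
}

Back to the block-device path, get_sb_bdev: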

/**
A filesystem type can cover many superblocks, one per filesystem: the ext3
type may own several superblocks, while /dev/sda and /dev/sdb each have
exactly one. At the end the mount is associated with the superblock, which
completes the vfsmount <-> super_block link.
**/
int get_sb_bdev(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data,
	int (*fill_super)(struct super_block *, void *, int),
	struct vfsmount *mnt)
{
	struct block_device *bdev;
	struct super_block *s;
	int error = 0;
	/* open the block device exclusively; flags say read-only or read-write */
	bdev = open_bdev_excl(dev_name, flags, fs_type);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	/*
	 * once the super is inserted into the list by sget, s_umount
	 * will protect the lockfs code from trying to start a snapshot
	 * while we are mounting
	 */
	down(&bdev->bd_mount_sem);
	/* find or create the superblock, keyed by bdev */
	s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
	up(&bdev->bd_mount_sem);
	if (IS_ERR(s))
		goto error_s;
	/* the superblock already exists */
	if (s->s_root) {
		if ((flags ^ s->s_flags) & MS_RDONLY) {
			up_write(&s->s_umount);
			deactivate_super(s);
			error = -EBUSY;
			goto error_bdev;
		}
		/* close the block device: the existing superblock already holds it */
		close_bdev_excl(bdev);
	} else {
		char b[BDEVNAME_SIZE];
		/* record the mount flags */
		s->s_flags = flags;
		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
		/* set the block size (between 512 bytes and 4KB) */
		sb_set_blocksize(s, block_size(bdev));
		/* fill in the superblock: build its root dentry, install the
		   super_operations, and so on */
		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
		if (error) {
			up_write(&s->s_umount);
			deactivate_super(s);
			goto error;
		}

		s->s_flags |= MS_ACTIVE;
		bdev_uevent(bdev, KOBJ_MOUNT);
	}
	/* associate the mount with the superblock; returns 0 on success */
	return simple_set_mnt(mnt, s);

error_s:
	error = PTR_ERR(s);
error_bdev:
	close_bdev_excl(bdev);
error:
	return error;
}

First the device is opened exclusively with open_bdev_excl; then sget obtains a superblock object, fill_super fills in its details, and finally simple_set_mnt ties the superblock to the mount object. So how does sget obtain a superblock?

/** Search type->fs_supers for a superblock that belongs to this filesystem
 *  type; if one is found, return its address, otherwise create a new
 *  superblock and add it to type->fs_supers.
 *	sget	-	find or create a superblock
 *	@type:	filesystem type superblock should belong to
 *	@test:	comparison callback
 *	@set:	setup callback
 *	@data:	argument to each of them
 */
struct super_block *sget(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			void *data)
{
	struct super_block *s = NULL;
	struct list_head *p;
	int err;

retry:
	spin_lock(&sb_lock);
	/* set() stores the bdev in s->s_bdev when a superblock is created, so
	   if test() succeeds the superblock most likely exists already.  Walk
	   the superblocks of this filesystem type: type->fs_supers is the
	   list head, s_instances links the members. */
	if (test) list_for_each(p, &type->fs_supers) {
		struct super_block *old;
		old = list_entry(p, struct super_block, s_instances);
		if (!test(old, data))	/* not this superblock */
			continue;
		if (!grab_super(old))
			goto retry;
		if (s)
			destroy_super(s);
		/* found it: return the existing superblock */
		return old;
	}
	/* none found */
	if (!s) {
		spin_unlock(&sb_lock);
		/* create a new superblock object */
		s = alloc_super(type);
		if (!s)
			return ERR_PTR(-ENOMEM);
		goto retry;
	}
	/* let set() associate s with data (s->s_bdev = bdev) */
	err = set(s, data);
	if (err) {
		spin_unlock(&sb_lock);
		destroy_super(s);
		return ERR_PTR(err);
	}
	/* record the owning filesystem type */
	s->s_type = type;
	/* copy the type name into the s_id array (get_sb_bdev overwrites it
	   with the device name later) */
	strlcpy(s->s_id, type->name, sizeof(s->s_id));
	/* add to the global list of all superblocks, headed by super_blocks */
	list_add_tail(&s->s_list, &super_blocks);
	/* add to the list of superblocks of the same filesystem type */
	list_add(&s->s_instances, &type->fs_supers);
	spin_unlock(&sb_lock);
	/* bump the filesystem type's reference count */
	get_filesystem(type);
	return s;
}

sget first searches the fs_supers list for an existing superblock and returns it if found. Otherwise it creates one: alloc_super allocates the in-memory superblock, the owning filesystem type is recorded, and the superblock is added both to the global super_blocks list and to the per-type list type->fs_supers before being returned. Back in get_sb_bdev, the if (s->s_root) check tells the two cases apart: if the root dentry already exists, the filesystem is already mounted and its superblock has already been filled in, so the extra block-device reference is dropped with close_bdev_excl. Otherwise the superblock still has to be filled in; for ext3 that is done by ext3_fill_super.
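
The test and set callbacks passed to sget here are tiny; they look essentially like this (as in the fs/super.c of this era):

static int test_bdev_super(struct super_block *s, void *data)
{
	/* does this superblock belong to that block device? */
	return (void *)s->s_bdev == data;
}

static int set_bdev_super(struct super_block *s, void *data)
{
	s->s_bdev = data;			/* remember the device */
	s->s_dev = s->s_bdev->bd_dev;		/* and its device number */
	return 0;
}

Now, ext3_fill_super: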

/* fill in the in-memory superblock */
static int ext3_fill_super (struct super_block *sb, void *data, int silent)
{
	struct buffer_head * bh;
	/* the on-disk superblock layout */
	struct ext3_super_block *es = NULL;
	/* ext3-private, in-memory superblock info */
	struct ext3_sb_info *sbi;
	ext3_fsblk_t block;
	/* block number where the superblock lives */
	ext3_fsblk_t sb_block = get_sb_block(&data);
	ext3_fsblk_t logic_sb_block;
	unsigned long offset = 0;
	unsigned int journal_inum = 0;
	unsigned long journal_devnum = 0;
	unsigned long def_mount_opts;
	/* the root inode */
	struct inode *root;
	int blocksize;
	int hblock;
	int db_count;
	int i;
	int needs_recovery;
	__le32 features;
	/* allocate the ext3-private superblock info */
	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;
	/* hang sbi off the VFS superblock */
	sb->s_fs_info = sbi;
	sbi->s_mount_opt = 0;
	sbi->s_resuid = EXT3_DEF_RESUID;
	sbi->s_resgid = EXT3_DEF_RESGID;

	unlock_kernel();
	/* determine the block size */
	blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
	if (!blocksize) {
		printk(KERN_ERR "EXT3-fs: unable to set blocksize\n");
		goto out_fail;
	}

	/*
	 * The ext3 superblock will not be buffer aligned for other than 1kB
	 * block sizes.  We need to calculate the offset from buffer start.
	 */
	if (blocksize != EXT3_MIN_BLOCK_SIZE) {
		logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
	} else {
		logic_sb_block = sb_block;
	}
	/* read the block that holds the superblock */
	if (!(bh = sb_bread(sb, logic_sb_block))) {
		printk (KERN_ERR "EXT3-fs: unable to read superblock\n");
		goto out_fail;
	}
	/*
	 * Note: s_es must be initialized as soon as possible because
	 *       some ext3 macro-instructions depend on its value
	 */
	/* the on-disk struct ext3_super_block lives inside the buffer */
	es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
	/* point s_es at it */
	sbi->s_es = es;
	sb->s_magic = le16_to_cpu(es->s_magic);
	if (sb->s_magic != EXT3_SUPER_MAGIC)
		goto cantfind_ext3;

	/* Set defaults before we parse the mount options */
	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
	if (def_mount_opts & EXT3_DEFM_DEBUG)
		set_opt(sbi->s_mount_opt, DEBUG);
	if (def_mount_opts & EXT3_DEFM_BSDGROUPS)
		set_opt(sbi->s_mount_opt, GRPID);
	if (def_mount_opts & EXT3_DEFM_UID16)
		set_opt(sbi->s_mount_opt, NO_UID32);
	if (def_mount_opts & EXT3_DEFM_XATTR_USER)
		set_opt(sbi->s_mount_opt, XATTR_USER);
	if (def_mount_opts & EXT3_DEFM_ACL)
		set_opt(sbi->s_mount_opt, POSIX_ACL);
	if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
		sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA;
	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
		sbi->s_mount_opt |= EXT3_MOUNT_ORDERED_DATA;
	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
		sbi->s_mount_opt |= EXT3_MOUNT_WRITEBACK_DATA;

	if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO)
		set_opt(sbi->s_mount_opt, ERRORS_RO);
	else
		set_opt(sbi->s_mount_opt, ERRORS_CONT);

	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);

	set_opt(sbi->s_mount_opt, RESERVATION);

	if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
			    NULL, 0))
		goto failed_mount;

	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
		((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);

	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
	    (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
	     EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
	     EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
		printk(KERN_WARNING
		       "EXT3-fs warning: feature flags set on rev 0 fs, "
		       "running e2fsck is recommended\n");
	/*
	 * Check feature flags regardless of the revision level, since we
	 * previously didn't change the revision level when setting the flags,
	 * so there is a chance incompat flags are set on a rev 0 filesystem.
	 */
	features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
	if (features) {
		printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of "
		       "unsupported optional features (%x).\n",
		       sb->s_id, le32_to_cpu(features));
		goto failed_mount;
	}
	features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
	if (!(sb->s_flags & MS_RDONLY) && features) {
		printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of "
		       "unsupported optional features (%x).\n",
		       sb->s_id, le32_to_cpu(features));
		goto failed_mount;
	}
	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);

	if (blocksize < EXT3_MIN_BLOCK_SIZE ||
	    blocksize > EXT3_MAX_BLOCK_SIZE) {
		printk(KERN_ERR
		       "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n",
		       blocksize, sb->s_id);
		goto failed_mount;
	}

	hblock = bdev_hardsect_size(sb->s_bdev);
	if (sb->s_blocksize != blocksize) {
		/*
		 * Make sure the blocksize for the filesystem is larger
		 * than the hardware sectorsize for the machine.
		 */
		if (blocksize < hblock) {
			printk(KERN_ERR "EXT3-fs: blocksize %d too small for "
			       "device blocksize %d.\n", blocksize, hblock);
			goto failed_mount;
		}

		brelse (bh);
		sb_set_blocksize(sb, blocksize);
		logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
		bh = sb_bread(sb, logic_sb_block);
		if (!bh) {
			printk(KERN_ERR
			       "EXT3-fs: Can't read superblock on 2nd try.\n");
			goto failed_mount;
		}
		es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
		sbi->s_es = es;
		if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
			printk (KERN_ERR
				"EXT3-fs: Magic mismatch, very weird !\n");
			goto failed_mount;
		}
	}

	sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);

	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
		sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
		sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
	} else {
		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
		if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
		    (sbi->s_inode_size & (sbi->s_inode_size - 1)) ||
		    (sbi->s_inode_size > blocksize)) {
			printk (KERN_ERR
				"EXT3-fs: unsupported inode size: %d\n",
				sbi->s_inode_size);
			goto failed_mount;
		}
	}
	sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
				   le32_to_cpu(es->s_log_frag_size);
	if (blocksize != sbi->s_frag_size) {
		printk(KERN_ERR
		       "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n",
		       sbi->s_frag_size, blocksize);
		goto failed_mount;
	}
	sbi->s_frags_per_block = 1;
	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
	sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
	if (EXT3_INODE_SIZE(sb) == 0)
		goto cantfind_ext3;
	sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
	if (sbi->s_inodes_per_block == 0)
		goto cantfind_ext3;
	sbi->s_itb_per_group = sbi->s_inodes_per_group /
					sbi->s_inodes_per_block;
	sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
	sbi->s_sbh = bh;
	sbi->s_mount_state = le16_to_cpu(es->s_state);
	sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb));
	sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb));
	for (i=0; i < 4; i++)
		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
	sbi->s_def_hash_version = es->s_def_hash_version;

	if (sbi->s_blocks_per_group > blocksize * 8) {
		printk (KERN_ERR
			"EXT3-fs: #blocks per group too big: %lu\n",
			sbi->s_blocks_per_group);
		goto failed_mount;
	}
	if (sbi->s_frags_per_group > blocksize * 8) {
		printk (KERN_ERR
			"EXT3-fs: #fragments per group too big: %lu\n",
			sbi->s_frags_per_group);
		goto failed_mount;
	}
	if (sbi->s_inodes_per_group > blocksize * 8) {
		printk (KERN_ERR
			"EXT3-fs: #inodes per group too big: %lu\n",
			sbi->s_inodes_per_group);
		goto failed_mount;
	}

	if (le32_to_cpu(es->s_blocks_count) >
		    (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
		printk(KERN_ERR "EXT3-fs: filesystem on %s:"
			" too large to mount safely\n", sb->s_id);
		if (sizeof(sector_t) < 8)
			printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not "
					"enabled\n");
		goto failed_mount;
	}

	if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
		goto cantfind_ext3;
	sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
			       le32_to_cpu(es->s_first_data_block) - 1)
				       / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
	db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
		   EXT3_DESC_PER_BLOCK(sb);
	sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
				    GFP_KERNEL);
	if (sbi->s_group_desc == NULL) {
		printk (KERN_ERR "EXT3-fs: not enough memory\n");
		goto failed_mount;
	}

	bgl_lock_init(&sbi->s_blockgroup_lock);

	for (i = 0; i < db_count; i++) {
		block = descriptor_loc(sb, logic_sb_block, i);
		sbi->s_group_desc[i] = sb_bread(sb, block);
		if (!sbi->s_group_desc[i]) {
			printk (KERN_ERR "EXT3-fs: "
				"can't read group descriptor %d\n", i);
			db_count = i;
			goto failed_mount2;
		}
	}
	if (!ext3_check_descriptors (sb)) {
		printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n");
		goto failed_mount2;
	}
	sbi->s_gdb_count = db_count;
	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
	spin_lock_init(&sbi->s_next_gen_lock);

	percpu_counter_init(&sbi->s_freeblocks_counter,
		ext3_count_free_blocks(sb));
	percpu_counter_init(&sbi->s_freeinodes_counter,
		ext3_count_free_inodes(sb));
	percpu_counter_init(&sbi->s_dirs_counter,
		ext3_count_dirs(sb));

	/* per fileystem reservation list head & lock */
	spin_lock_init(&sbi->s_rsv_window_lock);
	sbi->s_rsv_window_root = RB_ROOT;
	/* Add a single, static dummy reservation to the start of the
	 * reservation window list --- it gives us a placeholder for
	 * append-at-start-of-list which makes the allocation logic
	 * _much_ simpler. */
	sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
	sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
	sbi->s_rsv_window_head.rsv_alloc_hit = 0;
	sbi->s_rsv_window_head.rsv_goal_size = 0;
	ext3_rsv_window_add(sb, &sbi->s_rsv_window_head);

	/*
	 * set up enough so that it can read an inode
	 */
	sb->s_op = &ext3_sops;	/* superblock operations; read_inode reads inodes */
	sb->s_export_op = &ext3_export_ops;
	sb->s_xattr = ext3_xattr_handlers;
#ifdef CONFIG_QUOTA
	sb->s_qcop = &ext3_qctl_operations;
	sb->dq_op = &ext3_quota_operations;
#endif
	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
	/* leave s_root empty for now */
	sb->s_root = NULL;

	needs_recovery = (es->s_last_orphan != 0 ||
			  EXT3_HAS_INCOMPAT_FEATURE(sb,
				    EXT3_FEATURE_INCOMPAT_RECOVER));

	/*
	 * The first inode we look at is the journal inode.  Don't try
	 * root first: it may be modified in the journal!
	 */
	if (!test_opt(sb, NOLOAD) &&
	    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
		if (ext3_load_journal(sb, es, journal_devnum))
			goto failed_mount3;
	} else if (journal_inum) {
		if (ext3_create_journal(sb, es, journal_inum))
			goto failed_mount3;
	} else {
		if (!silent)
			printk (KERN_ERR
				"ext3: No journal on filesystem on %s\n",
				sb->s_id);
		goto failed_mount3;
	}

	/* We have now updated the journal if required, so we can
	 * validate the data journaling mode. */
	switch (test_opt(sb, DATA_FLAGS)) {
	case 0:
		/* No mode set, assume a default based on the journal
                   capabilities: ORDERED_DATA if the journal can
                   cope, else JOURNAL_DATA */
		if (journal_check_available_features
		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
			set_opt(sbi->s_mount_opt, ORDERED_DATA);
		else
			set_opt(sbi->s_mount_opt, JOURNAL_DATA);
		break;

	case EXT3_MOUNT_ORDERED_DATA:
	case EXT3_MOUNT_WRITEBACK_DATA:
		if (!journal_check_available_features
		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
			printk(KERN_ERR "EXT3-fs: Journal does not support "
			       "requested data journaling mode\n");
			goto failed_mount4;
		}
	default:
		break;
	}

	if (test_opt(sb, NOBH)) {
		if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
				"its supported only with writeback mode\n");
			clear_opt(sbi->s_mount_opt, NOBH);
		}
	}
	/*
	 * The journal_load will have done any necessary log recovery,
	 * so we can safely mount the rest of the filesystem now.
	 */
	/* look up the root inode by its well-known inode number,
	   consulting the inode cache first */
	root = iget(sb, EXT3_ROOT_INO);
	/* allocate the root dentry and bind it to the root inode */
	sb->s_root = d_alloc_root(root);
	if (!sb->s_root) {
		printk(KERN_ERR "EXT3-fs: get root inode failed\n");
		iput(root);
		goto failed_mount4;
	}
	/* sanity-check that the root inode really is a directory */
	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
		dput(sb->s_root);
		sb->s_root = NULL;
		printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n");
		goto failed_mount4;
	}
	/* update the on-disk superblock (mount count, state) and write it back */
	ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
	/*
	 * akpm: core read_super() calls in here with the superblock locked.
	 * That deadlocks, because orphan cleanup needs to lock the superblock
	 * in numerous places.  Here we just pop the lock - it's relatively
	 * harmless, because we are now ready to accept write_super() requests,
	 * and aviro says that's the only reason for hanging onto the
	 * superblock lock.
	 */
	EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
	ext3_orphan_cleanup(sb, es);
	EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
	if (needs_recovery)
		printk (KERN_INFO "EXT3-fs: recovery complete.\n");
	ext3_mark_recovery_complete(sb, es);
	printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n",
		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
		"writeback");

	lock_kernel();
	return 0;

cantfind_ext3:
	if (!silent)
		printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n",
		       sb->s_id);
	goto failed_mount;

failed_mount4:
	journal_destroy(sbi->s_journal);
failed_mount3:
	percpu_counter_destroy(&sbi->s_freeblocks_counter);
	percpu_counter_destroy(&sbi->s_freeinodes_counter);
	percpu_counter_destroy(&sbi->s_dirs_counter);
failed_mount2:
	for (i = 0; i < db_count; i++)
		brelse(sbi->s_group_desc[i]);
	kfree(sbi->s_group_desc);
failed_mount:
#ifdef CONFIG_QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		kfree(sbi->s_qf_names[i]);
#endif
	ext3_blkdev_remove(sbi);
	brelse(bh);
out_fail:
	sb->s_fs_info = NULL;
	kfree(sbi);
	lock_kernel();
	return -EINVAL;
}

This function is very long; its job is to fill in the in-memory superblock. Near the end it fetches the root inode with iget(sb, EXT3_ROOT_INO), points sb->s_root at the root dentry via d_alloc_root, and then calls ext3_setup_super to update the on-disk superblock and write it back.
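
d_alloc_root itself is short. It allocates a dentry named "/" with no parent and binds it to the root inode, essentially like this (as in the fs/dcache.c of this era):

struct dentry *d_alloc_root(struct inode *root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		static const struct qstr name = { .name = "/", .len = 1 };

		res = d_alloc(NULL, &name);	/* no parent: this is the root */
		if (res) {
			res->d_sb = root_inode->i_sb;
			res->d_parent = res;	/* the root is its own parent */
			d_instantiate(res, root_inode);
		}
	}
	return res;
}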

Once the superblock is filled in, get_sb_bdev finishes by calling simple_set_mnt to associate the mount object with the superblock:

/* associate the mount with the superblock */
int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
{
	/* the superblock this mount refers to */
	mnt->mnt_sb = sb;
	/* the mount's root dentry is the superblock's root dentry */
	mnt->mnt_root = dget(sb->s_root);
	return 0;
}

Pure assignment: the mount object's mnt_sb is pointed at the superblock, and its root dentry mnt_root at sb->s_root. At this point the superblock has been created, the mount object has been created, and the two are linked together. Back in vfs_kern_mount, the assignments mnt->mnt_mountpoint = mnt->mnt_root and mnt->mnt_parent = mnt give the mount provisional values, which do_add_mount will overwrite later. When vfs_kern_mount returns, do_kern_mount returns too.
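
To summarize the state at this point, a freshly created mount satisfies the following invariants (illustrative assertions only, not kernel code):

/* after do_kern_mount() returns successfully: */
BUG_ON(mnt->mnt_sb == NULL);			/* superblock attached */
BUG_ON(mnt->mnt_root != mnt->mnt_sb->s_root);	/* shares the sb's root dentry */
BUG_ON(mnt->mnt_mountpoint != mnt->mnt_root);	/* provisional, fixed by graft_tree */
BUG_ON(mnt->mnt_parent != mnt);			/* provisional, fixed by graft_tree */

Next, do_new_mount calls do_add_mount to insert the mount object into the mount tree: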

/* add a mount into a namespace's mount tree
 * - provide the option of adding the new mount to an expiration list
 */
/**
newmnt: the new mount object
nd: holds the mount-point dentry and the parent mount
mnt_flags: mount flags
fslist: expiration list, may be NULL
**/
int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
		 int mnt_flags, struct list_head *fslist)
{
	int err;
	/* take the namespace semaphore for writing */
	down_write(&namespace_sem);
	/* Something was mounted here while we slept */
	while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
		;
	err = -EINVAL;
	if (!check_mnt(nd->mnt))
		goto unlock;

	/* Refuse the same filesystem on the same mount point: it is already
	   mounted there */
	err = -EBUSY;
	/* same superblock and same dentry: mounting one filesystem twice on
	   the same directory is pointless */
	if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
	    nd->mnt->mnt_root == nd->dentry)
		goto unlock;
	err = -EINVAL;
	/* refuse if the new mount's root is a symbolic link */
	if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
		goto unlock;
	/* record the mount flags */
	newmnt->mnt_flags = mnt_flags;
	/* insert the new mount into the namespace list, the hash table,
	   and the parent mount's child list */
	if ((err = graft_tree(newmnt, nd)))
		goto unlock;
	/* optionally add to an expiration list */
	if (fslist) {
		/* add to the specified expiration list */
		spin_lock(&vfsmount_lock);
		list_add_tail(&newmnt->mnt_expire, fslist);
		spin_unlock(&vfsmount_lock);
	}
	up_write(&namespace_sem);
	return 0;

unlock:
	up_write(&namespace_sem);
	mntput(newmnt);
	return err;
}

The while loop at the top handles a race: if somebody mounted something on nd->dentry while we slept on the semaphore, we must first descend to the top of that mount stack (see the follow_down sketch below). In the EBUSY check, nd->mnt->mnt_sb == newmnt->mnt_sb means the new mount and the mount already at the target share a superblock, i.e. they are the same filesystem, and nd->mnt->mnt_root == nd->dentry means the target is that filesystem's own root directory; mounting the same filesystem on the same directory a second time has no point, so it is refused. After the checks, the mount flags are recorded and graft_tree performs the actual insertion.
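
As referenced above, d_mountpoint and follow_down look essentially like this in the kernel of this era (lookup_mnt, the hash-table lookup, is shown after commit_tree below):

static inline int d_mountpoint(struct dentry *dentry)
{
	return dentry->d_mounted;	/* non-zero: something is mounted here */
}

int follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
	struct vfsmount *mounted;

	mounted = lookup_mnt(*mnt, *dentry);
	if (mounted) {
		/* step from the mount point into the mounted filesystem */
		dput(*dentry);
		mntput(*mnt);
		*mnt = mounted;
		*dentry = dget(mounted->mnt_root);
		return 1;
	}
	return 0;
}

Now, graft_tree itself: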

static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
{
	int err;
	if (mnt->mnt_sb->s_flags & MS_NOUSER)
		return -EINVAL;

	if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
	      S_ISDIR(mnt->mnt_root->d_inode->i_mode))
		return -ENOTDIR;

	err = -ENOENT;
	mutex_lock(&nd->dentry->d_inode->i_mutex);
	if (IS_DEADDIR(nd->dentry->d_inode))
		goto out_unlock;

	err = security_sb_check_sb(mnt, nd);
	if (err)
		goto out_unlock;

	err = -ENOENT;
	/* attach to the global mount tree via attach_recursive_mnt */
	if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry))
		err = attach_recursive_mnt(mnt, nd, NULL);
out_unlock:
	mutex_unlock(&nd->dentry->d_inode->i_mutex);
	if (!err)
		security_sb_post_addmount(mnt, nd);
	return err;
}

graft_tree calls attach_recursive_mnt to hook the mount into the global mount tree, passing the mount object, the nameidata, and the old parent mount (NULL on our path):

/*
 *  @source_mnt : mount tree to be attached
 *  @nd         : place the mount tree @source_mnt is attached
 *  @parent_nd  : if non-null, detach the source_mnt from its parent and
 *  		   store the parent mount and mountpoint dentry.
 *  		   (done when source_mnt is moved)
 *
 *  NOTE: in the table below explains the semantics when a source mount
 *  of a given type is attached to a destination mount of a given type.
 * ---------------------------------------------------------------------------
 * |         BIND MOUNT OPERATION                                            |
 * |**************************************************************************
 * | source-->| shared        |       private  |       slave    | unbindable |
 * | dest     |               |                |                |            |
 * |   |      |               |                |                |            |
 * |   v      |               |                |                |            |
 * |**************************************************************************
 * |  shared  | shared (++)   |     shared (+) |     shared(+++)|  invalid   |
 * |          |               |                |                |            |
 * |non-shared| shared (+)    |      private   |      slave (*) |  invalid   |
 * ***************************************************************************
 * A bind operation clones the source mount and mounts the clone on the
 * destination mount.
 *
 * (++)  the cloned mount is propagated to all the mounts in the propagation
 * 	 tree of the destination mount and the cloned mount is added to
 * 	 the peer group of the source mount.
 * (+)   the cloned mount is created under the destination mount and is marked
 *       as shared. The cloned mount is added to the peer group of the source
 *       mount.
 * (+++) the mount is propagated to all the mounts in the propagation tree
 *       of the destination mount and the cloned mount is made slave
 *       of the same master as that of the source mount. The cloned mount
 *       is marked as 'shared and slave'.
 * (*)   the cloned mount is made a slave of the same master as that of the
 * 	 source mount.
 *
 * ---------------------------------------------------------------------------
 * |         		MOVE MOUNT OPERATION                                 |
 * |**************************************************************************
 * | source-->| shared        |       private  |       slave    | unbindable |
 * | dest     |               |                |                |            |
 * |   |      |               |                |                |            |
 * |   v      |               |                |                |            |
 * |**************************************************************************
 * |  shared  | shared (+)    |     shared (+) |    shared(+++) |  invalid   |
 * |          |               |                |                |            |
 * |non-shared| shared (+*)   |      private   |    slave (*)   | unbindable |
 * ***************************************************************************
 *
 * (+)  the mount is moved to the destination. And is then propagated to
 * 	all the mounts in the propagation tree of the destination mount.
 * (+*)  the mount is moved to the destination.
 * (+++)  the mount is moved to the destination and is then propagated to
 * 	all the mounts belonging to the destination mount's propagation tree.
 * 	the mount is marked as 'shared and slave'.
 * (*)	the mount continues to be a slave at the new location.
 *
 * if the source mount is a tree, the operations explained above is
 * applied to each mount in the tree.
 * Must be called without spinlocks held, since this function can sleep
 * in allocations.
 */
/**
Step 1: record the parent mount (nd->mnt) and the mount-point dentry (nd->dentry).
Step 2: insert the mount into the global mount tree, i.e. onto three lists:
(1) the global hash table
(2) the namespace's mnt_list
(3) the parent mount's child list
**/
static int attach_recursive_mnt(struct vfsmount *source_mnt,
			struct nameidata *nd, struct nameidata *parent_nd)
{
	LIST_HEAD(tree_list);
	/* nd->mnt is the parent mount */
	struct vfsmount *dest_mnt = nd->mnt;
	/* the mount-point dentry */
	struct dentry *dest_dentry = nd->dentry;
	struct vfsmount *child, *p;

	if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
		return -EINVAL;

	if (IS_MNT_SHARED(dest_mnt)) {
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	}

	spin_lock(&vfsmount_lock);
	if (parent_nd) {	/* moving: detach from the old parent first,
				   then attach to the new one */
		detach_mnt(source_mnt, parent_nd);
		attach_mnt(source_mnt, nd);	/* link under the new parent */
		touch_mnt_namespace(current->nsproxy->mnt_ns);
	} else {
		/* set the parent mount, the mount-point dentry, and d_mounted */
		mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
		/* add the mount to the hash table, the namespace list,
		   and the parent's child list */
		commit_tree(source_mnt);
	}
	}
	list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
		list_del_init(&child->mnt_hash);
		commit_tree(child);
	}
	spin_unlock(&vfsmount_lock);
	return 0;
}

The parent_nd check distinguishes the two cases. If an old parent mount exists (the move case), the mount is first detached from it and then attached to the new parent. Otherwise mnt_set_mountpoint sets the parent mount, the mount-point dentry, and d_mounted, and commit_tree adds the mount to the global hash table, the namespace list, and the parent's child list. Note that nd->mnt here is the new parent mount. mnt_set_mountpoint and commit_tree are shown below, with the key parts annotated:

/* record the mount point and the parent mount */
void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
			struct vfsmount *child_mnt)
{
	/* the child mount's mnt_parent points at the parent mount */
	child_mnt->mnt_parent = mntget(mnt);
	/* the dentry being mounted on */
	child_mnt->mnt_mountpoint = dget(dentry);
	/* bump d_mounted: several filesystems can be mounted on one dentry */
	dentry->d_mounted++;
}

/*
 * the caller must hold vfsmount_lock
 */
static void commit_tree(struct vfsmount *mnt)
{
	struct vfsmount *parent = mnt->mnt_parent;	/* the parent mount */
	struct vfsmount *m;
	LIST_HEAD(head);
	struct mnt_namespace *n = parent->mnt_ns;

	BUG_ON(parent == mnt);
	/* splice into the namespace's list of mounts */
	list_add_tail(&head, &mnt->mnt_list);
	list_for_each_entry(m, &head, mnt_list)
		m->mnt_ns = n;
	list_splice(&head, n->list.prev);
	/* add to the hash table, mount_hashtable */
	list_add_tail(&mnt->mnt_hash, mount_hashtable +
				hash(parent, mnt->mnt_mountpoint));
	/* add to the parent mount's child list */
	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
	touch_mnt_namespace(n);
}
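
The hash-table insertion in commit_tree is exactly what path lookup consumes later: lookup_mnt hashes the (parent mount, mount-point dentry) pair and scans the chain for a child whose mnt_parent and mnt_mountpoint match, approximately as in the fs/namespace.c of this era:

struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
	struct list_head *head = mount_hashtable + hash(mnt, dentry);
	struct list_head *tmp = head;
	struct vfsmount *p, *found = NULL;

	spin_lock(&vfsmount_lock);
	for (;;) {
		tmp = tmp->next;
		p = NULL;
		if (tmp == head)
			break;
		p = list_entry(tmp, struct vfsmount, mnt_hash);
		if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
			found = mntget(p);	/* take a reference */
			break;
		}
	}
	spin_unlock(&vfsmount_lock);
	return found;
}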

And with that, the mount system call is complete; the thread of it is reasonably clear, I hope.


3. Summary

The mount system call boils down to two steps:

(1) obtain a mount object (vfsmount): do_kern_mount

(2) insert the mount object into the mount tree: do_add_mount

Step (1) breaks down into: build the vfsmount object, build the super_block object, and associate the two.

Step (2) breaks down into: set the vfsmount's parent mount and mount-point dentry, then add it to the global mount_hashtable, the namespace's mount list, and the parent mount's child list mnt_mounts.

That is all for the mount system call. In the articles to come we will work through the Linux kernel step by step: the filesystems, the block layer, the I/O scheduler, and the SCSI device drivers. Given the chance, we will also look at the kernel's support for SSDs, including the trim command.

Reference: Understanding the Linux Kernel, 3rd Edition


Reprinted from blog.csdn.net/chenjin_zhong/article/details/8448862