|
最近碰到了一些与Linux mount相关的问题需要解决。先把整个mount流程理清楚,之后再排查具体问题就会清楚很多,同时也是为了巩固自己的知识点。 先上图,看完整篇文章后再回过头来看这张图,会更清晰哦!
如果要了解mount,需要知道super block,dentry,file等概念。这就跟学习linux的网络子系统是一个道理,需要先关注几个结构体。 其中block_device一般和块设备操作操作有关,如读取super block、读写数据等,所以block_device掌管着文件系统的底层设备。 struct block_device {dev_tbd_dev; /* not a kdev_t - it's a search key */ intbd_openers; struct inode *bd_inode;/* will die */ struct super_block *bd_super; struct mutexbd_mutex;/* open/close mutex */ struct list_headbd_inodes; void *bd_claiming; void *bd_holder; intbd_holders; boolbd_write_holder; #ifdef CONFIG_SYSFS struct list_headbd_holder_disks; #endif struct block_device *bd_contains; unsignedbd_block_size; struct hd_struct *bd_part; /* number of times partitions within this device have been opened. */ unsignedbd_part_count; intbd_invalidated; struct gendisk *bd_disk; struct request_queue * bd_queue; struct list_headbd_list; /* * Private data. You must have bd_claim'ed the block_device * to use this. NOTE: bd_claim allows an owner to claim * the same device multiple times, the owner must take special * care to not mess up bd_private for that case. */ unsigned longbd_private; /* The counter of freeze processes */ intbd_fsfreeze_count; /* Mutex for freeze */ struct mutexbd_fsfreeze_mutex; }; 暂时先抛开块设备结构体,关注一下文件系统类型的结构体。笔者用//标注了一下结构体 struct file_system_type {const char *name;//文件系统的名字,如yaffs2 int fs_flags;//说明文件系统的类型 #define FS_REQUIRES_DEV1 //文件系统必须在物理设备上 #define FS_BINARY_MOUNTDATA2 //mount此文件系统时(参见mount_fs函数 - fs/super.c)需要使用二进制数据结构的mount data(如每个位域都有固定的位置和意义) #define FS_HAS_SUBTYPE4 //文件系统含有子类型,最常见的就是FUSE,FUSE本是不是真正的文件系统,所以要通过子文件系统类型来区别通过FUSE接口实现的不同文件系统 #define FS_USERNS_MOUNT8/* Can be mounted by userns root */ //文件系统每次挂载都后都是不同的user namespace #define FS_USERNS_DEV_MOUNT16 /* A userns mount does not imply MNT_NODEV */ //user namespace挂载支持MNT_DEV, 即非nodev模式 #define FS_RENAME_DOES_D_MOVE32768/* FS will handle d_move() during rename() internally. 
*///文件系统将把重命名操作reame()直接按照移动操作d_move()来处理,主要用于网络文件系统 struct dentry *(*mount) (struct file_system_type *, int, const char *, void *);//用户挂载此文件系统时使用的回调函数 void (*kill_sb) (struct super_block *);//删除内存中的super block,在卸载文件系统时使用 struct module *owner;//指向实现这个文件系统的模块,通常为THIS_MODULE宏 struct file_system_type * next;//指向文件系统类型链表的下一个文件系统类型 struct hlist_head fs_supers;//具有同样此文件系统类型的超级块结构,都串连在这个表头下 struct lock_class_key s_lock_key; struct lock_class_key s_umount_key; struct lock_class_key s_vfs_rename_key; struct lock_class_key s_writers_key[SB_FREEZE_LEVELS]; struct lock_class_key i_lock_key; struct lock_class_key i_mutex_key; struct lock_class_key i_mutex_dir_key; }; file_system_type的基本操作都在fs/filesystems.c文件里.其中最重要的得看register_filesystem /***register_filesystem - register a new filesystem *@fs: the file system structure * *Adds the file system passed to the list of file systems the kernel *is aware of for mount and other syscalls. Returns 0 on success, *or a negative errno code on an error. * *The &struct file_system_type that is passed is linked into the kernel *structures and must not be freed until the file system has been *unregistered. */ int register_filesystem(struct file_system_type * fs) { int res = 0; struct file_system_type ** p; BUG_ON(strchr(fs->name, '.')); if (fs->next) return -EBUSY; write_lock(&file_systems_lock); p = find_filesystem(fs->name, strlen(fs->name)); if (*p) res = -EBUSY; else *p = fs; write_unlock(&file_systems_lock); return res; } EXPORT_SYMBOL(register_filesystem); 此函数将文件系统注册进系统中。如果要分析文件系统,这个一定是会被调用到的。它告诉内核我叫什么文件系统,并且告诉内核是如何使用超级块的。如此mount和kill_sb回调函数就是file_system_type中是文件系统实现的重点。上面所说的,请跳转到文件系统,自己来查看。这里笔者用的是yaffs2文件系统 可以看到mount的回调函数: static struct dentry *yaffs2_mount(struct file_system_type *fs_type, intflags,const char *dev_name, void *data) { return mount_bdev(fs_type, flags, dev_name, data,yaffs2_internal_read_super_mtd); } 看起来似乎很简单啊,只有一个mount_bdev函数。那么关注一下参数。
在上面一节中,笔者已经向大家介绍过:注册file_system_type时,我们主要向内核提供两个成员,一个是文件系统的名字,另一个是挂载(mount)这个文件系统的方法。 命令行:
此刻我们需要探究一下mount又是如何通过系统调用进入内核的。 在Linux的shell中输入 man 2 mount
此时可以得到系统的mount API介绍 #include <sys/mount.h>int mount(const char *source, const char *target, const char *filesystemtype, unsigned long mountflags, const void *data); 看一下, source是要挂载的设备名,target是要挂载到哪,filesystemtype顾名思义就是文件系统类型名。还有2个参数也是我们所要关注的。 先来看一下mountflags,其主要取值来自include/uapi/linux/fs.h /** These are the fs-independent mount-flags: up to 32 flags are supported */ #define MS_RDONLY 1/* Mount read-only */ #define MS_NOSUID 2/* Ignore suid and sgid bits */ #define MS_NODEV 4/* Disallow access to device special files */ #define MS_NOEXEC 8/* Disallow program execution */ #define MS_SYNCHRONOUS16/* Writes are synced at once */ #define MS_REMOUNT32/* Alter flags of a mounted FS */ #define MS_MANDLOCK64/* Allow mandatory locks on an FS */ #define MS_DIRSYNC128/* Directory modifications are synchronous */ #define MS_NOATIME1024/* Do not update access times. */ #define MS_NODIRATIME2048/* Do not update directory access times */ #define MS_BIND4096 //对应-B/--bind选项,告诉mount这是一次bind操作 #define MS_MOVE8192 //对应-M/--move,告诉mount这是一次move操作 #define MS_REC16384 //rec是recursive的意思,这个flag一般不单独出现,都是伴随这个flag,表示递归的进行操作 #define MS_VERBOSE32768/* War is peace. Verbosity is silence. MS_VERBOSE is deprecated. */ #define MS_SILENT32768 #define MS_POSIXACL(1<<16)/* VFS does not apply the umask */ #define MS_UNBINDABLE(1<<17)/* change to unbindable */ #define MS_PRIVATE(1<<18)/* change to private */ #define MS_SLAVE(1<<19)/* change to slave */ #define MS_SHARED(1<<20)/* change to shared */ #define MS_RELATIME(1<<21)/* Update atime relative to mtime/ctime. 
*/ #define MS_KERNMOUNT(1<<22) /* this is a kern_mount call */ #define MS_I_VERSION(1<<23) /* Update inode I_version field */ #define MS_STRICTATIME(1<<24) /* Always perform atime updates */ #define MS_LAZYTIME(1<<25) /* Update the on-disk [acm]times lazily */ /* These sb flags are internal to the kernel */ #define MS_NOSEC(1<<28) //有些文件系统不支持suid,security xattr等安全标记 #define MS_BORN(1<<29) //表示内存superblock已经创建完成 #define MS_ACTIVE(1<<30) //表示内存superblock正处于活动状态 #define MS_NOUSER(1<<31) //表示文件系统不能被应用层挂载使用,只能被内核使用,如rootfs /* * Superblock flags that can be altered by MS_REMOUNT */ #define MS_RMT_MASK(MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ MS_LAZYTIME) // 可以在remount时改变的flags /* * Old magic mount flag and mask */ #define MS_MGC_VAL 0xC0ED0000 #define MS_MGC_MSK 0xffff0000 从定义上看,这些基本上是大家都有的,绝大部分是可以被VFS层解析掉。而data就是一下特定的参数了。 mount系统调用定义在fs/namespace.c中 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *,dir_name, char __user *, type, unsigned long, flags, void __user *, data) { int ret; char *kernel_type; char *kernel_dev; unsigned long data_page; //拷贝文件系统类型名 kernel_type = copy_mount_string(type); ret = PTR_ERR(kernel_type); if (IS_ERR(kernel_type)) goto out_type; //拷贝文件系统所在的设备名 kernel_dev = copy_mount_string(dev_name); ret = PTR_ERR(kernel_dev); if (IS_ERR(kernel_dev)) goto out_dev; //拷贝文件系统定制的mount data ret = copy_mount_options(data, &data_page); if (ret < 0) goto out_data; //到此mount所需要的fstype, dev_name, mountpoint, flags和data这几个参数都拷贝到内核空间了,启动底层mount吧 ret = do_mount(kernel_dev, dir_name, kernel_type, flags, (void *) data_page); free_page(data_page); out_data: kfree(kernel_dev); out_dev: kfree(kernel_type); out_type: return ret; } 继续,来看看do_mount /** Flags is a 32-bit value that allows up to 31 non-fs dependent flags to * be given to the mount() call (ie: read-only, no-dev, no-suid etc). * * data is a (void *) that can point to any structure up to * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent * information (or be NULL). 
* * Pre-0.97 versions of mount() didn't have a flags word. * When the flags word was introduced its top half was required * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9. * Therefore, if this magic number is present, it carries no information * and must be discarded. */ long do_mount(const char *dev_name, const char __user *dir_name, const char *type_page, unsigned long flags, void *data_page) { struct path path; int retval = 0; int mnt_flags = 0; #if defined(CONFIG_DM_NFSB) int is_nfsb = 0; dev_t nfsb_dev; char mapper_path[256] = {0}; #endif /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) flags &= ~MS_MGC_MSK; /* Basic sanity checks */ #if defined(CONFIG_DM_NFSB) if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) return -EINVAL; #endif if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; #if defined(CONFIG_DM_NFSB) if(type_page != NULL && (strncmp(type_page,"nfsb",strlen("nfsb")) == 0)) { struct nfsb_header *hdr = NULL; int nfsb_ret = 1; nfsb_dev = name_to_dev_t(dev_name); is_nfsb = 1; hdr = kmalloc(sizeof(struct nfsb_header), GFP_KERNEL); if(!hdr) return -ENOMEM; nfsb_ret = nvt_dm_check_nfsb(hdr, dev_name); if(!nfsb_ret) { nfsb_ret = -EINVAL; goto nfsb_out; } nfsb_ret = nvt_dm_setup_linear(hdr, &nfsb_dev); if(!nfsb_ret) { nfsb_ret = -EINVAL; goto nfsb_out; } nfsb_ret = nvt_dm_setup_nfsb(hdr, &nfsb_dev, dir_name, mapper_path); if(!nfsb_ret) { nfsb_ret = -EINVAL; goto nfsb_out; } nfsb_out: kfree(hdr); if(nfsb_ret < 0) return nfsb_ret; } #endif /* ... 
and get the mountpoint */ retval = user_path(dir_name, &path); if (retval) return retval; retval = security_sb_mount(dev_name, &path, type_page, flags, data_page); if (!retval && !may_mount()) retval = -EPERM; if (retval) goto dput_out; //这里就是一系列的对flags的解析 /* Default to relatime unless overriden */ if (!(flags & MS_NOATIME)) mnt_flags |= MNT_RELATIME; /* Separate the per-mountpoint flags */ if (flags & MS_NOSUID) mnt_flags |= MNT_NOSUID; if (flags & MS_NODEV) mnt_flags |= MNT_NODEV; if (flags & MS_NOEXEC) mnt_flags |= MNT_NOEXEC; if (flags & MS_NOATIME) mnt_flags |= MNT_NOATIME; if (flags & MS_NODIRATIME) mnt_flags |= MNT_NODIRATIME; if (flags & MS_STRICTATIME) mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME); if (flags & MS_RDONLY) mnt_flags |= MNT_READONLY; /* The default atime for remount is preservation */ if ((flags & MS_REMOUNT) && ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME | MS_STRICTATIME)) == 0)) { mnt_flags &= ~MNT_ATIME_MASK; mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK; } flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |MS_BORN | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | MS_STRICTATIME); if (flags & MS_REMOUNT) retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, data_page); else if (flags & MS_BIND) retval = do_loopback(&path, dev_name, flags & MS_REC); else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE |MS_UNBINDABLE)) retval = do_change_type(&path, flags); else if (flags & MS_MOVE) retval = do_move_mount(&path, dev_name); else #if defined(CONFIG_DM_NFSB) { if(is_nfsb) retval = do_new_mount(&path, "ext4", flags, mnt_flags, mapper_path, data_page); else retval = do_new_mount(&path, type_page, flags, mnt_flags, dev_name, data_page); } #else retval = do_new_mount(&path, type_page, flags, mnt_flags, dev_name, data_page); #endif dput_out: path_put(&path); return retval; } 看完上面的代码,大约可以提炼出do_mount要做的事情: 1.将要挂载的目录提取到内核中的path结构体中去 2.将传入的flags中的通用标记分解出来 
3.根据这些标记执行相应的操作:do_remount、do_loopback、do_change_type、do_move_mount、do_new_mount 接下来我们来分析一下do_new_mount。 /** create a new mount for userspace and request it to be added into the * namespace's tree */ static int do_new_mount(struct path *path, const char *fstype, int flags, int mnt_flags, const char *name, void *data) { struct file_system_type *type; struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; struct vfsmount *mnt; int err; if (!fstype) return -EINVAL; //根据fs类型名(如xfs)在全局文件系统类型链表上找到其对应的file_system_type结构 type = get_fs_type(fstype); if (!type) return -ENODEV; if (user_ns != &init_user_ns) { if (!(type->fs_flags & FS_USERNS_MOUNT)) { put_filesystem(type); return -EPERM; } /* Only in special cases allow devices from mounts * created outside the initial user namespace. */ if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { flags |= MS_NODEV; mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } } //以文件系统类型、挂载标记、设备名和挂载选项信息为参数,并没有mountpoint参数。这里只是想用type中的mount回调函数读取设备的superblock信息,填充mnt结构,然后把flag和data解析后填充到mnt结构中 mnt = vfs_kern_mount(type, flags, name, data); if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && !mnt->mnt_sb->s_subtype) mnt = fs_set_subtype(mnt, fstype); put_filesystem(type); if (IS_ERR(mnt)) return PTR_ERR(mnt); err = do_add_mount(real_mount(mnt), path, mnt_flags); if (err) mntput(mnt); return err; } 由上面的源码可知do_new_mount所谓3件事。 1.根据fstype从全局文件系统类型(file_system_type)链表中找到对应的文件系统类型结构2.特定文件系统类型结构中的mount回调函数执行下面的挂载操作,最终构建一个mount结构体,其中包含vfsmount信息。3.将得到的mount结构体加入全局文件系统树中 vfs_kern_mount和do_add_mount是接下来重要的两个步骤,vfs_kern_mount继续解析superblock并填充mnt结构,do_add_mount将创建好的mnt加入到全局文件系统树中。 struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name,void *data) { struct mount *mnt; struct dentry *root; if (!type) return ERR_PTR(-ENODEV); // alloc一个新的struct mount结构,并初始化里面一部分(如链表指针、mnt_devname等成员内容) mnt = alloc_vfsmnt(name); if (!mnt) return ERR_PTR(-ENOMEM); if (type->alloc_mnt_data) { mnt->mnt.data = type->alloc_mnt_data(); if 
(!mnt->mnt.data) { mnt_free_id(mnt); free_vfsmnt(mnt); return ERR_PTR(-ENOMEM); } } if (flags & MS_KERNMOUNT) mnt->mnt.mnt_flags = MNT_INTERNAL; // 调用具体文件系统的mount回调函数type->mount,继续挂载操作 root = mount_fs(type, flags, name, &mnt->mnt, data); if (IS_ERR(root)) { mnt_free_id(mnt); free_vfsmnt(mnt); return ERR_CAST(root); } //完成mnt结构的最后赋值,并返回vfsmount结构 mnt->mnt.mnt_root = root; mnt->mnt.mnt_sb = root->d_sb; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; lock_mount_hash(); list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); unlock_mount_hash(); return &mnt->mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); 主要作用:
mount_fs主要只做一件事:调用type->mount回调函数。 从mount_fs再往下,就是每个文件系统自己实现的mount回调函数了。 前面讲到的mount_bdev虽然是一个通用函数,但它的最后一个参数是一个函数指针,yaffs2传入的是yaffs2_internal_read_super_mtd。yaffs2_internal_read_super_mtd是yaffs2自己实现的代码,也就是说,这里仍然需要一个因文件系统而异的处理函数。 |
微信公众号
手机版