一文淺淺分析Linux sockfs檔案系統

本文主要對Linux網路檔案系統的註冊與掛載過程進行分析

一、簡介

Linux中“萬物皆檔案”，socket在Linux中對應的檔案系統叫Sockfs，每建立一個socket，就在sockfs中建立了一個特殊的檔案，同時建立了sockfs檔案系統中的inode，該inode唯一標識當前socket的通訊。

本文的重點放在sockfs檔案系統的註冊和掛載流程上，以後會對socket的底層來龍去脈進行詳細地分析與記錄。

二、三個核心結構體

1、結構file_system_type

file_system_type結構體代表Linux核心的各種檔案系統，每一種檔案系統必須要有自己的file_system_type結構，用於描述具體的檔案系統的型別，如ext4對應的ext4_fs_type，struct file_system_type結構體如下所示：

struct file_system_type { const char *name； //檔案系統的名字 int fs_flags；#define FS_REQUIRES_DEV 1 #define FS_BINARY_MOUNTDATA 2#define FS_HAS_SUBTYPE 4#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move（） during rename（） internally。 */ struct dentry *（*mount）（struct file_system_type *， int， const char *， void *）； //掛載此檔案系統時使用的回撥函式 void （*kill_sb）（struct super_block *）； //釋放超級塊函式指標 struct module *owner；//指向實現這個檔案系統的模組，通常為THIS_MODULE宏 struct file_system_type * next；//指向檔案系統型別連結串列的下一個檔案系統型別 struct hlist_head fs_supers； struct lock_class_key s_lock_key； struct lock_class_key s_umount_key； struct lock_class_key s_vfs_rename_key； struct lock_class_key s_writers_key［SB_FREEZE_LEVELS］； struct lock_class_key i_lock_key； struct lock_class_key i_mutex_key； struct lock_class_key i_mutex_dir_key；}；

如struct file_system_type *next結構體成員所示，所有檔案系統的file_system_type結構形成一個單向連結串列，在fs/filesystems.c中有一個全域性變數作為該連結串列的表頭：

/* fs/filesystems.c */ static struct file_system_type *file_systems;

在Linux核心中sock_fs_type結構定義代表了sockfs的網路檔案系統，如下所示：

static struct file_system_type sock_fs_type = { .name = "sockfs", .mount = sockfs_mount, .kill_sb = kill_anon_super, };

2、結構struct vfsmount與struct mount

每當一個檔案系統被安裝時，就會有一個mount結構（其中內嵌一個vfsmount結構）被建立，mount代表該檔案系統的一個安裝例項。比較舊的核心版本中，所有成員都放在vfsmount裡；現在的Linux核心把大部分成員移到了新的mount結構體中，vfsmount只保留mnt_root、mnt_sb、mnt_flags三個成員。這樣使得vfsmount的內容更加精簡，在很多情況下只需要傳遞vfsmount而已。struct vfsmount如下：

struct vfsmount { struct dentry *mnt_root； //指向這個檔案系統的根的dentry struct super_block *mnt_sb； // 指向這個檔案系統的超級塊物件 int mnt_flags； // 此檔案系統的掛載標誌}

對於每一個mount的檔案系統都有一個vfsmount結構來表示，sockfs安裝時的vfsmount定義如下所示：

static struct vfsmount *sock_mnt __read_mostly；

struct mount如下：

struct mount { struct hlist_node mnt_hash； /* 用於連結到全域性已掛載檔案系統的連結串列 */ struct mount *mnt_parent； /* 指向此檔案系統的掛載點所屬的檔案系統，即父檔案系統 */ struct dentry *mnt_mountpoint； /* 指向此檔案系統的掛載點的dentry */ struct vfsmount mnt； /* 指向此檔案系統的vfsmount例項 */ union { struct rcu_head mnt_rcu； struct llist_node mnt_llist； }；#ifdef CONFIG_SMP struct mnt_pcp __percpu *mnt_pcp；#else int mnt_count； int mnt_writers；#endif struct list_head mnt_mounts； /* 掛載在此檔案系統下的所有子檔案系統的連結串列的表頭，下面的節點都是mnt_child */ struct list_head mnt_child； /* 連結到被此檔案系統所掛的父檔案系統的mnt_mounts上 */ struct list_head mnt_instance； /* 連結到sb->s_mounts上的一個mount例項 */ const char *mnt_devname； /* 裝置名，如/dev/sdb1 */ struct list_head mnt_list； /* 連結到程序namespace中已掛載檔案系統中，表頭為mnt_namespace的list域 */ struct list_head mnt_expire； /* 連結到一些檔案系統專有的過期連結串列，如NFS， CIFS等 */ struct list_head mnt_share； /* 連結到共享掛載的迴圈連結串列中 */ struct list_head mnt_slave_list；/* 此檔案系統的slave mount連結串列的表頭 */ struct list_head mnt_slave； /* 連線到master檔案系統的mnt_slave_list */ struct mount *mnt_master； /* 指向此檔案系統的master檔案系統，slave is on master->mnt_slave_list */ struct mnt_namespace *mnt_ns； /* 指向包含這個檔案系統的程序的name space */ struct mountpoint *mnt_mp； /* where is it mounted */ struct hlist_node mnt_mp_list； /* list mounts with the same mountpoint */ struct list_head mnt_umounting； /* list entry for umount propagation */#ifdef CONFIG_FSNOTIFY struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks； __u32 mnt_fsnotify_mask；#endif int mnt_id； /* mount identifier */ int mnt_group_id； /* peer group identifier */ int mnt_expiry_mark； /* true if marked for expiry */ struct hlist_head mnt_pins； struct fs_pin mnt_umount； struct dentry *mnt_ex_mountpoint；}

更多linux核心影片教程文件資料免費領取後臺私信

【核心】

自行獲取。

Linux核心原始碼/記憶體調優/檔案系統/程序管理/裝置驅動/網路協議棧-學習影片教程-騰訊課堂

三、sockfs檔案系統的註冊

Linux核心初始化時，執行sock_init（）函式登記sockfs，sock_init（）函式如下：

static int __init sock_init（void）{ …… err = register_filesystem（&sock_fs_type）；//註冊網路檔案系統 …… sock_mnt = kern_mount（&sock_fs_type）；//安裝網路檔案系統 ……}

註冊函式：

int register_filesystem（struct file_system_type * fs）{ int res = 0； struct file_system_type ** p； BUG_ON（strchr（fs->name， ‘。’））； if （fs->next） return -EBUSY； write_lock（&file_systems_lock）； p = find_filesystem（fs->name， strlen（fs->name））； //查詢是否存在 if （*p） res = -EBUSY； else *p = fs； //將filesystem靜態變數指向fs write_unlock（&file_systems_lock）； return res；}

註冊函式中的find_filesystem函式如下，for迴圈一開始的file_systems變數就是上面說的註冊檔案系統使用到的全域性變數指標，strncmp去比較file_system_type的第一項name（檔案系統名）是否和將要註冊的檔案系統名字相同。如果相同，返回的p就指向那個同名file_system_type結構的指標（*p非空）；如果沒找到，p則指向連結串列尾端的next指標（連結串列為空時即&file_systems），此時*p為NULL。

static struct file_system_type **find_filesystem（const char *name， unsigned len）{ struct file_system_type **p； for （p = &file_systems； *p； p = &（*p）->next） if （strncmp（（*p）->name， name， len） == 0 && ！（*p）->name［len］） break； return p；}

回到register_filesystem函式後，判斷find_filesystem返回的p：如果*p非空，說明已存在同名的檔案系統，返回-EBUSY錯誤；如果*p為NULL，說明沒有重複，此時p指向連結串列尾端file_system_type的next指標，執行*p = fs就把當前要註冊的檔案系統串聯進連結串列，至此一個檔案系統模組就註冊好了。

四、sockfs檔案系統的安裝

在上面的sock_init（）函式中的sock_mnt = kern_mount（&sock_fs_type）開始進行安裝。kern_mount函式主要用於那些沒有實體介質的檔案系統，該函式主要是獲取檔案系統的super_block物件與根目錄的inode與dentry物件，並將這些物件加入到系統連結串列。kern_mount宏如下所示：

#define kern_mount(type) kern_mount_data(type, NULL)

kern_mount_data如下：

struct vfsmount *kern_mount_data（struct file_system_type *type， void *data）{ struct vfsmount *mnt； mnt = vfs_kern_mount（type， SB_KERNMOUNT， type->name， data）； if （！IS_ERR（mnt）） { /* * it is a longterm mount， don‘t release mnt until * we unmount before file sys is unregistered */ real_mount（mnt）->mnt_ns = MNT_NS_INTERNAL； } return mnt；}

呼叫：vfs_kern_mount

struct vfsmount *vfs_kern_mount（struct file_system_type *type， int flags， const char *name， void *data）{ struct mount *mnt； struct dentry *root； if （！type） return ERR_PTR（-ENODEV）； mnt = alloc_vfsmnt（name）；//分配一個mount物件，並對其進行部分初始化 if （！mnt） return ERR_PTR（-ENOMEM）； if （flags & SB_KERNMOUNT） mnt->mnt。mnt_flags = MNT_INTERNAL； root = mount_fs（type， flags， name， data）；//獲取該檔案系統的根目錄的dentry，同時也獲取super_block if （IS_ERR（root）） { mnt_free_id（mnt）； free_vfsmnt（mnt）； return ERR_CAST（root）； }//對mnt物件與root進行繫結 mnt->mnt。mnt_root = root； mnt->mnt。mnt_sb = root->d_sb； mnt->mnt_mountpoint = mnt->mnt。mnt_root； mnt->mnt_parent = mnt； lock_mount_hash（）； list_add_tail（&mnt->mnt_instance， &root->d_sb->s_mounts）；//將mnt新增到root->d_sb->s_mounts連結串列中 unlock_mount_hash（）； return &mnt->mnt；}

vfs_kern_mount函式呼叫mount_fs獲取該檔案系統的根目錄的dentry，同時也獲取super_block，具體實現如下：

struct dentry *mount_fs（struct file_system_type *type， int flags， const char *name， void *data）{ struct dentry *root； struct super_block *sb； char *secdata = NULL； int error = -ENOMEM； if （data && ！（type->fs_flags & FS_BINARY_MOUNTDATA）） {//在kern_mount呼叫中data為NULL，所以該if判斷為假 secdata = alloc_secdata（）； if （！secdata） goto out； error = security_sb_copy_data（data， secdata）； if （error） goto out_free_secdata； } root = type->mount（type， flags， name， data）；//呼叫file_system_type中的 mount方法 if （IS_ERR（root）） { error = PTR_ERR（root）； goto out_free_secdata； } sb = root->d_sb； BUG_ON（！sb）； WARN_ON（！sb->s_bdi）； sb->s_flags |= SB_BORN； error = security_sb_kern_mount（sb， flags， secdata）；……}

其中type->mount（）繼續呼叫了sockfs的回撥函式sockfs_mount

static struct dentry *sockfs_mount（struct file_system_type *fs_type， int flags， const char *dev_name， void *data）{ return mount_pseudo_xattr（fs_type， “socket：”， &sockfs_ops， sockfs_xattr_handlers， &sockfs_dentry_operations， SOCKFS_MAGIC）；}

struct dentry *mount_pseudo_xattr（struct file_system_type *fs_type， char *name， const struct super_operations *ops， const struct xattr_handler **xattr， const struct dentry_operations *dops， unsigned long magic）{ struct super_block *s； struct dentry *dentry； struct inode *root； struct qstr d_name = QSTR_INIT（name， strlen（name））； s = sget_userns（fs_type， NULL， set_anon_super， SB_KERNMOUNT|SB_NOUSER， &init_user_ns， NULL）； if （IS_ERR（s）） return ERR_CAST（s）； s->s_maxbytes = MAX_LFS_FILESIZE； s->s_blocksize = PAGE_SIZE； s->s_blocksize_bits = PAGE_SHIFT； s->s_magic = magic； s->s_op = ops ？ ops ： &simple_super_operations； s->s_xattr = xattr； s->s_time_gran = 1； root = new_inode（s）； if （！root） goto Enomem； /* * since this is the first inode， make it number 1。 New inodes created * after this must take care not to collide with it （by passing * max_reserved of 1 to iunique）。 */ root->i_ino = 1； root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR； root->i_atime = root->i_mtime = root->i_ctime = current_time（root）； dentry = __d_alloc（s， &d_name）； if （！dentry） { iput（root）； goto Enomem； } d_instantiate（dentry， root）； s->s_root = dentry； s->s_d_op = dops； s->s_flags |= SB_ACTIVE； return dget（s->s_root）；Enomem： deactivate_locked_super（s）； return ERR_PTR（-ENOMEM）；}

以上函式進行超級塊、根inode（root）、根dentry相關的建立及初始化操作，其中上面的s->s_op = ops就是指向了sockfs_ops結構體，也就是該sockfs檔案系統的struct super_block的函式操作集指向了sockfs_ops（而s->s_d_op = dops指向的是sockfs_dentry_operations）。

static const struct super_operations sockfs_ops = { .alloc_inode = sock_alloc_inode, .destroy_inode = sock_destroy_inode, .statfs = simple_statfs, };

該函式表對sockfs檔案系統的節點和目錄提供了具體的操作函式，後面涉及到的sockfs檔案系統的重要操作均會到該函式表中查詢到對應的操作函式，例如Linux核心在建立socket節點時會查詢sockfs_ops的alloc_inode函式，從而呼叫sock_alloc_inode函式完成socket以及inode節點的建立。

原文連結：淺析Linux sockfs檔案系統 - 檔案系統 - 我愛核心網 - 構建全國最權威的核心技術交流分享論壇