2007-11-29
照理说这个模块的耦合做的真是不错的. 偌大一堆函数是static, 非static的函数也就两三个是真正的接口函数而已. 在2.4中只有 ext2文件系统实现了disk quota,其他的文件系统还不支持. disk quota是照着BSD接口来实现的(见注释). 顺着文件看下去, 1. 首先知道, quot分成两种,一种针对user,一种针对group 也就是说一种针对uid,一种针对gid. #define MAXQUOTAS 2 #define USRQUOTA 0 /* element used for user quotas */ #define GRPQUOTA 1 /* element used for group quotas */
2. quot的使能的单位是文件系统,一个super block, sb struct super_block { struct file_system_type *s_type; struct super_operations *s_op; struct dquot_operations *dq_op; /*除了s_op,还有disk quot 的ops, dq 就是disk quot*/
struct quota_mount_options s_dquot; /* Diskquota specific options */ quot的全局配置参数也存在sb中 /*这些配置里边我们先关心那个files:这个文件是存储uid/gid对应的quot值的:分别存在两个文件中,以gid/uid为索引*/ ........... } option: struct quota_mount_options { unsigned int flags; /* Flags for diskquotas on this device */ struct semaphore dqio_sem; /* lock device while I/O in progress */ struct semaphore dqoff_sem; /* serialize quota_off() and quota_on() on device */ struct file *files[MAXQUOTAS]; /* fp's to quotafiles */ /*quot文件存储uid/gid对应的各种limit*/ time_t inode_expire[MAXQUOTAS]; /* expiretime for inode-quota */ time_t block_expire[MAXQUOTAS]; /* expiretime for block-quota */ char rsquash[MAXQUOTAS]; /* for quotas threat root as any other user */ };
/* * Definitions for disk quotas imposed on the average user * (big brother finally hits Linux). * * The following constants define the amount of time given a user * before the soft limits are treated as hard limits (usually resulting * in an allocation failure). The timer is started when the user crosses * their soft limit, it is reset when they go below their soft limit. */ #define MAX_IQ_TIME 604800 /* (7*24*60*60) 1 week */ #define MAX_DQ_TIME 604800 /* (7*24*60*60) 1 week */ 为了理解hard quot和soft quot以及expiretime, 只需看看check_idq或者check_bdq即可.
3. 在quot文件中存储的数据如下(quot文件本身没有quot的限制) /* * The following structure defines the format of the disk quota file * (as it appears on disk) - the file is an array of these structures * indexed by user or group number. */ struct dqblk { __u32 dqb_bhardlimit; /* absolute limit on disk blks alloc */ __u32 dqb_bsoftlimit; /* preferred limit on disk blks */ __u32 dqb_curblocks; /* current block count */ __u32 dqb_ihardlimit; /* absolute limit on allocated inodes */ __u32 dqb_isoftlimit; /* preferred inode limit */ __u32 dqb_curinodes; /* current # allocated inodes */ time_t dqb_btime; /* time limit for excessive disk use */ time_t dqb_itime; /* time limit for excessive inode use */ }; 参考read_dquot,write_dquot.
4.从quot文件中读出数据后存在一个hash中,以(kdev_t,id,type)为索引
进入hash表: dqget的时候,如果是新分配的dquot就进入hash表 撤出hash表: dquot只在重新被使用的时候,或者quot关闭(invalid)的时候才从hash中删除.并且重用的时候仅仅是临时删除,马上又加进来.
id: gid 或者uid, type:group quot或者user quot 参考函数: static struct dquot *dqget(struct super_block *sb, unsigned int id, short type) { unsigned int hashent = hashfn(sb->s_dev, id, type); struct dquot *dquot, *empty = NULL; struct quota_mount_options *dqopt = sb_dqopt(sb);
if (!is_enabled(dqopt, type)) return(NODQUOT);
we_slept: if ((dquot = find_dquot(hashent, sb->s_dev, id, type)) == NULL) { //从hash查找 if (empty == NULL) { //没有查到 dquot_updating[hashent]++; empty = get_empty_dquot(); //分配新的quot数据结构 if (!--dquot_updating[hashent]) wake_up(&update_wait); goto we_slept; //竞争处理,一定从hash中找到的才算. } //进行初始化 dquot = empty; dquot->dq_id = id; dquot->dq_type = type; dquot->dq_dev = sb->s_dev; dquot->dq_sb = sb; /* hash it first so it can be found */ hash_dquot(dquot); //挂入hash表 read_dquot(dquot); //从磁盘读入配置 } else { //quot已经在hash的情况 if (!dquot->dq_count++) { //增加其引用计数,引用计数是0代表在free list中 //参考dqput, 当引用计数是0的时候并不从hash中摘除的 remove_free_dquot(dquot); } else dqstats.cache_hits++; wait_on_dquot(dquot); if (empty) dqput(empty); }
while (dquot_updating[hashent]) sleep_on(&update_wait);
if (!dquot->dq_sb) { /* Has somebody invalidated entry under us? */ /* * Do it as if the quota was invalidated before we started */ dqput(dquot); return NODQUOT; } dquot->dq_referenced++; dqstats.lookups++;
return dquot; }
5. quot管理: quot的list使用策略 注释说的比较清楚了: /* * Dquot List Management: * The quota code uses three lists for dquot management: the inuse_list, * free_dquots, and dquot_hash[] array. A single dquot structure may be * on all three lists, depending on its current state. * * All dquots are placed on the inuse_list when first created, and this * list is used for the sync and invalidate operations, which must look * at every dquot. * * Unused dquots (dq_count == 0) are added to the free_dquots list when * freed, and this list is searched whenever we need an available dquot. * Dquots are removed from the list as soon as they are used again, and * nr_free_dquots gives the number of dquots on the list. * * Dquots with a specific identity (device, type and id) are placed on * one of the dquot_hash[] hash chains. This provides an efficient search * mechanism to locate a specific dquot. */ hash表我们已经说过了. dquot最多只分配max_dquots个(1024, typical). 然后从不释放.为所有文件系统所共享.并且一旦分配就进入 inuse队列. 当引用计数降到0的时候,进入unused队列,等待重用.此时其磁盘部分已经写入quot文件.(见dqput).等待重用的dquot 还在hash中,如果重新命中就马上恢复使用. (而引用计数不为0,必定不在unused队列)
quot的设计是采用一定量的内存(dquot),轮换使用,映射为不同的quot file中的具体数据.
quot尽力保持高引用度的dquot存在于hash中,所以可以看到为此所做的努力: static struct dquot *find_best_candidate_weighted(void) static inline struct dquot *find_best_free(void)
说明白这些后,关于quot的分配释放/hash的管理/重用策略等都说完了,相关函数在这些知识下很容易理解.就不再一一列举. static void dqput(struct dquot *dquot) : 不说了... -:)
6. lock/unlock 操作dquot的时候需要lock: 读写/分配释放inode block. 还有一个per dquot的sleep队列... DQ_MOD: 不难理解修改过,需要同步到quot file, 写入文件后清除此标记....
7.inode 的quota操作 参考quotaops.h. 下面就是所有的操作接口: 初始化(找到dquot写入inode->i_dquot),释放, 分配释放block/分配释放inode,transfer(chown). /* * Definitions of diskquota operations. */ struct dquot_operations dquot_operations = { dquot_initialize, /* mandatory */ 初始化 inode->i_dquot,简单明了.. dquot_drop, /* mandatory */ dqput(inode->i_dquot),同上... dquot_alloc_block, dquot_alloc_inode, dquot_free_block, dquot_free_inode, dquot_transfer /*chown的时候进行....*/ }; 然后通过quotaops.h定义的宏来使用这个接口.... 唯一值得注意的是 static struct dquot *dqduplicate(struct dquot *dquot) { if (dquot == NODQUOT || !dquot->dq_sb) return NODQUOT; dquot->dq_count++; wait_on_dquot(dquot); if (!dquot->dq_sb) { dquot->dq_count--; return NODQUOT; } dquot->dq_referenced++; dqstats.lookups++; return dquot; } 只是获取一个引用计数,同时考虑加锁的情况:参考函数 dquot_alloc_block, dquot_alloc_inode,结合其流程,此操作不难理解的.
8. Sync , quota on/off 其实主要的部分已经完了. 剩下的就是使能, 关闭,同步操作了. 同步: 看一下函数,就是写入quota int sync_dquots(kdev_t dev, short type) { struct dquot *dquot, *next, *ddquot; int need_restart;
restart: next = inuse_list; need_restart = 0; while ((dquot = next) != NULL) { next = dquot->dq_next; /*各种有效性检查*/ if (dev && dquot->dq_dev != dev) continue; if (type != -1 && dquot->dq_type != type) continue; if (!dquot->dq_sb) /* Invalidated? */ continue; if (!(dquot->dq_flags & (DQ_LOCKED | DQ_MOD))) continue;
if ((ddquot = dqduplicate(dquot)) == NODQUOT) continue; if (ddquot->dq_flags & DQ_MOD) write_dquot(ddquot); dqput(ddquot); /* Set the flag for another pass. */ need_restart = 1; //可能会睡眠,需要重新扫描 } /* * If anything blocked, restart the operation * to ensure we don't miss any dquots. */ if (need_restart) goto restart;
dqstats.syncs++; return(0); } 下面所有的函数都是为quota on/off准备的: void invalidate_dquots(kdev_t dev, short type) //说实话这个函数和同步差不多,只是要等待已经lock的quota而已 { //invalid 操作必须完成(on/off),所以要等待->写入磁盘->重新初始化 ......... restart: next = inuse_list; /* Here it is better. Otherwise the restart doesn't have any sense ;-) */ need_restart = 0; while ((dquot = next) != NULL) { ...... if (dquot->dq_flags & DQ_LOCKED) { __wait_on_dquot(dquot);
/* Set the flag for another pass. */ need_restart = 1; /* * Make sure it's still the same dquot. */ if (dquot->dq_dev != dev) continue; if (dquot->dq_type != type) continue; if (!dquot->dq_sb) continue; } /* * Because inodes needn't to be the only holders of dquot * the quota needn't to be written to disk. So we write it * ourselves before discarding the data just for sure... */ if (dquot->dq_flags & DQ_MOD && dquot->dq_sb) { write_dquot(dquot); need_restart = 1; /* We slept on IO */ } clear_dquot(dquot); } /* * If anything blocked, restart the operation * to ensure we don't miss any dquots. */ if (need_restart) goto restart; }
add_dquot_ref : quota on的时候, 遍历使能的quota的这个文件系统所有已经打开的文件,挂接一个dquot到inode->i_dquot
remove_dquot_ref:这个函数在inode.c中,用于quota off的时候. 复杂性在于必须遍历所有inode,与上面add_dquot_ref有所不同,并且将dqput分成两个步骤来做: remove_inode_dquot_ref : 由remove_dquot_ref调用, 和dqput相比,不写入文件,仅仅减少引用计数+放到一个临时链表 put_dquot_list :遍历临时链表做dqput操作.
说完了这些, dquot 的on off 函数反倒根本不必再说了.
9.dquota的配置函数
这个好像不用说吧............,bye (函数虽多,配置而已,重要,但是理解他们的方式不在配置本身....)
|