dcache.c       
2007-11-26

感觉文件系统里尽是cache, buffer cache, dcahce, inode, super 呵呵. 真是叫人目不暇给(这成语啥意思??). 好在dcache 并不想buffer cache那样千丝万缕不易理出个头绪.

分配dentry和dentry tree


文件系统的比较核心的一棵树, 也就是dentry和mnt组成的一整棵大树, 这里给出一个dentry树的以部分结构,我们先集中精力到这棵树的机构上来,这是一个数据结构的问题:
struct dentry {
    .....
    struct inode  * d_inode;    /* Where the name belongs to - NULL is negative */
    struct dentry * d_parent;    /* parent directory */
    struct list_head d_vfsmnt;
    struct list_head d_hash;    /* lookup hash list */
    struct list_head d_lru;        /* d_count = 0 LRU list */
    struct list_head d_child;    /* child of parent list */
    struct list_head d_subdirs;    /* our children */
    struct list_head d_alias;    /* inode alias list */
    .....  
    struct super_block * d_sb;    /* The root of the dentry tree */
    ......
};

dentry本身是一个树形结构,下面两个函数 build这颗树:
static kmem_cache_t *dentry_cache;  /*dentry 的free list,一个mem cache*/
struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
struct dentry * d_alloc_root(struct inode * root_inode)

dentry的回收


static struct list_head *dentry_hashtable; hash,通过dentry->d_hash接入.
一个dentry可以存在于hash中,可以有一个inode与之对应或者没有(negative,已删除).还有一个unused队列:
static LIST_HEAD(dentry_unused);   这是一个已经删除了但是还没有unhash的,等待回收的denty list, 通过dentry->d_lru接入;

dentry cache 的复杂就在释放和回收上了. 简单的ref count是不够的. 先看看纯粹的内存释放函数:
static inline void d_free(struct dentry *dentry)
{
    if (dentry->d_op && dentry->d_op->d_release)
        dentry->d_op->d_release(dentry);
    if (dname_external(dentry))
        kfree(dentry->d_name.name);
    kmem_cache_free(dentry_cache, dentry);
    dentry_stat.nr_dentry--;
}
然后是hash摘除函数:
/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent
 * dentry hashes, so that it won't be found through
 * a VFS lookup any more. Note that this is different
 * from deleting the dentry - d_delete will try to
 * mark the dentry negative if possible, giving a
 * successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants
 * to invalidate a dentry for some reason (NFS
 * timeouts or autofs deletes).
 */
static __inline__ void d_drop(struct dentry * dentry)
{
    spin_lock(&dcache_lock);
    list_del(&dentry->d_hash);
    INIT_LIST_HEAD(&dentry->d_hash);
    spin_unlock(&dcache_lock);
}

然后删除一个文件时,对detnry的影响是d_delete, 注意delete总是试图只reset inode, 使其变为negetive的dentry,除非还有其他object还在引用这个文件:
/*
 * When a file is deleted, we have two options:
 * - turn this dentry into a negative dentry
 * - unhash this dentry and free it.
 *
 * Usually, we want to just turn this into
 * a negative dentry, but if anybody else is
 * currently using the dentry or the inode
 * we can't do that and we fall back on removing
 * it from the hash queues and waiting for
 * it to be deleted later when it has no users
 */
void d_delete(struct dentry * dentry)
{
    /*
     * Are we the only user?
     */
    spin_lock(&dcache_lock);
    if (atomic_read(&dentry->d_count) == 1) {
        dentry_iput(dentry);
        return;
    }
    spin_unlock(&dcache_lock);

    /*
     * If not, just drop the dentry and let dput
     * pick up the tab..
     */
    d_drop(dentry);
}
为了清楚到底如何分配和释放一个dentry,还是简单看看文件删除,目录删除,和shrink dcache 的主要流程吧:
sys_unlink ->vfs_unlink -> dir->i_op->unlink + d_delete +dput
sys_rmdir->vfs_rmdir -> dir->i_op->rmdir + d_unhash (d_drop+shrink_dcache_parent) + d_delete+dput
kswapd || bdflush -> ... -->shrink_dcache_memory->prune_dcache->(d_drop +inode put+ d_free)
首先第一个问题是:
为啥删除的文件的dentry留在目录树中? 呵呵,快呗,起码我从内存就知道,这个文件已经被删除.(不过首先被prune,unlink的时候会被转移到unused 链表,呵呵). 曾经在哪里见过说unused队列中的dentry中还保留有inode.....起码是不太对头的. (大家讨论).

在分析dput前,看看一个lock free的原子操作,有助理解dcache的实现:(lock free 的算法)
int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
{
    int counter;
    int newcount;

repeat:
    counter = atomic_read(atomic);
    newcount = counter-1;

    if (!newcount) /*如果是最后一个引用则需要加锁*/
        goto slow_path;
    /*不是最后一个引用采用的是lock free 的算法!!!!*/
    asm volatile("lock; cmpxchgl %1,%2"
        :"=a" (newcount)
        :"r" (newcount), "m" (atomic->counter), "0" (counter));

    /* If the above failed, "eax" will have changed */
    if (newcount != counter)
        goto repeat;
    return 0; /*不是最后一个引用返回0*/

slow_path:
    spin_lock(lock);
    if (atomic_dec_and_test(atomic))
        return 1; /*最后一个引用返回1*/
    spin_unlock(lock);
    return 0;
}


/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 *
 * no dcache lock, please.
 */

void dput(struct dentry *dentry)
{
    if (!dentry)
        return;

repeat:
    if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
        return; /*不是最后一个引用则无事可做, 看来到unused list的dentry连引用计数都是0*/

    /* dput on a free dentry? */
    if (!list_empty(&dentry->d_lru)) /*bug check*/
        BUG();
    /*
     * AV: ->d_delete() is _NOT_ allowed to block now.
     */
    if (dentry->d_op && dentry->d_op->d_delete) { /*自己实现d delete的文件系统不多,ext2就没有的*/
        if (dentry->d_op->d_delete(dentry))
            goto unhash_it;
    }
    /* Unreachable? Get rid of it */
    if (list_empty(&dentry->d_hash)) /*已经unhash? ,delete的时候还用别的obj或者过程在用,不过现在没有了*/
        goto kill_it;
    list_add(&dentry->d_lru, &dentry_unused); /*普通的删除就放到unused队列中*/
    dentry_stat.nr_unused++;
    /*
     * Update the timestamp
     */
    dentry->d_reftime = jiffies;
    spin_unlock(&dcache_lock);
    return;

unhash_it:
    list_del_init(&dentry->d_hash);

kill_it: {
        struct dentry *parent;
        list_del(&dentry->d_child);
        /* drops the lock, at that point nobody can reach this dentry */
        dentry_iput(dentry);
        parent = dentry->d_parent; /*对parent dentry尝试进行dput操作*/
        d_free(dentry);
        if (dentry == parent)
            return;
        dentry = parent;
        goto repeat;
    }
}

为了理解dput为啥需要"递归"到父节点进行操作,又为啥看到unhash的删除方式(而不是make negative),我们考虑这样一个操作流程:
假设有一个目录:  home/usrx, 内有文件 home/usrx/myfile
操作流程为(fix me,这样容许不?):
0) open myfile
1) rm -r home/usrx
2) close myfile

当rm home/usrx的时候由于myfile有步骤0)的引用,所以 d_delete不能negative这个dentry, 只是unhash他, 其父节点home/usrx亦同.
然后当close myfile的时候进行 dput,就有必要"递归"到父节点了. (在这种情况下直接free掉myfile的dentry也是有道理的,父节点都没有了,呵呵,不用留着快速查找deleted的文件了).

static inline void dentry_iput(struct dentry * dentry) 就不多说了.

shrink and prune

void shrink_dcache_memory(int priority, unsigned int gfp_mask) 就不多费口舌了.
/**
 * prune_dcache - shrink the dcache
 * @count: number of entries to try and free
 *
 * Shrink the dcache. This is done when we need
 * more memory, or simply when we need to unmount
 * something (at which point we need to unuse
 * all dentries).
 *
 * This function may fail to free any resources if
 * all the dentries are in use.
 */
 
void prune_dcache(int count)
{
    spin_lock(&dcache_lock);
    for (;;) {
        struct dentry *dentry;
        struct list_head *tmp;

        tmp = dentry_unused.prev; /*遍历unused 链表*/

        if (tmp == &dentry_unused)
            break;
        list_del_init(tmp); /*先unshash*/
        dentry = list_entry(tmp, struct dentry, d_lru);

        /* If the dentry was recently referenced, don't free it. */
        if (dentry->d_flags & DCACHE_REFERENCED) { /*如果有lookup命中这个节点就还留用,继续加速查找已经删除的文件*/
             dentry->d_flags &= ~DCACHE_REFERENCED;
            list_add(&dentry->d_lru, &dentry_unused);
            count--;
            continue;
        }
        dentry_stat.nr_unused--;

        /* Unused dentry with a count? */
        if (atomic_read(&dentry->d_count)) /*引用计数应当为0, 印证了猜测.*/
            BUG();

        prune_one_dentry(dentry); /*全部的释放... 也有对parent节点的dput,这个函数就不罗列了*/
        if (!--count)
            break;
    }
    spin_unlock(&dcache_lock);
}

prune dcache的过程就是在unused链表中找些东西释放给mm. 剩下的对parent节点的prune和对sb的prune不难理解, 不再罗列了.倒是select_parent的非递归算法可以看看.(这里就算了)

初始化相关的部分,dcache_init,不再分析.
到此补充一张dentry的示意图:


顺便看看如何加入hash表吧:
static __inline__ void d_add(struct dentry * entry, struct inode * inode)
{
    d_instantiate(entry, inode); /*建立entry->d_alias, inode->i_dentry关系*/
    d_rehash(entry); /*加入hash表*/
}
简单搜索可知这种 make postive的工作都交给了具体的文件系统去做了,如 ext2_lookup. d_delete试图只是将删除的文件的inode和dentry的关系脱离,这里复习?一下.


说了这么多看dget_locked就知道为什么了:
/* This should be called _only_ with dcache_lock held */

static inline struct dentry * __dget_locked(struct dentry *dentry)
{
    atomic_inc(&dentry->d_count);
    if (atomic_read(&dentry->d_count) == 1) { /*我们inc后等于1, 代表他在lru队列,即unused队列*/
        dentry_stat.nr_unused--;
        list_del(&dentry->d_lru); /*暂时吧他从lru搞出来,dpu又会回到这里来*/
        INIT_LIST_HEAD(&dentry->d_lru);        /* make "list_empty()" work */
    }
    return dentry;
}
struct dentry * dget_locked(struct dentry *dentry)
{
    return __dget_locked(dentry);
}
最后,反而d_lookup比较清新宜人,没啥令人难懂的操作.
struct dentry * d_lookup(struct dentry * parent, struct qstr * name)

每次查完dcache,都要检查是不是要做revalidate操作,逻辑是:如果需要做, 组成了就好,做不成就释放invalidate这个dentry(unhash
然后 prune掉所有子目录):
if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
        if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
            dput(dentry);
            dentry = NULL;
        }
    }
比较典型的例子可能是NFS了,但是过于复杂了些. 看看pid_base_revalidate也就够了.
/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */
 

int d_invalidate(struct dentry * dentry) /*invalidate, 使其彻底的失效*/
{
    /*
     * If it's already been dropped, return OK.
     */
    spin_lock(&dcache_lock);
    if (list_empty(&dentry->d_hash)) { /*已经无效了*/
        spin_unlock(&dcache_lock);
        return 0;
    }
    /*
     * Check whether to do a partial shrink_dcache
     * to get rid of unused child entries.
     */
    if (!list_empty(&dentry->d_subdirs)) { /*清除所有子目录,树*/
        spin_unlock(&dcache_lock);
        shrink_dcache_parent(dentry);
        spin_lock(&dcache_lock);
    }

    /*
     * Somebody else still using it?
     *
     * If it's a directory, we can't drop it
     * for fear of somebody re-populating it
     * with children (even though dropping it
     * would make it unreachable from the root,
     * we might still populate it if it was a
     * working directory or similar).
     */
    if (atomic_read(&dentry->d_count) > 1) {
        if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
            spin_unlock(&dcache_lock);
            return -EBUSY;
        }
    }

    list_del_init(&dentry->d_hash); /*这个操作就是使其无效的操作,,,,随后的dput会将其放入lru(如果ref==1)*/
    spin_unlock(&dcache_lock);
    return 0;
}


而 int d_validate(struct dentry *dentry, struct dentry *dparent,)并不是d_invalid的对应者, 这个函数左看右看是验证一个没有引用的的指针是否有效....., ncp, smb使用, 估计做这两个文件系统的人才知道这个鬼东西到底是啥玩意.


struct dentry * d_find_alias(struct inode *inode) :只有VFAT使用(2.4), 看来VFAT不支持dentry的alias(不能在其一个目录下安装多个设备? 还是另有隐情..... 待解问题....
void d_prune_aliases(struct inode *inode) 太具体了,这两个函数



int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
void d_move(struct dentry * dentry, struct dentry * target) :虽然vfs_rename很复杂的,这里倒是尽是些力气活.
char * __d_path(struct dentry *dentry, struct vfsmount *vfsmnt, /*从一个dentry需找其父节点的时候,要知道
mnt才能在这其中做出选择, 从follow_dot_dot我们能学到这个概念, 倒是再细细品味吧*/

        struct dentry *root, struct vfsmount *rootmnt,
        char *buffer, int buflen)
asmlinkage long sys_getcwd(char *buf, unsigned long size) 刚好是__d_path的一个应用.
void d_genocide(struct dentry *root) : 非递归遍历树,呵呵...
int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry) :呵呵
ino_t find_inode_number(struct dentry *dir, struct qstr *name)
int have_submounts(struct dentry *parent) :有任何子目录是安装点就返回真,树的遍历...

这里最后给出一个图,表明dentry 和mnt 所组成的一幅全景图,并且还是一个dentry下面安装多个文件系统的例子:体会下
上面的这句话:
/*从一个dentry需找其父节点的时候,要知道mnt才能在这其中做出选择, 从follow_dot_dot我们能学到这个。。。*/