Commit 5755f006 authored by J. R. Okajima's avatar J. R. Okajima
Browse files

aufs: inode op, del, rmdir



Implement i_op->rmdir(), which supports logical deletion by whiteout,
including all children.

As struct.txt in the previous commit describes, the target dir is renamed to
a whiteout-ed temporary unique name in rmdir(2), and then removed
asynchronously by the system global workqueue.
Signed-off-by: J. R. Okajima <hooanon05g@gmail.com>
parent 4dc51987
......@@ -185,3 +185,210 @@ void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
out:
dput(dentry);
}
/* ---------------------------------------------------------------------- */
/*
 * Flag bits kept in struct test_empty_arg.flags.
 * WHONLY: when set, test_empty_cb() reports -ENOTEMPTY for an entry that is
 *	neither "."/".." nor a whiteout, unless that name is already recorded
 *	in the whiteout list.
 * CALLED: set by test_empty_cb() on every invocation and cleared by
 *	do_test_empty() before each vfsub_iterate_dir() call, which lets
 *	do_test_empty() see whether the last pass invoked the callback at all.
 */
#define AuTestEmpty_WHONLY 1
#define AuTestEmpty_CALLED (1 << 1)
/* test, set and clear a flag by its short name (WHONLY or CALLED) */
#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name)
#define au_fset_testempty(flags, name) \
do { (flags) |= AuTestEmpty_##name; } while (0)
#define au_fclr_testempty(flags, name) \
do { (flags) &= ~AuTestEmpty_##name; } while (0)
/* state shared between the dir-emptiness scan and its readdir callback */
struct test_empty_arg {
/* must stay embedded: test_empty_cb() gets back here via container_of() */
struct dir_context ctx;
/* whiteout names collected so far */
struct au_nhash *whlist;
/* AuTestEmpty_* bits */
unsigned int flags;
/* result of the most recent test_empty_cb() call */
int err;
/* index of the branch currently being scanned */
aufs_bindex_t bindex;
};
/*
 * Readdir actor used while testing whether a lower dir is logically empty.
 *
 * Every call resets arg->err and raises the CALLED flag.  The entry is then
 * classified:
 * - "." and ".." are ignored.
 * - a whiteout (name longer than and starting with AUFS_WH_PFX) has its
 *   prefix stripped and the remaining name is appended to the whiteout list
 *   unless it is already there.
 * - anything else is an error (-ENOTEMPTY) only when the WHONLY flag is set
 *   and the name is not recorded in the whiteout list.
 *
 * Returns arg->err, i.e. 0 to go on iterating or a negative errno.
 */
static int test_empty_cb(struct dir_context *ctx, const char *__name,
			 int namelen, loff_t offset __maybe_unused, u64 ino,
			 unsigned int d_type)
{
	struct test_empty_arg *ta;
	char *ent;

	ta = container_of(ctx, struct test_empty_arg, ctx);
	ent = (void *)__name;

	ta->err = 0;
	au_fset_testempty(ta->flags, CALLED);
	/* smp_mb(); */

	if (ent[0] == '.'
	    && (namelen == 1 || (ent[1] == '.' && namelen == 2))) {
		/* "." and ".." never count as content */
	} else if (namelen > AUFS_WH_PFX_LEN
		   && !memcmp(ent, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
		/* a whiteout: remember the name behind the prefix, once */
		ent += AUFS_WH_PFX_LEN;
		namelen -= AUFS_WH_PFX_LEN;
		if (!au_nhash_test_known_wh(ta->whlist, ent, namelen))
			ta->err = au_nhash_append_wh(ta->whlist, ent, namelen,
						     ino, d_type, ta->bindex);
	} else if (au_ftest_testempty(ta->flags, WHONLY)
		   && !au_nhash_test_known_wh(ta->whlist, ent, namelen)) {
		/* a non-whiteout entry which the whiteout list does not cover */
		ta->err = -ENOTEMPTY;
	}

	/* smp_mb(); */
	AuTraceErr(ta->err);
	return ta->err;
}
/*
 * Scan the lower dir of @dentry on branch arg->bindex with test_empty_cb().
 *
 * The lower dir is opened through au_h_open() and iterated repeatedly until
 * either an error shows up or a pass finishes without the callback being
 * invoked (CALLED stays cleared).  The scan is skipped, with success, when
 * UDBA_NONE is not set and the opened lower inode has no links left.
 *
 * Returns 0, the error from au_h_open(), a negative value from
 * vfsub_iterate_dir(), or the error left in arg->err by the callback.
 */
static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
{
	int err;
	struct file *h_file;
	struct au_branch *br;

	h_file = au_h_open(dentry, arg->bindex,
			   O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
			   /*file*/NULL);
	if (IS_ERR(h_file))
		return PTR_ERR(h_file);

	err = 0;
	if (au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
	    || file_inode(h_file)->i_nlink) {
		do {
			arg->err = 0;
			au_fclr_testempty(arg->flags, CALLED);
			/* smp_mb(); */
			err = vfsub_iterate_dir(h_file, &arg->ctx);
			if (err >= 0)
				err = arg->err;
		} while (!err && au_ftest_testempty(arg->flags, CALLED));
	}

	/* close the lower file and decrement the branch's br_nfiles counter */
	fput(h_file);
	br = au_sbr(dentry->d_sb, arg->bindex);
	au_lcnt_dec(&br->br_nfiles);
	return err;
}
/* argument bundle handed to call_do_test_empty() through au_wkq_wait() */
struct do_test_empty_args {
/* where the return value of do_test_empty() is stored */
int *errp;
/* first argument of do_test_empty() */
struct dentry *dentry;
/* second argument of do_test_empty() */
struct test_empty_arg *arg;
};
/*
 * Workqueue trampoline: unpack a struct do_test_empty_args, run
 * do_test_empty() and store its result where the submitter asked for it.
 */
static void call_do_test_empty(void *args)
{
	struct do_test_empty_args *packed = args;
	int ret;

	ret = do_test_empty(packed->dentry, packed->arg);
	*packed->errp = ret;
}
static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
{
int err, wkq_err;
struct dentry *h_dentry;
struct inode *h_inode;
h_dentry = au_h_dptr(dentry, arg->bindex);
h_inode = d_inode(h_dentry);
/* todo: i_mode changes anytime? */
inode_lock_shared_nested(h_inode, AuLsc_I_CHILD);
err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
inode_unlock_shared(h_inode);
if (!err)
err = do_test_empty(dentry, arg);
else {
struct do_test_empty_args args = {
.errp = &err,
.dentry = dentry,
.arg = arg
};
unsigned int flags = arg->flags;
wkq_err = au_wkq_wait(call_do_test_empty, &args);
if (unlikely(wkq_err))
err = wkq_err;
arg->flags = flags;
}
return err;
}
int au_test_empty_lower(struct dentry *dentry)
{
int err;
unsigned int rdhash;
aufs_bindex_t bindex, btop, btail;
struct au_nhash whlist;
struct test_empty_arg arg = {
.ctx = {
.actor = test_empty_cb
}
};
int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg);
SiMustAnyLock(dentry->d_sb);
rdhash = au_sbi(dentry->d_sb)->si_rdhash;
if (!rdhash)
rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
if (unlikely(err))
goto out;
arg.flags = 0;
arg.whlist = &whlist;
btop = au_dbtop(dentry);
test_empty = do_test_empty;
arg.bindex = btop;
err = test_empty(dentry, &arg);
if (unlikely(err))
goto out_whlist;
au_fset_testempty(arg.flags, WHONLY);
btail = au_dbtaildir(dentry);
for (bindex = btop + 1; !err && bindex <= btail; bindex++) {
struct dentry *h_dentry;
h_dentry = au_h_dptr(dentry, bindex);
if (h_dentry && d_is_positive(h_dentry)) {
arg.bindex = bindex;
err = test_empty(dentry, &arg);
}
}
out_whlist:
au_nhash_wh_free(&whlist);
out:
return err;
}
int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
{
int err;
struct test_empty_arg arg = {
.ctx = {
.actor = test_empty_cb
}
};
aufs_bindex_t bindex, btail;
err = 0;
arg.whlist = whlist;
arg.flags = AuTestEmpty_WHONLY;
btail = au_dbtaildir(dentry);
for (bindex = au_dbtop(dentry); !err && bindex <= btail; bindex++) {
struct dentry *h_dentry;
h_dentry = au_h_dptr(dentry, bindex);
if (h_dentry && d_is_positive(h_dentry)) {
arg.bindex = bindex;
err = sio_test_empty(dentry, &arg);
}
}
return err;
}
......@@ -74,6 +74,8 @@ void au_add_nlink(struct inode *dir, struct inode *h_dir);
void au_sub_nlink(struct inode *dir, struct inode *h_dir);
loff_t au_dir_size(struct file *file, struct dentry *dentry);
void au_dir_ts(struct inode *dir, aufs_bindex_t bsrc);
int au_test_empty_lower(struct dentry *dentry);
int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
/* vdir.c */
unsigned int au_rdhash_est(loff_t sz);
......
......@@ -650,6 +650,7 @@ struct inode_operations aufs_iop[] = {
.unlink = aufs_unlink,
.symlink = aufs_symlink,
.mkdir = aufs_mkdir,
.rmdir = aufs_rmdir,
.mknod = aufs_mknod,
.permission = aufs_permission,
......
......@@ -192,6 +192,46 @@ out:
return wh_dentry;
}
/*
 * When removing a dir, first rename its lower dir on branch @bindex to a
 * unique temporary whiteout-ed name, so that the operation stays revertible,
 * and then remove the renamed dir together with the child whiteouts listed
 * in @whlist.
 *
 * Monitoring of the lower inode is stopped between the two steps.
 * A failure of the removal step is reported by AuIOErr() and then ignored.
 *
 * Returns 0 on success (including an ignored removal failure), or the error
 * from au_whtmp_ren().  The caller also accepts a positive value meaning
 * "remove asynchronously", but this revision never produces one.
 */
static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
			   struct au_nhash *whlist, struct inode *dir)
{
	int err, rmdir_err;
	struct super_block *sb = dentry->d_sb;
	struct dentry *h_dentry;

	SiMustAnyLock(sb);

	h_dentry = au_h_dptr(dentry, bindex);
	err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
	if (!err) {
		/* stop monitoring */
		au_hn_free(au_hi(d_inode(dentry), bindex));

		rmdir_err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
		if (unlikely(rmdir_err))
			AuIOErr("rmdir %pd, b%d failed, %d. ignored\n",
				h_dentry, bindex, rmdir_err);
	}

	AuTraceErr(err);
	return err;
}
/*
* final procedure for deleting a entry.
* maintain dentry and iattr.
......@@ -333,3 +373,114 @@ out_free:
out:
return err;
}
/*
 * i_op->rmdir() for aufs.
 * @dir: the parent dir inode, locked by the caller.
 * @dentry: the dir to be removed.
 *
 * The dir must be logically empty, which au_test_empty() checks while
 * recording the child whiteouts in args->whlist.  A whiteout for @dentry is
 * prepared by lock_hdir_create_wh(), which also selects the branch (bindex)
 * to operate on.  When that branch is the top branch of @dentry, the lower
 * dir is renamed and removed by renwh_and_rmdir(); a positive return from it
 * asks for asynchronous removal via au_whtmp_kick_rmdir(), which takes over
 * the ownership of args.  Otherwise only monitoring of the lower inode is
 * stopped.  On failure the whiteout is reverted by do_revert().
 *
 * Returns 0 on success or a negative errno.
 */
int aufs_rmdir(struct inode *dir, struct dentry *dentry)
{
	int err, rmdir_later;
	aufs_bindex_t bwh, bindex, btop;
	struct inode *inode;
	struct dentry *parent, *wh_dentry, *h_dentry;
	struct au_whtmp_rmdir *args;
	/* to reduce stack size */
	struct {
		struct au_dtime dt;
		struct au_pin pin;
	} *a;

	IMustLock(dir);

	err = -ENOMEM;
	a = kmalloc(sizeof(*a), GFP_NOFS);
	if (unlikely(!a))
		goto out;

	err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
	if (unlikely(err))
		goto out_free;
	err = au_alive_dir(dentry);
	if (unlikely(err))
		goto out_unlock;
	inode = d_inode(dentry);
	IMustLock(inode);
	err = -ENOTDIR;
	if (unlikely(!d_is_dir(dentry)))
		goto out_unlock; /* possible? */

	/*
	 * au_whtmp_rmdir_alloc() reports failure by ERR_PTR(), not by NULL.
	 * Testing for NULL only would let an error pointer slip through and
	 * be dereferenced below as &args->whlist.
	 */
	err = -ENOMEM;
	args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
	if (unlikely(!args || IS_ERR(args))) {
		if (args)
			err = PTR_ERR(args);
		goto out_unlock;
	}

	parent = dentry->d_parent; /* dir inode is locked */
	di_write_lock_parent(parent);
	err = au_test_empty(dentry, &args->whlist);
	if (unlikely(err))
		goto out_parent;

	btop = au_dbtop(dentry);
	bwh = au_dbwh(dentry);
	bindex = -1;
	wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
					&a->pin);
	err = PTR_ERR(wh_dentry);
	if (IS_ERR(wh_dentry))
		goto out_parent;

	/* keep the lower dir alive, it may be handed to the workqueue */
	h_dentry = au_h_dptr(dentry, btop);
	dget(h_dentry);
	rmdir_later = 0;
	if (bindex == btop) {
		err = renwh_and_rmdir(dentry, btop, &args->whlist, dir);
		if (err > 0) {
			/* a positive value is a request, not an error */
			rmdir_later = err;
			err = 0;
		}
	} else {
		/* stop monitoring */
		au_hn_free(au_hi(inode, btop));

		/* dir inode is locked */
		IMustLock(d_inode(wh_dentry->d_parent));
		err = 0;
	}

	if (!err) {
		vfsub_dead_dir(inode);
		au_set_dbdiropq(dentry, -1);
		epilog(dir, dentry, bindex);

		if (rmdir_later) {
			au_whtmp_kick_rmdir(dir, btop, h_dentry, args);
			/* args now belongs to au_whtmp_kick_rmdir() */
			args = NULL;
		}

		goto out_unpin; /* success */
	}

	/* revert */
	AuLabel(revert);
	if (wh_dentry) {
		int rerr;

		rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
				 &a->dt);
		if (rerr)
			err = rerr;
	}

out_unpin:
	au_unpin(&a->pin);
	dput(wh_dentry);
	dput(h_dentry);
out_parent:
	di_write_unlock(parent);
	if (args)
		au_whtmp_rmdir_free(args);
out_unlock:
	aufs_read_unlock(dentry, AuLock_DW);
out_free:
	au_kfree_rcu(a);
out:
	AuTraceErr(err);
	return err;
}
......@@ -177,6 +177,7 @@ int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
struct dentry *h_parent, int isdir);
int aufs_unlink(struct inode *dir, struct dentry *dentry);
int aufs_rmdir(struct inode *dir, struct dentry *dentry);
/* iinfo.c */
struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
......
......@@ -43,6 +43,13 @@ static inline void vfsub_drop_nlink(struct inode *inode)
drop_nlink(inode);
}
/*
 * Mark a directory inode as dead: raise S_DEAD in i_flags and clear its
 * link count.  Debug builds complain when @inode is not a directory.
 */
static inline void vfsub_dead_dir(struct inode *inode)
{
	AuDebugOn(!S_ISDIR(inode->i_mode));

	inode->i_flags = inode->i_flags | S_DEAD;
	clear_nlink(inode);
}
int vfsub_sync_filesystem(struct super_block *h_sb, int wait);
/* ---------------------------------------------------------------------- */
......
......@@ -156,6 +156,44 @@ out:
return dentry;
}
/*
 * Rename @h_dentry on branch @br to a whiteout-ed temporary name within the
 * same parent dir.
 *
 * The target name is obtained from au_whtmp_lkup(), based on the current
 * name of @h_dentry.  The parent dir inode must be locked by the caller.
 * A -EWOULDBLOCK result is only warned about, never retried, and the
 * delegated inode is released.
 *
 * Returns 0 on success, the error from au_whtmp_lkup(), or the error from
 * vfsub_rename().
 */
int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
{
	int err;
	struct path h_path = {
		.mnt = au_br_mnt(br)
	};
	struct dentry *h_parent, *h_tmp;
	struct inode *h_dir, *delegated = NULL;

	h_parent = h_dentry->d_parent; /* dir inode is locked */
	h_dir = d_inode(h_parent);
	IMustLock(h_dir);

	h_tmp = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
	h_path.dentry = h_tmp;
	if (IS_ERR(h_tmp)) {
		err = PTR_ERR(h_tmp);
		goto out;
	}

	/* source and target share the parent, so lock_rename() is needless */
	err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated,
			   /*flags*/0);
	AuTraceErr(err);
	if (unlikely(err == -EWOULDBLOCK)) {
		pr_warn("cannot retry for NFSv4 delegation"
			" for an internal rename\n");
		iput(delegated);
	}
	dput(h_tmp);

out:
	AuTraceErr(err);
	return err;
}
/* ---------------------------------------------------------------------- */
/*
* functions for removing a whiteout
......@@ -194,6 +232,27 @@ int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
return err;
}
/*
 * Look up the whiteout name @wh under @h_parent on branch @br and unlink it
 * when it is a regular file.  Anything else found under that name, or
 * nothing at all, is left alone and counts as success.
 *
 * Returns 0, the lookup error, or the error from do_unlink_wh().
 */
static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
			  struct au_branch *br)
{
	int err;
	struct dentry *found;
	struct path h_path = {
		.mnt = au_br_mnt(br)
	};

	found = vfsub_lkup_one(wh, h_parent);
	h_path.dentry = found;
	if (IS_ERR(found))
		return PTR_ERR(found);

	err = 0;
	if (d_is_reg(found))
		err = do_unlink_wh(d_inode(h_parent), &h_path);
	dput(found);

	return err;
}
/* ---------------------------------------------------------------------- */
/*
* initialize/clean whiteout for a branch
......@@ -743,3 +802,243 @@ struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
return wh_dentry;
}
/* ---------------------------------------------------------------------- */
/*
 * Delete all whiteouts in the dir @h_dentry which @whlist records for branch
 * @bindex.
 *
 * One page is allocated as a name buffer.  AUFS_WH_PFX is written to its
 * head once and every recorded name is copied right behind it before
 * unlink_wh_name() is called.  The walk over the hash buckets stops at the
 * first error.
 *
 * Returns 0 on success, -ENOMEM when the page cannot be allocated, -EIO when
 * a recorded name plus the prefix exceeds PATH_MAX, or the error from
 * unlink_wh_name().
 */
static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
			   aufs_bindex_t bindex, struct au_branch *br)
{
	int err;
	unsigned long ul, n;
	struct qstr wh_name;
	char *buf, *tail;
	struct au_vdir_wh *pos;
	struct au_vdir_destr *str;

	buf = (void *)__get_free_page(GFP_NOFS);
	wh_name.name = buf;
	if (unlikely(!wh_name.name))
		return -ENOMEM;

	/* the prefix is constant, only the part behind it changes */
	memcpy(buf, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
	tail = buf + AUFS_WH_PFX_LEN;

	err = 0;
	n = whlist->nh_num;
	for (ul = 0; ul < n && !err; ul++) {
		hlist_for_each_entry(pos, &whlist->nh_head[ul], wh_hash) {
			if (pos->wh_bindex != bindex)
				continue;

			str = &pos->wh_str;
			if (str->len + AUFS_WH_PFX_LEN > PATH_MAX) {
				AuIOErr("whiteout name too long %.*s\n",
					str->len, str->name);
				err = -EIO;
				break;
			}

			memcpy(tail, str->name, str->len);
			wh_name.len = AUFS_WH_PFX_LEN + str->len;
			err = unlink_wh_name(h_dentry, &wh_name, br);
			if (err)
				break;
		}
	}

	free_page((unsigned long)wh_name.name);
	return err;
}
/* argument bundle handed to call_del_wh_children() through au_wkq_wait() */
struct del_wh_children_args {
/* where the return value of del_wh_children() is stored */
int *errp;
/* the remaining members are the arguments of del_wh_children(), in order */
struct dentry *h_dentry;
struct au_nhash *whlist;
aufs_bindex_t bindex;
struct au_branch *br;
};
/*
 * Workqueue trampoline: unpack a struct del_wh_children_args, run
 * del_wh_children() and store its result where the submitter asked for it.
 */
static void call_del_wh_children(void *args)
{
	struct del_wh_children_args *packed = args;
	int ret;

	ret = del_wh_children(packed->h_dentry, packed->whlist, packed->bindex,
			      packed->br);
	*packed->errp = ret;
}
/* ---------------------------------------------------------------------- */
/*
 * Allocate a zeroed struct au_whtmp_rmdir and initialize its whiteout hash.
 * @sb: the aufs super block, whose si_rdhash gives the hash size
 *	(AUFS_RDHASH_DEF when it is zero).
 * @gfp: allocation flags for both the struct and the hash.
 *
 * Returns the new object, or ERR_PTR() on failure.  NULL is never returned,
 * so callers have to test the result with IS_ERR().
 */
struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
{
	struct au_whtmp_rmdir *whtmp;
	int err;
	unsigned int rdhash;

	SiMustAnyLock(sb);

	whtmp = kzalloc(sizeof(*whtmp), gfp);
	if (unlikely(!whtmp))
		return ERR_PTR(-ENOMEM);

	/* no estimation for dir size */
	rdhash = au_sbi(sb)->si_rdhash;
	if (!rdhash)
		rdhash = AUFS_RDHASH_DEF;

	err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
	if (!err)
		return whtmp;

	au_kfree_rcu(whtmp);
	return ERR_PTR(err);
}
void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
{
if (whtmp->br)
au_lcnt_dec(&whtmp->br->br_count);
dput(whtmp->wh_dentry);
iput(whtmp->dir);
au_nhash_wh_free(&whtmp->whlist);
au_kfree_rcu(whtmp);
}
/*
 * rmdir the whiteouted temporary named dir @wh_dentry on branch @bindex.
 * @dir: the aufs parent dir inode; its attributes are refreshed when the
 *	removal happened on its top branch.
 * @whlist: whiteouted children, which are deleted first.
 *
 * The inode of the parent of @wh_dentry must be locked by the caller.
 * Returns 0 on success.  On failure a warning is printed and the error is
 * returned; it is up to the caller whether to ignore it.
 */
int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
struct dentry *wh_dentry, struct au_nhash *whlist)
{
int err;
unsigned int h_nlink;
struct path h_tmp;
struct inode *wh_inode, *h_dir;
struct au_branch *br;
h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
IMustLock(h_dir);
br = au_sbr(dir->i_sb, bindex);
wh_inode = d_inode(wh_dentry);
inode_lock_nested(wh_inode, AuLsc_I_CHILD);
/*
* someone else might change some whiteouts while we were sleeping.
* it means this whlist may have an obsoleted entry.
*/
/* delete the child whiteouts directly, or via the workqueue otherwise */
if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
err = del_wh_children(wh_dentry, whlist, bindex, br);
else {
int wkq_err;
/* err is filled in by call_del_wh_children() through .errp */
struct del_wh_children_args args = {
.errp = &err,
.h_dentry = wh_dentry,
.whlist = whlist,
.bindex = bindex,
.br = br
};
wkq_err = au_wkq_wait(call_del_wh_children, &args);
if (unlikely(wkq_err))
err = wkq_err;
}
inode_unlock(wh_inode);
if (!err) {
h_tmp.dentry = wh_dentry;
h_tmp.mnt = au_br_mnt(br);
/* remember the parent's link count to see whether rmdir changes it */
h_nlink = h_dir->i_nlink;
err = vfsub_rmdir(h_dir, &h_tmp);
/* some fs doesn't change the parent nlink in some cases */
h_nlink -= h_dir->i_nlink;
}
/* h_nlink is only read here when the block above has set it */
if (!err) {
if (au_ibtop(dir) == bindex) {
/* todo: dir->i_mutex is necessary */
au_cpup_attr_timesizes(dir);
/* follow the lower fs: drop a link only when it dropped one */
if (h_nlink)
vfsub_drop_nlink(dir);
}
return 0; /* success */
}
pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err);
return err;
}
/*
 * Workqueue function submitted by au_whtmp_kick_rmdir(): remove the
 * whiteout-ed temporary dir described by @args (a struct au_whtmp_rmdir).
 *
 * The branch must still be writable and still belong to the super block,
 * otherwise the work ends with -EROFS.  The lower dir is verified by
 * au_h_verify() before au_whtmp_rmdir() is called.  @args is always freed
 * here, and any error is only reported by AuIOErr().
 */
static void call_rmdir_whtmp(void *args)
{
int err;
aufs_bindex_t bindex;
struct au_whtmp_rmdir *a = args;
struct super_block *sb;
struct dentry *h_parent;
struct inode *h_dir;
struct au_hinode *hdir;
/* rmdir by nfsd may cause deadlock with this i_mutex */
/* inode_lock(a->dir); */
err = -EROFS;
/* keep sb in a local, it is still needed after a is freed below */
sb = a->dir->i_sb;
si_read_lock(sb, !AuLock_FLUSH);
if (!au_br_writable(a->br->br_perm))
goto out;
/* the index of the branch is looked up again by its id */
bindex = au_br_index(sb, a->br->br_id);
if (unlikely(bindex < 0))
goto out;
/* NOTE(review): this value is overwritten below before it is ever read */
err = -EIO;
ii_write_lock_parent(a->dir);
h_parent = dget_parent(a->wh_dentry);
h_dir = d_inode(h_parent);
hdir = au_hi(a->dir, bindex);
err = vfsub_mnt_want_write(au_br_mnt(a->br));
if (unlikely(err))
goto out_mnt;
au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
a->br);
if (!err)
err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist);
au_hn_inode_unlock(hdir);
vfsub_mnt_drop_write(au_br_mnt(a->br));
out_mnt:
dput(h_parent);
ii_write_unlock(a->dir);
out:
/* inode_unlock(a->dir); */
au_whtmp_rmdir_free(a);
si_read_unlock(sb);
au_nwt_done(&au_sbi(sb)->si_nowait);
if (unlikely(err))
AuIOErr("err %d\n", err);
}
/*
 * Hand the removal of the whiteout-ed temporary dir @wh_dentry on branch
 * @bindex over to the workqueue.
 * @dir: the aufs parent dir inode, locked by the caller.
 * @args: pre-allocated by au_whtmp_rmdir_alloc(); ownership moves to this
 *	function, so the caller must not touch it afterwards.
 *
 * References on @dir, the branch and @wh_dentry are stored in @args before
 * the work is queued.  When queueing fails, a warning is printed and @args
 * is freed here; the error is not reported to the caller.
 */
void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
			 struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
{
	int wkq_err;
	struct super_block *sb;

	IMustLock(dir);

	/* all post-process will be done in call_rmdir_whtmp(). */
	sb = dir->i_sb;
	args->dir = au_igrab(dir);
	args->br = au_sbr(sb, bindex);
	au_lcnt_inc(&args->br->br_count);
	args->wh_dentry = dget(wh_dentry);

	wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
	if (!wkq_err)
		return;

	pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err);
	au_whtmp_rmdir_free(args);
}
......@@ -12,20 +12,18 @@
#ifdef __KERNEL__
#include "dir.h"
/* whout.c */
struct qstr;
int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
struct dentry;
int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio);
int au_diropq_test(struct dentry *h_dentry);
struct au_branch;
struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
struct qstr *prefix);
struct inode;
struct path;
int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
struct dentry *dentry);
struct super_block;
int au_wh_init(struct au_branch *br, struct super_block *sb);
/* diropq flags */
......@@ -43,6 +41,21 @@ struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
struct dentry *h_parent);
/*
 * real rmdir for the whiteout-ed dir.
 * The object carries everything the asynchronous removal needs; see
 * au_whtmp_kick_rmdir() for how the members are filled in.
 */
struct au_whtmp_rmdir {
/* the aufs parent dir inode */
struct inode *dir;
/* the branch where the temporary dir lives */
struct au_branch *br;
/* the dir to remove, already renamed to its temporary whiteout-ed name */
struct dentry *wh_dentry;
/* whiteout-ed children of the dir */
struct au_nhash whlist;
};
/* returns ERR_PTR() on failure */
struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
/* synchronous removal */
int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
struct dentry *wh_dentry, struct au_nhash *whlist);
/* asynchronous removal; takes over the ownership of args */
void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
/* ---------------------------------------------------------------------- */
static inline struct dentry *au_diropq_create(struct dentry *dentry,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment