Commit 2a150b32 authored by J. R. Okajima
Browse files

aufs: xino truncation



As mentioned earlier, the size of the XINO file is sometimes a problem.
Aufs has a feature to truncate it asynchronously using a workqueue. But it
may not be very effective in some cases, and you may want to stop the
discontiguous distribution of inode numbers on the branch fs.
See also the log in another commit.
Signed-off-by: J. R. Okajima <hooanon05g@gmail.com>
parent a3adef77
......@@ -35,6 +35,8 @@ struct au_xino {
struct mutex xi_mtx; /* protects xi_file array */
struct hlist_bl_head xi_writing;
atomic_t xi_truncating;
struct kref xi_kref;
};
......@@ -136,6 +138,9 @@ ssize_t xino_fread(vfs_readf_t func, struct file *file, void *buf, size_t size,
ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
size_t size, loff_t *pos);
int au_xib_trunc(struct super_block *sb);
int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex, int idx_begin);
struct au_xino *au_xino_alloc(unsigned int nfile);
int au_xino_put(struct au_branch *br);
struct file *au_xino_file1(struct au_xino *xi);
......
......@@ -58,6 +58,12 @@ AuStubInt0(au_debug_test, void)
pr_warn(fmt, ##__VA_ARGS__); \
} while (0)
/*
 * Report an error through pr_err() only when the counter private to this
 * expansion is zero. The counter is an unsigned char incremented on every
 * hit, so the message is printed again each time it wraps around to zero.
 */
#define AuErr1(fmt, ...) do { \
static unsigned char _c; \
if (!_c++) \
pr_err(fmt, ##__VA_ARGS__); \
} while (0)
#define AuIOErr1(fmt, ...) do { \
static unsigned char _c; \
if (!_c++) \
......
......@@ -220,5 +220,11 @@ static inline int au_test_fs_bad_xino(struct super_block *sb)
|| au_test_nilfs(sb);
}
/*
 * Tell whether @sb passes au_test_tmpfs() or au_test_ramfs().
 * Callers use it to decide whether the xino truncation should be tried.
 * Returns 1 when either test matches, otherwise 0.
 */
static inline int au_test_fs_trunc_xino(struct super_block *sb)
{
	if (au_test_tmpfs(sb))
		return 1;

	return !!au_test_ramfs(sb);
}
#endif /* __KERNEL__ */
#endif /* __AUFS_FSTYPE_H__ */
......@@ -19,6 +19,9 @@ enum {
Opt_br,
Opt_add,
Opt_xino, Opt_noxino,
Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
Opt_trunc_xino_path, Opt_itrunc_xino,
Opt_trunc_xib, Opt_notrunc_xib,
Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
};
......@@ -28,6 +31,14 @@ static match_table_t options = {
{Opt_xino, "xino=%s"},
{Opt_noxino, "noxino"},
{Opt_trunc_xino, "trunc_xino"},
{Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
{Opt_notrunc_xino, "notrunc_xino"},
{Opt_trunc_xino_path, "trunc_xino=%s"},
{Opt_itrunc_xino, "itrunc_xino=%d"},
/* {Opt_zxino, "zxino=%s"}, */
{Opt_trunc_xib, "trunc_xib"},
{Opt_notrunc_xib, "notrunc_xib"},
/* internal use for the scripts */
{Opt_ignore_silent, "si=%s"},
......@@ -191,6 +202,7 @@ static void dump_opts(struct au_opts *opts)
union {
struct au_opt_add *add;
struct au_opt_xino *xino;
struct au_opt_xino_itrunc *xino_itrunc;
} u;
struct au_opt *opt;
......@@ -207,9 +219,26 @@ static void dump_opts(struct au_opts *opts)
u.xino = &opt->xino;
AuDbg("xino {%s %pD}\n", u.xino->path, u.xino->file);
break;
case Opt_trunc_xino:
AuLabel(trunc_xino);
break;
case Opt_notrunc_xino:
AuLabel(notrunc_xino);
break;
case Opt_trunc_xino_path:
case Opt_itrunc_xino:
u.xino_itrunc = &opt->xino_itrunc;
AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex);
break;
case Opt_noxino:
AuLabel(noxino);
break;
case Opt_trunc_xib:
AuLabel(trunc_xib);
break;
case Opt_notrunc_xib:
AuLabel(notrunc_xib);
break;
default:
BUG();
}
......@@ -294,6 +323,46 @@ out:
return err;
}
/*
 * Parse the argument of "trunc_xino=<path>".
 * The path in args[0] is looked up, and the branch whose root dentry is the
 * very same dentry is searched for under the aufs read locks. The found
 * branch index is stored in @xino_itrunc->bindex.
 *
 * Returns zero on success, the lookup error when the path cannot be
 * resolved, or -EINVAL when no branch matches the path.
 */
static int noinline_for_stack
au_opts_parse_xino_itrunc_path(struct super_block *sb,
			       struct au_opt_xino_itrunc *xino_itrunc,
			       substring_t args[])
{
	int err;
	aufs_bindex_t last, idx;
	struct path h_path;
	struct dentry *root;
	char *name;

	name = args[0].from;
	err = vfsub_kern_path(name, lkup_dirflags, &h_path);
	if (unlikely(err)) {
		pr_err("lookup failed %s (%d)\n", name, err);
		return err;
	}

	/* negative means "not found yet" */
	xino_itrunc->bindex = -1;
	root = sb->s_root;
	si_read_lock(sb, AuLock_FLUSH);
	di_read_lock_child(root, /*flags*/0);
	last = au_sbbot(sb);
	idx = 0;
	while (idx <= last && au_h_dptr(root, idx) != h_path.dentry)
		idx++;
	if (idx <= last)
		xino_itrunc->bindex = idx;
	di_read_unlock(root, /*flags*/0);
	si_read_unlock(sb);
	path_put(&h_path);

	if (likely(xino_itrunc->bindex >= 0))
		return 0;

	pr_err("no such branch %s\n", name);
	return -EINVAL;
}
/* called without aufs lock */
int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
{
......@@ -304,6 +373,10 @@ int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
struct au_opt *opt, *opt_tail;
char *opt_str;
/* reduce the stack space */
union {
struct au_opt_xino_itrunc *xino_itrunc;
struct au_opt_wbr_create *create;
} u;
struct {
substring_t args[MAX_OPT_ARGS];
} *a;
......@@ -356,7 +429,39 @@ int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
opt->type = token;
break;
case Opt_trunc_xino_path:
err = au_opts_parse_xino_itrunc_path
(sb, &opt->xino_itrunc, a->args);
if (!err)
opt->type = token;
break;
case Opt_itrunc_xino:
u.xino_itrunc = &opt->xino_itrunc;
if (unlikely(match_int(&a->args[0], &n))) {
pr_err("bad integer in %s\n", opt_str);
break;
}
u.xino_itrunc->bindex = n;
si_read_lock(sb, AuLock_FLUSH);
di_read_lock_child(root, !AuLock_IR);
if (n < 0 || au_sbbot(sb) < n) {
pr_err("out of bounds, %d\n", n);
di_read_unlock(root, !AuLock_IR);
si_read_unlock(sb);
break;
}
di_read_unlock(root, !AuLock_IR);
si_read_unlock(sb);
err = 0;
opt->type = token;
break;
case Opt_trunc_xino:
case Opt_notrunc_xino:
case Opt_noxino:
case Opt_trunc_xib:
case Opt_notrunc_xib:
err = 0;
opt->type = token;
break;
......@@ -393,6 +498,52 @@ out:
return err;
}
/*
 * Apply a single option which needs no further context.
 * The caller must hold the write lock of @sb.
 *
 * returns,
 * plus: processed without an error
 * zero: unprocessed, ie. @opt is not one of the simple options
 * minus: the error from au_xino_trunc() for the xino truncation options
 */
static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
			 struct au_opts *opts)
{
	int rc;
	struct au_sbinfo *sbi;

	SiMustWriteLock(sb);

	sbi = au_sbi(sb);
	switch (opt->type) {
	case Opt_trunc_xino:
		au_opt_set(sbi->si_mntflags, TRUNC_XINO);
		return 1;
	case Opt_notrunc_xino:
		au_opt_clr(sbi->si_mntflags, TRUNC_XINO);
		return 1;

	case Opt_trunc_xino_path:
	case Opt_itrunc_xino:
		/* truncate all xino files of the specified branch right now */
		rc = au_xino_trunc(sb, opt->xino_itrunc.bindex,
				   /*idx_begin*/0);
		return rc ? rc : 1;

	case Opt_trunc_xib:
		au_fset_opts(opts->flags, TRUNC_XIB);
		return 1;
	case Opt_notrunc_xib:
		au_fclr_opts(opts->flags, TRUNC_XIB);
		return 1;

	default:
		return 0;
	}
}
/*
* returns tri-state.
* plus: processed without an error
......@@ -457,9 +608,7 @@ int au_opts_mount(struct super_block *sb, struct au_opts *opts)
opt_xino = NULL;
opt = opts->opt;
while (err >= 0 && opt->type != Opt_tail)
/* re-commit later */
/* err = au_opt_simple(sb, opt++, opts); */
err = 0;
err = au_opt_simple(sb, opt++, opts);
if (err > 0)
err = 0;
else if (unlikely(err < 0))
......
......@@ -21,6 +21,7 @@ struct file;
/* mount flags */
#define AuOpt_XINO 1 /* external inode number bitmap
and translation table */
#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */
#define AuOpt_Def AuOpt_XINO
......@@ -46,19 +47,33 @@ struct au_opt_xino {
struct file *file;
};
/* argument of the "trunc_xino=<path>" and "itrunc_xino=<index>" options */
struct au_opt_xino_itrunc {
/* index of the branch whose xino files are to be truncated */
aufs_bindex_t bindex;
};
struct au_opt {
int type;
union {
struct au_opt_xino xino;
struct au_opt_xino_itrunc xino_itrunc;
struct au_opt_add add;
/* add more later */
};
};
/* opts flags */
#define AuOpts_TRUNC_XIB (1 << 2)
#define au_ftest_opts(flags, name) ((flags) & AuOpts_##name)
#define au_fset_opts(flags, name) \
do { (flags) |= AuOpts_##name; } while (0)
#define au_fclr_opts(flags, name) \
do { (flags) &= ~AuOpts_##name; } while (0)
struct au_opts {
struct au_opt *opt;
int max_opt;
unsigned int flags;
unsigned long sb_flags;
};
......
......@@ -58,6 +58,9 @@ int au_si_alloc(struct super_block *sb)
sbinfo->si_mntflags = AuOpt_Def;
sbinfo->si_xino_jiffy = jiffies;
sbinfo->si_xino_expire
= msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC);
mutex_init(&sbinfo->si_xib_mtx);
/* leave si_xib_last_pindex and si_xib_next_bit */
......
......@@ -53,6 +53,8 @@ struct au_sbinfo {
unsigned long si_xib_last_pindex;
int si_xib_next_bit;
unsigned long si_xino_jiffy;
unsigned long si_xino_expire;
/* reserved for future use */
/* unsigned long long si_xib_limit; */ /* Max xib file size */
......
......@@ -380,6 +380,205 @@ out:
/* ---------------------------------------------------------------------- */
/*
* truncate xino files
*/
/*
 * Truncate a single xino file, the @idx-th one of the branch @bindex.
 * The current file is handed to au_xi_new() as the copy source, and the
 * block usage before and after is logged via vfs_statfs().
 * @st is a scratch buffer provided by the caller.
 *
 * Returns zero when there is no such file, when statfs fails (such failure is
 * only logged), or on success. Returns the error from au_xi_new() otherwise.
 */
static int au_xino_do_trunc(struct super_block *sb, aufs_bindex_t bindex,
int idx, struct kstatfs *st)
{
int err;
blkcnt_t blocks;
struct file *file, *new_xino;
struct au_xi_new xinew = {
.idx = idx
};
err = 0;
xinew.xi = au_sbr(sb, bindex)->br_xino;
file = au_xino_file(xinew.xi, idx);
/* this slot has no file, nothing to truncate */
if (!file)
goto out;
xinew.base = &file->f_path;
err = vfs_statfs(xinew.base, st);
if (unlikely(err)) {
/* without the statistics, give up silently for this file */
AuErr1("statfs err %d, ignored\n", err);
err = 0;
goto out;
}
blocks = file_inode(file)->i_blocks;
pr_info("begin truncating xino(b%d-%d), ib%llu, %llu/%llu free blks\n",
bindex, idx, (u64)blocks, st->f_bfree, st->f_blocks);
xinew.copy_src = file;
new_xino = au_xi_new(sb, &xinew);
if (IS_ERR(new_xino)) {
/*
 * NOTE(review): the message says "ignored", but the error is
 * returned to the caller -- confirm which one is intended.
 */
err = PTR_ERR(new_xino);
pr_err("xino(b%d-%d), err %d, ignored\n", bindex, idx, err);
goto out;
}
/* statfs here is only for reporting, its failure is not an error */
err = vfs_statfs(&new_xino->f_path, st);
if (!err)
pr_info("end truncating xino(b%d-%d), ib%llu, %llu/%llu free blks\n",
bindex, idx, (u64)file_inode(new_xino)->i_blocks,
st->f_bfree, st->f_blocks);
else {
AuErr1("statfs err %d, ignored\n", err);
err = 0;
}
out:
return err;
}
/*
 * Truncate the xino files of the branch @bindex, from the @idx_begin-th file
 * up to the last one, stopping at the first error.
 * When all of them are processed without an error, si_xino_jiffy is updated
 * to the time sampled just before the truncation began.
 *
 * Returns zero on success, -ENOMEM when the statfs buffer cannot be
 * allocated, -EINVAL when @bindex is out of range, or the error from
 * au_xino_do_trunc().
 */
int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex, int idx_begin)
{
	int err, idx;
	unsigned long started;
	aufs_bindex_t last;
	struct kstatfs *st;
	struct au_xino *xi;

	/* too large to be placed on the stack */
	st = kmalloc(sizeof(*st), GFP_NOFS);
	if (unlikely(!st))
		return -ENOMEM;

	err = -EINVAL;
	last = au_sbbot(sb);
	if (likely(bindex >= 0 && bindex <= last)) {
		err = 0;
		started = jiffies;
		xi = au_sbr(sb, bindex)->br_xino;
		for (idx = idx_begin; idx < xi->xi_nfile; idx++) {
			err = au_xino_do_trunc(sb, bindex, idx, st);
			if (err)
				break;
		}
		if (!err)
			au_sbi(sb)->si_xino_jiffy = started;
	}

	au_kfree_rcu(st);
	return err;
}
/* arguments handed from xino_try_trunc() to xino_do_trunc() */
struct xino_do_trunc_args {
struct super_block *sb;
/* its br_count is incremented until xino_do_trunc() completes */
struct au_branch *br;
/* passed to au_xino_trunc() as the index to begin with */
int idx;
};
/*
 * The function queued by xino_try_trunc() via au_wkq_nowait().
 * Truncates the xino files of args->br beginning at args->idx.
 * The branch index is looked up again from br_id after the superblock write
 * lock is acquired. Before returning, it decrements xi_truncating and
 * br_count which xino_try_trunc() incremented, and frees @_args.
 * A failure is reported by pr_warn() only.
 */
static void xino_do_trunc(void *_args)
{
struct xino_do_trunc_args *args = _args;
struct super_block *sb;
struct au_branch *br;
struct inode *dir;
int err, idx;
aufs_bindex_t bindex;
err = 0;
sb = args->sb;
dir = d_inode(sb->s_root);
br = args->br;
idx = args->idx;
si_noflush_write_lock(sb);
ii_read_lock_parent(dir);
/* au_xino_trunc() rejects an out-of-range index with -EINVAL */
bindex = au_br_index(sb, br->br_id);
err = au_xino_trunc(sb, bindex, idx);
ii_read_unlock(dir);
if (unlikely(err))
pr_warn("err b%d, (%d)\n", bindex, err);
/* balance the increments made in xino_try_trunc() */
atomic_dec(&br->br_xino->xi_truncating);
au_lcnt_dec(&br->br_count);
si_write_unlock(sb);
au_nwt_done(&au_sbi(sb)->si_nowait);
au_kfree_rcu(args);
}
/*
 * Decide whether any xino file of @br should be truncated now.
 *
 * Nothing is tested until si_xino_expire jiffies have passed since
 * si_xino_jiffy. After that, vfs_statfs() is called for every existing xino
 * file of the branch, and the first file whose filesystem reports the
 * free-block percentage of AUFS_XINO_DEF_TRUNC or more is selected.
 * A file on a filesystem which reports zero total blocks is skipped since
 * the percentage cannot be calculated.
 *
 * returns the index in the xi_file array whose corresponding file is necessary
 * to truncate, or -1 which means no need to truncate (a statfs failure is
 * logged and handled as "no need" too).
 */
static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
{
	int err;
	unsigned int u;
	struct kstatfs st;
	struct au_sbinfo *sbinfo;
	struct au_xino *xi;
	struct file *file;

	/* todo: si_xino_expire and the ratio should be customizable */
	sbinfo = au_sbi(sb);
	if (time_before(jiffies,
			sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
		return -1;

	/* truncation border */
	xi = br->br_xino;
	for (u = 0; u < xi->xi_nfile; u++) {
		file = au_xino_file(xi, u);
		if (!file)
			continue;

		err = vfs_statfs(&file->f_path, &st);
		if (unlikely(err)) {
			AuErr1("statfs err %d, ignored\n", err);
			return -1;
		}

		/*
		 * some filesystems do not fill f_blocks and leave it zero.
		 * never pass it to div64_u64() as the divisor.
		 */
		if (unlikely(!st.f_blocks))
			continue;

		/*
		 * NOTE(review): a file is selected when the FREE ratio is at
		 * or above the border. confirm the direction of this
		 * comparison is the intended one.
		 */
		if (div64_u64(st.f_bfree * 100, st.f_blocks)
		    >= AUFS_XINO_DEF_TRUNC)
			return u;
	}

	return -1;
}
/*
 * Schedule the asynchronous truncation of the xino files of @br when
 * xino_trunc_test() selects one of them.
 * xi_truncating is used as a guard; when it is already incremented by someone
 * else, this call gives up. On success, the increments of xi_truncating and
 * br_count are left to be undone by xino_do_trunc(). On any failure they are
 * undone here.
 */
static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
{
int idx;
struct xino_do_trunc_args *args;
int wkq_err;
idx = xino_trunc_test(sb, br);
if (idx < 0)
return;
/* someone else has already requested, undo our increment at "out" */
if (atomic_inc_return(&br->br_xino->xi_truncating) > 1)
goto out;
/* the lock is acquired and args is freed in xino_do_trunc() */
args = kmalloc(sizeof(*args), GFP_NOFS);
if (unlikely(!args)) {
AuErr1("no memory\n");
goto out;
}
/* decremented in xino_do_trunc(), or below when queueing fails */
au_lcnt_inc(&br->br_count);
args->sb = sb;
args->br = br;
args->idx = idx;
wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
if (!wkq_err)
return; /* success */
pr_err("wkq %d\n", wkq_err);
au_lcnt_dec(&br->br_count);
au_kfree_rcu(args);
out:
atomic_dec(&br->br_xino->xi_truncating);
}
/* ---------------------------------------------------------------------- */
struct au_xi_calc {
int idx;
loff_t pos;
......@@ -603,6 +802,8 @@ static int au_xino_do_write(vfs_writef_t write, struct file *file,
* write @ino to the xinofile for the specified branch{@sb, @bindex}
* at the position of @h_ino.
* even if @ino is zero, it is written to the xinofile and means no entry.
* if the size of the xino file on a specific filesystem exceeds the watermark,
* try truncating it.
*/
int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
ino_t ino)
......@@ -640,8 +841,13 @@ int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
}
err = au_xino_do_write(au_sbi(sb)->si_xwrite, file, &calc, ino);
if (!err)
if (!err) {
br = au_sbr(sb, bindex);
if (au_opt_test(mnt_flags, TRUNC_XINO)
&& au_test_fs_trunc_xino(au_br_sb(br)))
xino_try_trunc(sb, br);
return 0; /* success */
}
out:
AuIOErr("write failed (%d)\n", err);
......@@ -920,6 +1126,139 @@ static void au_xib_clear_bit(struct inode *inode)
/* ---------------------------------------------------------------------- */
/*
* truncate a xino bitmap file
*/
/* todo: slow */
/*
 * Scan one xino file and set the bit in the xino bitmap for every inode
 * number stored in it.
 * @file is read from the beginning to its size, PAGE_SIZE at a time, into
 * the caller-provided buffer @page. The values less than AUFS_FIRST_INO are
 * skipped. For the others, xib_pindex() is called with the page index
 * calculated by xib_calc_bit() (presumably to make si_xib_buf hold that
 * page -- confirm against xib_pindex()), and the bit is set in si_xib_buf.
 * The caller must hold si_xib_mtx.
 *
 * Returns zero on success (including an unexpected EOF), or the negative
 * error from xino_fread() or xib_pindex().
 */
static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
{
	int err, bit;
	ssize_t sz;
	unsigned long pindex;
	loff_t pos, pend;
	struct au_sbinfo *sbinfo;
	vfs_readf_t func;
	ino_t *ino;
	unsigned long *p;

	err = 0;
	sbinfo = au_sbi(sb);
	MtxMustLock(&sbinfo->si_xib_mtx);
	p = sbinfo->si_xib_buf;
	func = sbinfo->si_xread;
	pend = vfsub_f_size_read(file);
	pos = 0;
	while (pos < pend) {
		sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
		err = sz;
		if (unlikely(sz <= 0))
			goto out;

		err = 0;
		/*
		 * consume one entry per iteration. the step must be the size
		 * of the entry, not the size of the pointer to it.
		 */
		for (ino = page; sz > 0; ino++, sz -= sizeof(*ino)) {
			if (unlikely(*ino < AUFS_FIRST_INO))
				continue;

			xib_calc_bit(*ino, &pindex, &bit);
			AuDebugOn(page_bits <= bit);
			err = xib_pindex(sb, pindex);
			if (!err)
				set_bit(bit, p);
			else
				goto out;
		}
	}

out:
	return err;
}
static int xib_restore(struct super_block *sb)
{
int err, i;
unsigned int nfile;
aufs_bindex_t bindex, bbot;
void *page;
struct au_branch *br;
struct au_xino *xi;
struct file *file;
err = -ENOMEM;
page = (void *)__get_free_page(GFP_NOFS);
if (unlikely(!page))
goto out;
err = 0;
bbot = au_sbbot(sb);
for (bindex = 0; !err && bindex <= bbot; bindex++)
if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0) {
br = au_sbr(sb, bindex);
xi = br->br_xino;
nfile = xi->xi_nfile;
for (i = 0; i < nfile; i++) {
file = au_xino_file(xi, i);
if (file)
err = do_xib_restore(sb, file, page);
}
} else
AuDbg("skip shared b%d\n", bindex);
free_page((unsigned long)page);
out:
return err;
}
/*
 * Truncate the xino bitmap file and rebuild its contents.
 * Nothing is done when the XINO mount option is off or when the size of the
 * bitmap file does not exceed PAGE_SIZE. Otherwise si_xib is replaced by the
 * file obtained from au_xino_create2(), a zeroed si_xib_buf is written as its
 * first page, and the bitmap is rebuilt by xib_restore() under si_xib_mtx.
 * The caller must hold the write lock of @sb.
 *
 * Returns zero on success or when nothing is done, the error from
 * au_xino_create2() or xib_restore(), or the error for the failed write
 * (-EIO for a short write).
 */
int au_xib_trunc(struct super_block *sb)
{
int err;
ssize_t sz;
loff_t pos;
struct au_sbinfo *sbinfo;
unsigned long *p;
struct file *file;
SiMustWriteLock(sb);
err = 0;
sbinfo = au_sbi(sb);
if (!au_opt_test(sbinfo->si_mntflags, XINO))
goto out;
file = sbinfo->si_xib;
if (vfsub_f_size_read(file) <= PAGE_SIZE)
goto out;
file = au_xino_create2(sb, &sbinfo->si_xib->f_path, NULL);
/* meaningful only when IS_ERR(), overwritten on every later path */
err = PTR_ERR(file);
if (IS_ERR(file))
goto out;
/* switch to the new file, dropping the reference to the old one */
fput(sbinfo->si_xib);
sbinfo->si_xib = file;
p = sbinfo->si_xib_buf;
memset(p, 0, PAGE_SIZE);
pos = 0;
sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
if (unlikely(sz != PAGE_SIZE)) {
err = sz;
AuIOErr("err %d\n", err);
/* a short write is not a negative value, convert it to an error */
if (sz >= 0)
err = -EIO;
goto out;
}
mutex_lock(&sbinfo->si_xib_mtx);
/* mnt_want_write() is unnecessary here */
err = xib_restore(sb);
mutex_unlock(&sbinfo->si_xib_mtx);
out:
return err;
}
/* ---------------------------------------------------------------------- */
struct au_xino *au_xino_alloc(unsigned int nfile)
{
struct au_xino *xi;
......@@ -942,6 +1281,7 @@ struct au_xino *au_xino_alloc(unsigned int nfile)
init_waitqueue_head(&xi->xi_nondir.wqh);
mutex_init(&xi->xi_mtx);
INIT_HLIST_BL_HEAD(&xi->xi_writing);
atomic_set(&xi->xi_truncating, 0);
kref_init(&xi->xi_kref);
goto out; /* success */
......@@ -1407,6 +1747,7 @@ void au_xino_delete_inode(struct inode *inode, const int unlinked)
int err;
unsigned int mnt_flags;
aufs_bindex_t bindex, bbot, bi;
unsigned char try_trunc;
struct au_iinfo *iinfo;
struct super_block *sb;
struct au_hinode *hi;
......@@ -1433,6 +1774,7 @@ void au_xino_delete_inode(struct inode *inode, const int unlinked)
return;
xwrite = au_sbi(sb)->si_xwrite;
try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
hi = au_hinode(iinfo, bindex);
bbot = iinfo->ii_bbot;
for (; bindex <= bbot; bindex++, hi++) {
......@@ -1453,6 +1795,9 @@ void au_xino_delete_inode(struct inode *inode, const int unlinked)
continue;
err = au_xino_do_write(xwrite, file, &calc, /*ino*/0);
if (!err && try_trunc
&& au_test_fs_trunc_xino(au_br_sb(br)))
xino_try_trunc(sb, br);
}
}
......
......@@ -63,6 +63,8 @@ typedef int16_t aufs_bindex_t;
#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
#define AUFS_XINO_DEF_SEC 30 /* seconds */
#define AUFS_XINO_DEF_TRUNC 45 /* percentage */
#define AUFS_WKQ_NAME AUFS_NAME "d"
/* branch permissions and attributes */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment