Commit 681a21c3 authored by J. R. Okajima's avatar J. R. Okajima
Browse files

aufs: ioctl, rdu (readdir in userspace)



For a directory which has millions of files, the aufs VDIR consumes
a lot of memory. In this case, RDU (readdir(3) in user-space) is definitely
better.
If you enable CONFIG_AUFS_RDU when compiling aufs, install libau.so from
aufs-util.git, and set some environment variables, then you can use this
feature. When readdir(3) in libau.so is given an aufs dir, it issues
ioctl(2) instead of the regular readdir(3).
All merging and whiteout handling is done in userspace.
Signed-off-by: J. R. Okajima <hooanon05g@gmail.com>
parent 0d7d5cf2
......@@ -96,6 +96,18 @@ config AUFS_XATTR
branch attributes for EA.
See detail in aufs.5.
config AUFS_RDU
bool "Readdir in userspace"
help
Aufs has two methods to provide a merged view for a directory:
natively in kernel-space, or by a user-space library. The
kernel-space method is always enabled but is sometimes large and slow.
If you enable this option, install the library in aufs2-util
package, and set some environment variables for your readdir(3),
then the work will be handled in user-space, which shows better
performance in most cases.
See detail in aufs.5.
config AUFS_DIRREN
bool "Workaround for rename(2)-ing a directory"
help
......
......@@ -30,5 +30,6 @@ aufs-$(CONFIG_AUFS_EXPORT) += export.o
aufs-$(CONFIG_AUFS_XATTR) += xattr.o
aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
aufs-$(CONFIG_AUFS_DIRREN) += dirren.o
aufs-$(CONFIG_AUFS_RDU) += rdu.o
aufs-$(CONFIG_AUFS_DEBUG) += debug.o
aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
......@@ -101,5 +101,21 @@ int au_vdir_fill_de(struct file *file, struct dir_context *ctx);
/* ioctl.c */
long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
/*
 * Entry points for the readdir-in-userspace (RDU) ioctls.
 * With CONFIG_AUFS_RDU the real functions come from rdu.c; without it,
 * AuStub() provides stand-ins whose body is "return -EINVAL"
 * (NOTE(review): AuStub is defined elsewhere -- presumably it expands to a
 * static inline function; confirm).
 * The compat variant exists only when CONFIG_COMPAT is set.
 */
#ifdef CONFIG_AUFS_RDU
/* rdu.c */
long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
unsigned long arg);
#endif
#else
AuStub(long, au_rdu_ioctl, return -EINVAL, struct file *file,
unsigned int cmd, unsigned long arg)
#ifdef CONFIG_COMPAT
AuStub(long, au_rdu_compat_ioctl, return -EINVAL, struct file *file,
unsigned int cmd, unsigned long arg)
#endif
#endif
#endif /* __KERNEL__ */
#endif /* __AUFS_DIR_H__ */
......@@ -111,6 +111,11 @@ long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
long err;
switch (cmd) {
case AUFS_CTL_RDU:
case AUFS_CTL_RDU_INO:
err = au_rdu_ioctl(file, cmd, arg);
break;
case AUFS_CTL_WBR_FD:
err = au_wbr_fd(&file->f_path, (void __user *)arg);
break;
......@@ -155,6 +160,11 @@ long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
long err;
switch (cmd) {
case AUFS_CTL_RDU:
case AUFS_CTL_RDU_INO:
err = au_rdu_compat_ioctl(file, cmd, arg);
break;
case AUFS_CTL_BRINFO:
err = au_brinfo_compat_ioctl(file, arg);
break;
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2019 Junjiro R. Okajima
*/
/*
* readdir in userspace.
*/
#include <linux/compat.h>
#include <linux/fs_stack.h>
#include <linux/security.h>
#include <linux/string.h>
#include "aufs.h"
/*
 * Bits kept in struct au_rdu_cookie.flags (reached as rdu->cookie.flags).
 *
 * CALLED: the fill callback ran at least once during the last iterate
 * CONT:   this request continues an earlier one (cookie is already set up)
 * FULL:   the user buffer has no room for the next entry
 */
#define AuRdu_CALLED	(1 << 0)
#define AuRdu_CONT	(1 << 1)
#define AuRdu_FULL	(1 << 2)

/* test, set and clear a single AuRdu_<name> bit in 'flags' */
#define au_ftest_rdu(flags, name)	((flags) & AuRdu_##name)
#define au_fset_rdu(flags, name) \
	do { \
		(flags) |= AuRdu_##name; \
	} while (0)
#define au_fclr_rdu(flags, name) \
	do { \
		(flags) &= ~AuRdu_##name; \
	} while (0)
/*
 * Per-request state handed to the directory iterator.
 * The dir_context is embedded so that au_rdu_fill() can get back to this
 * structure with container_of().
 */
struct au_rdu_arg {
/* iterator context; .actor is set to au_rdu_fill */
struct dir_context ctx;
/* the request being served (kernel copy of the user's struct) */
struct aufs_rdu *rdu;
/* write cursor inside the user buffer; advanced after each stored entry */
union au_rdu_ent_ul ent;
/* address just past the user buffer (buffer start + rdu->sz) */
unsigned long end;
/* superblock of the aufs directory being read */
struct super_block *sb;
/* error slot reset before every iterate and read back afterwards */
int err;
};
/*
 * dir_context actor: store one lower directory entry in the user buffer.
 *
 * @ctx:    embedded in struct au_rdu_arg, which carries the request state
 * @name:   entry name, @nlen bytes, not necessarily NUL-terminated
 * @nlen:   length of @name
 * @offset: unused
 * @h_ino:  inode number on the lower branch
 * @d_type: DT_* type of the entry
 *
 * Marks the cookie CALLED on every invocation.  When the entry fits, an
 * au_rdu_ent header, the name and a terminating NUL are written at the
 * current cursor, the cursor advances by au_rdu_len(@nlen) and rdu->rent
 * is incremented.  When it does not fit, the cookie is marked FULL,
 * rdu->full is set and rdu->tail records the cursor.
 *
 * Returns 0 when the entry was stored and -EFAULT otherwise.  A buffer-full
 * condition leaves arg->err at 0 (it is a normal way to end a request); a
 * failed copy to user space sets arg->err to -EFAULT so that au_rdu_do()
 * sees the failure instead of iterating over the same entry again.
 */
static int au_rdu_fill(struct dir_context *ctx, const char *name, int nlen,
		       loff_t offset, u64 h_ino, unsigned int d_type)
{
	int len;
	struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx);
	struct aufs_rdu *rdu = arg->rdu;
	struct au_rdu_ent ent;

	arg->err = 0;
	au_fset_rdu(rdu->cookie.flags, CALLED);
	len = au_rdu_len(nlen);
	if (arg->ent.ul + len >= arg->end) {
		/* no room left; remember where the next entry would go */
		au_fset_rdu(rdu->cookie.flags, FULL);
		rdu->full = 1;
		rdu->tail = arg->ent;
		return -EFAULT;
	}

	/*
	 * The whole struct is copied out below, so clear it first: 'wh' and
	 * the alignment padding would otherwise expose kernel stack bytes.
	 */
	memset(&ent, 0, sizeof(ent));
	ent.ino = h_ino;
	ent.bindex = rdu->cookie.bindex;
	ent.type = d_type;
	ent.nlen = nlen;
	if (unlikely(nlen > AUFS_MAX_NAMELEN))
		ent.type = DT_UNKNOWN;

	/* unnecessary to support mmap_sem since this is a dir */
	if (copy_to_user(arg->ent.e, &ent, sizeof(ent))
	    || copy_to_user(arg->ent.e->name, name, nlen)
	    /* the terminating NULL */
	    || __put_user(0, arg->ent.e->name + nlen)) {
		/* report the fault to au_rdu_do(), which only reads arg->err */
		arg->err = -EFAULT;
		return -EFAULT;
	}

	/* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
	arg->ent.ul += len;
	rdu->rent++;
	return 0;
}
/*
 * Read entries from one lower directory file into the request.
 *
 * Seeks @h_file to the position saved in the cookie, then keeps calling the
 * lower iterate for as long as it succeeds, the fill callback was invoked
 * during the last pass, and the user buffer is not full.  The resulting
 * file position is stored back into the cookie.
 *
 * Returns 0 on success; otherwise the value from the failed seek, the lower
 * iterate, or arg->err.
 */
static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
{
	int err;
	loff_t pos;
	struct au_rdu_cookie *ck = &arg->rdu->cookie;

	/* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */
	pos = vfsub_llseek(h_file, ck->h_pos, SEEK_SET);
	err = pos;
	if (unlikely(pos != ck->h_pos))
		goto out;

	for (;;) {
		arg->err = 0;
		au_fclr_rdu(ck->flags, CALLED);
		/* smp_mb(); */
		err = vfsub_iterate_dir(h_file, &arg->ctx);
		if (err >= 0)
			err = arg->err;

		if (err)
			break;
		/* nothing was emitted: end of this lower directory */
		if (!au_ftest_rdu(ck->flags, CALLED))
			break;
		if (au_ftest_rdu(ck->flags, FULL))
			break;
	}
	ck->h_pos = h_file->f_pos;

out:
	AuTraceErr(err);
	return err;
}
/*
 * Serve one AUFS_CTL_RDU request on the aufs directory @file.
 *
 * Walks the lower directory files of @file, starting at cookie->bindex and
 * cookie->h_pos, and lets au_rdu_fill() write their entries into the user
 * buffer described by rdu->ent / rdu->sz.  The cookie is updated so that a
 * later call can resume where this one stopped.
 *
 * Returns 0 on success, or a negative error:
 *   -EFAULT   the user buffer fails access_ok()
 *   -ENOTDIR  @file has neither ->iterate nor ->iterate_shared
 *   -EAGAIN   a continued request whose saved generation differs from
 *             au_figen(file)
 *   or whatever security_file_permission(), si_read_lock(),
 *   au_alive_dir() or au_rdu_do() returned.
 *
 * Locks are taken in the order inode (shared), superblock info (read),
 * file info (read), and dropped in the reverse order through the out_*
 * labels.
 */
static int au_rdu(struct file *file, struct aufs_rdu *rdu)
{
int err;
aufs_bindex_t bbot;
struct au_rdu_arg arg = {
.ctx = {
.actor = au_rdu_fill
}
};
struct dentry *dentry;
struct inode *inode;
struct file *h_file;
struct au_rdu_cookie *cookie = &rdu->cookie;
/* VERIFY_WRITE */
err = !access_ok(rdu->ent.e, rdu->sz);
if (unlikely(err)) {
err = -EFAULT;
AuTraceErr(err);
goto out;
}
/* reset the output fields and point the write cursor at the buffer start */
rdu->rent = 0;
rdu->tail = rdu->ent;
rdu->full = 0;
arg.rdu = rdu;
arg.ent = rdu->ent;
arg.end = arg.ent.ul;
arg.end += rdu->sz;
err = -ENOTDIR;
if (unlikely(!file->f_op->iterate && !file->f_op->iterate_shared))
goto out;
err = security_file_permission(file, MAY_READ);
AuTraceErr(err);
if (unlikely(err))
goto out;
dentry = file->f_path.dentry;
inode = d_inode(dentry);
inode_lock_shared(inode);
arg.sb = inode->i_sb;
err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
if (unlikely(err))
goto out_mtx;
err = au_alive_dir(dentry);
if (unlikely(err))
goto out_si;
/* todo: reval? */
fi_read_lock(file);
/* a continued request is only valid for the generation it started with */
err = -EAGAIN;
if (unlikely(au_ftest_rdu(cookie->flags, CONT)
&& cookie->generation != au_figen(file)))
goto out_unlock;
err = 0;
/* blk == 0 on input: supply si_rdblk, or au_dir_size() when that is 0 too */
if (!rdu->blk) {
rdu->blk = au_sbi(arg.sb)->si_rdblk;
if (!rdu->blk)
rdu->blk = au_dir_size(file, /*dentry*/NULL);
}
/*
 * 'bbot' first holds au_fbtop(file) (presumably the file's top branch
 * index -- confirm); a cookie index below it is raised to it.  It is then
 * reused for the loop's upper bound.
 */
bbot = au_fbtop(file);
if (cookie->bindex < bbot)
cookie->bindex = bbot;
bbot = au_fbbot_dir(file);
/* AuDbg("b%d, b%d\n", cookie->bindex, bbot); */
/*
 * The increment (next branch, h_pos back to 0) runs only when a branch was
 * skipped or fully read; the break below leaves bindex/h_pos as they are.
 */
for (; !err && cookie->bindex <= bbot;
cookie->bindex++, cookie->h_pos = 0) {
h_file = au_hf_dir(file, cookie->bindex);
if (!h_file)
continue;
au_fclr_rdu(cookie->flags, FULL);
err = au_rdu_do(h_file, &arg);
AuTraceErr(err);
if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
break;
}
AuDbg("rent %llu\n", rdu->rent);
/* first successful call: report shwh and arm the cookie for continuation */
if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
au_fset_rdu(cookie->flags, CONT);
cookie->generation = au_figen(file);
}
/* the atime copy is done whether or not the loop above ended with an error */
ii_read_lock_child(inode);
fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibtop(inode)));
ii_read_unlock(inode);
out_unlock:
fi_read_unlock(file);
out_si:
si_read_unlock(arg.sb);
out_mtx:
inode_unlock_shared(inode);
out:
AuTraceErr(err);
return err;
}
/*
 * Serve one AUFS_CTL_RDU_INO request: rewrite the inode number of
 * rdu->nent entries in the user buffer at rdu->ent.
 *
 * Each entry header is read from user memory; depending on its 'wh' field
 * the new number comes from au_wh_ino() or au_ino(), and it is stored back
 * into the entry's 'ino' field.  The cursor then advances by
 * au_rdu_len(ent.nlen).  Runs under the superblock info read lock.
 *
 * Returns 0 on success, -EFAULT when user memory cannot be read or written,
 * or the error from au_ino()/au_wh_ino().  Processing stops at the first
 * failure; entries handled before it keep their updated numbers.
 *
 * NOTE(review): ent.bindex comes straight from user memory and is handed to
 * au_ino()/au_wh_ino() without a range check in this function -- confirm
 * that the callees validate it.
 */
static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
{
	int err;
	ino_t ino;
	unsigned long long nent;
	union au_rdu_ent_ul *u;
	struct au_rdu_ent ent;
	struct super_block *sb;

	err = 0;
	nent = rdu->nent;
	u = &rdu->ent;
	sb = file->f_path.dentry->d_sb;
	si_read_lock(sb, AuLock_FLUSH);
	while (nent-- > 0) {
		/* unnecessary to support mmap_sem since this is a dir */
		err = copy_from_user(&ent, u->e, sizeof(ent));
		if (!err)
			/*
			 * VERIFY_WRITE
			 * check the size of the field __put_user() stores
			 * below (uint64_t), which can be wider than ino_t
			 */
			err = !access_ok(&u->e->ino, sizeof(u->e->ino));
		if (unlikely(err)) {
			err = -EFAULT;
			AuTraceErr(err);
			break;
		}

		/* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
		if (!ent.wh)
			err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
		else
			err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
					&ino);
		if (unlikely(err)) {
			AuTraceErr(err);
			break;
		}

		err = __put_user(ino, &u->e->ino);
		if (unlikely(err)) {
			err = -EFAULT;
			AuTraceErr(err);
			break;
		}
		u->ul += au_rdu_len(ent.nlen);
	}
	si_read_unlock(sb);

	return err;
}
/* ---------------------------------------------------------------------- */
/*
 * Sanity-check a request copied in from user space.
 *
 * The caller must have stored sizeof(struct aufs_rdu) in
 * verify[AufsCtlRduV_SZ]; any other value is rejected.
 *
 * Returns 0 when the size matches, -EINVAL otherwise.
 */
static int au_rdu_verify(struct aufs_rdu *rdu)
{
	AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
	      "%llu, b%d, 0x%x, g%u}\n",
	      rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
	      rdu->blk,
	      rdu->rent, rdu->shwh, rdu->full,
	      rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
	      rdu->cookie.generation);

	if (rdu->verify[AufsCtlRduV_SZ] != sizeof(*rdu)) {
		AuDbg("%u:%u\n",
		      rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
		return -EINVAL;
	}

	return 0;
}
long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
long err, e;
struct aufs_rdu rdu;
void __user *p = (void __user *)arg;
err = copy_from_user(&rdu, p, sizeof(rdu));
if (unlikely(err)) {
err = -EFAULT;
AuTraceErr(err);
goto out;
}
err = au_rdu_verify(&rdu);
if (unlikely(err))
goto out;
switch (cmd) {
case AUFS_CTL_RDU:
err = au_rdu(file, &rdu);
if (unlikely(err))
break;
e = copy_to_user(p, &rdu, sizeof(rdu));
if (unlikely(e)) {
err = -EFAULT;
AuTraceErr(err);
}
break;
case AUFS_CTL_RDU_INO:
err = au_rdu_ino(file, &rdu);
break;
default:
/* err = -ENOTTY; */
err = -EINVAL;
}
out:
AuTraceErr(err);
return err;
}
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry for AUFS_CTL_RDU and AUFS_CTL_RDU_INO.
 *
 * Same flow as au_rdu_ioctl(), except that @arg and the buffer address
 * inside the request are converted with compat_ptr() on the way in, and
 * the 'ent' and 'tail' addresses are converted back with ptr_to_compat()
 * before an AUFS_CTL_RDU result is copied out.
 *
 * Returns 0 on success, -EFAULT on a failed user copy, -EINVAL for a
 * request that fails verification or an unknown @cmd, or the error from
 * au_rdu()/au_rdu_ino().
 */
long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	long err;
	struct aufs_rdu rdu;
	void __user *uarg = compat_ptr(arg);

	/* todo: get_user()? */
	if (unlikely(copy_from_user(&rdu, uarg, sizeof(rdu)))) {
		err = -EFAULT;
		AuTraceErr(err);
		goto out;
	}

	rdu.ent.e = compat_ptr(rdu.ent.ul);
	err = au_rdu_verify(&rdu);
	if (unlikely(err))
		goto out;

	if (cmd == AUFS_CTL_RDU) {
		err = au_rdu(file, &rdu);
		if (unlikely(err))
			goto out;
		rdu.ent.ul = ptr_to_compat(rdu.ent.e);
		rdu.tail.ul = ptr_to_compat(rdu.tail.e);
		if (unlikely(copy_to_user(uarg, &rdu, sizeof(rdu)))) {
			err = -EFAULT;
			AuTraceErr(err);
		}
	} else if (cmd == AUFS_CTL_RDU_INO) {
		err = au_rdu_ino(file, &rdu);
	} else {
		/* err = -ENOTTY; */
		err = -EINVAL;
	}

out:
	AuTraceErr(err);
	return err;
}
#endif
......@@ -193,6 +193,10 @@ static inline int au_br_wh_linkable(int brperm)
/* ioctl */
/*
 * Command numbers fed to the _IO*() macros that build the AUFS_CTL_* ioctl
 * codes.  The values are positional, so the order of the enumerators is
 * part of the user-visible ioctl numbering.
 */
enum {
/* readdir in userspace */
AuCtl_RDU,
AuCtl_RDU_INO,
AuCtl_WBR_FD, /* pathconf wrapper */
AuCtl_BR /* info about branches */
};
......@@ -214,6 +218,62 @@ enum {
#endif
#endif
/*
 * Continuation state of a readdir-in-userspace request.  It travels inside
 * struct aufs_rdu, so the kernel gets it back on the next call and can
 * resume where the previous one stopped.
 */
struct au_rdu_cookie {
/* position in the current lower directory; sought to before iterating */
uint64_t h_pos;
/* index of the branch currently being read */
int16_t bindex;
/* AuRdu_* bits (CALLED, CONT, FULL) */
uint8_t flags;
uint8_t pad;
/* generation of the aufs file when the request started; a mismatch on a
 * continued request is answered with -EAGAIN */
uint32_t generation;
} __aligned(8);
/*
 * One directory entry as laid out in the user buffer: this header followed
 * directly by the NUL-terminated name.  Entries are packed back to back,
 * each occupying au_rdu_len(nlen) bytes.
 */
struct au_rdu_ent {
/* AUFS_CTL_RDU stores the lower inode number here; AUFS_CTL_RDU_INO
 * replaces it with the number obtained from au_ino()/au_wh_ino() */
uint64_t ino;
/* index of the branch the entry was read from */
int16_t bindex;
/* DT_* type; DT_UNKNOWN when the name is longer than AUFS_MAX_NAMELEN */
uint8_t type;
/* length of the name, not counting the terminating NUL */
uint8_t nlen;
/* read back from user memory by AUFS_CTL_RDU_INO: non-zero selects
 * au_wh_ino() instead of au_ino() */
uint8_t wh;
/* name bytes start here */
char name[0];
} __aligned(8);
/*
 * Number of bytes one entry with an @nlen-byte name occupies in the user
 * buffer: the au_rdu_ent header, the name and its terminating NUL, rounded
 * up to a multiple of sizeof(uint64_t).
 */
static inline int au_rdu_len(int nlen)
{
/* include the terminating NULL */
return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
sizeof(uint64_t));
}
/*
 * A user-space address of an entry, usable either as a pointer or as a
 * 64-bit integer.  The integer view is what cursor arithmetic and the
 * compat ioctl's pointer conversion operate on.
 */
union au_rdu_ent_ul {
struct au_rdu_ent __user *e;
uint64_t ul;
};
/* indices into aufs_rdu.verify[] */
enum {
/* slot that must hold sizeof(struct aufs_rdu) */
AufsCtlRduV_SZ,
/* number of slots; used as the array size */
AufsCtlRduV_End
};
/*
 * Argument of the AUFS_CTL_RDU and AUFS_CTL_RDU_INO ioctls.
 * The kernel copies the whole structure in, and for AUFS_CTL_RDU copies
 * the whole structure back out on success.
 */
struct aufs_rdu {
/* input */
union {
/* size in bytes of the buffer at 'ent' */
uint64_t sz; /* AuCtl_RDU */
/* number of entries at 'ent' to process */
uint64_t nent; /* AuCtl_RDU_INO */
};
/* user buffer holding au_rdu_ent records */
union au_rdu_ent_ul ent;
/* verify[AufsCtlRduV_SZ] must equal sizeof(struct aufs_rdu), otherwise
 * the request is rejected with -EINVAL */
uint16_t verify[AufsCtlRduV_End];
/* input/output */
/* when 0 on input, the kernel fills in a value of its own */
uint32_t blk;
/* output */
/* reset to 'ent' at the start of a call; set to the write cursor when an
 * entry did not fit */
union au_rdu_ent_ul tail;
/* number of entries which were added in a single call */
uint64_t rent;
/* set to 1 when the buffer had no room for the next entry */
uint8_t full;
/* set from the SHWH mount option on the first successful call */
uint8_t shwh;
/* continuation state; pass it back unchanged to continue reading */
struct au_rdu_cookie cookie;
} __aligned(8);
/* ---------------------------------------------------------------------- */
/* dirren. the branch is identified by the filename who contains this */
......@@ -257,6 +317,8 @@ union aufs_brinfo {
/* ---------------------------------------------------------------------- */
/* ioctl codes: magic type 'A' combined with the AuCtl_* command numbers */
#define AuCtlType 'A'
/* both readdir-in-userspace commands take a struct aufs_rdu (read/write) */
#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \
struct aufs_wbr_fd)
#define AUFS_CTL_BRINFO _IOW(AuCtlType, AuCtl_BR, union aufs_brinfo)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment