Commit 08ecca6f authored by J. R. Okajima
Browse files

aufs: fuse branch (including poll(2))



Fuse doesn't want the callers to access the inode attributes without
issuing stat, and it is not assured that they are valid after lookup or
iget().
The inode attributes are critical for aufs, so aufs calls stat every
time for a fuse branch.
Of course, this makes aufs slow. But when the branch fs is not fuse, stat
is not called.

Currently, only FUSE implements ->poll(), and aufs supports it.
Signed-off-by: J. R. Okajima <hooanon05g@gmail.com>
parent ae320f04
......@@ -151,6 +151,21 @@ config AUFS_BR_RAMFS
Unless you are going to use ramfs as an aufs branch fs without
switch_root or something, leave it N.
config AUFS_BR_FUSE
bool "Fuse fs as an aufs branch"
depends on FUSE_FS
select AUFS_POLL
help
If you want to use fuse-based userspace filesystem as an aufs
branch fs, then enable this option.
It implements the internal poll(2) operation which is
implemented by fuse only (currently).
config AUFS_POLL
bool
help
Automatic configuration for internal use.
config AUFS_BR_HFSPLUS
bool "Hfsplus as an aufs branch"
depends on HFSPLUS_FS
......
......@@ -32,6 +32,7 @@ aufs-$(CONFIG_AUFS_XATTR) += xattr.o
aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
aufs-$(CONFIG_AUFS_DIRREN) += dirren.o
aufs-$(CONFIG_AUFS_FHSM) += fhsm.o
aufs-$(CONFIG_AUFS_POLL) += poll.o
aufs-$(CONFIG_AUFS_RDU) += rdu.o
aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
aufs-$(CONFIG_AUFS_DEBUG) += debug.o
......
......@@ -349,8 +349,9 @@ static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
if (d_is_positive(h_dentry)) {
h_inode = d_inode(h_dentry);
au_iattr_save(&ia, h_inode);
} else if (au_test_nfs(h_sb))
} else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
/* nfs d_revalidate may return 0 for negative dentry */
/* fuse d_revalidate always returns 0 for negative dentry */
goto out;
/* main purpose is namei.c:cached_lookup() and d_revalidate */
......
......@@ -257,6 +257,9 @@ static int do_open_dir(struct file *file, int flags, struct file *h_file)
if (!h_dentry)
continue;
err = vfsub_test_mntns(mnt, h_dentry->d_sb);
if (unlikely(err))
break;
h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
if (IS_ERR(h_file)) {
err = PTR_ERR(h_file);
......
......@@ -17,7 +17,7 @@ int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
{
int err;
aufs_bindex_t bindex;
struct dentry *dentry;
struct dentry *dentry, *h_dentry;
struct au_finfo *finfo;
struct inode *h_inode;
......@@ -30,27 +30,38 @@ int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
atomic_set(&finfo->fi_mmapped, 0);
bindex = au_dbtop(dentry);
if (!h_file)
if (!h_file) {
h_dentry = au_h_dptr(dentry, bindex);
err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
if (unlikely(err))
goto out;
h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
else
get_file(h_file);
if (IS_ERR(h_file))
err = PTR_ERR(h_file);
else {
if ((flags & __O_TMPFILE)
&& !(flags & O_EXCL)) {
h_inode = file_inode(h_file);
spin_lock(&h_inode->i_lock);
h_inode->i_state |= I_LINKABLE;
spin_unlock(&h_inode->i_lock);
if (IS_ERR(h_file)) {
err = PTR_ERR(h_file);
goto out;
}
au_set_fbtop(file, bindex);
au_set_h_fptr(file, bindex, h_file);
au_update_figen(file);
/* todo: necessary? */
/* file->f_ra = h_file->f_ra; */
} else {
h_dentry = h_file->f_path.dentry;
err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
if (unlikely(err))
goto out;
/* br ref is already inc-ed */
}
if ((flags & __O_TMPFILE)
&& !(flags & O_EXCL)) {
h_inode = file_inode(h_file);
spin_lock(&h_inode->i_lock);
h_inode->i_state |= I_LINKABLE;
spin_unlock(&h_inode->i_lock);
}
au_set_fbtop(file, bindex);
au_set_h_fptr(file, bindex, h_file);
au_update_figen(file);
/* todo: necessary? */
/* file->f_ra = h_file->f_ra; */
out:
return err;
}
......@@ -769,6 +780,9 @@ const struct file_operations aufs_file_fop = {
.read_iter = aufs_read_iter,
.write_iter = aufs_write_iter,
#ifdef CONFIG_AUFS_POLL
.poll = aufs_poll,
#endif
.unlocked_ioctl = aufs_ioctl_nondir,
#ifdef CONFIG_COMPAT
.compat_ioctl = aufs_compat_ioctl_nondir,
......
......@@ -15,6 +15,7 @@
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/mm_types.h>
#include <linux/poll.h>
#include "rwsem.h"
struct au_branch;
......@@ -78,6 +79,11 @@ int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
int au_do_flush(struct file *file, fl_owner_t id,
int (*flush)(struct file *file, fl_owner_t id));
/* poll.c */
#ifdef CONFIG_AUFS_POLL
__poll_t aufs_poll(struct file *file, struct poll_table_struct *pt);
#endif
#ifdef CONFIG_AUFS_BR_HFSPLUS
/* hfsplus.c */
struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
......
......@@ -62,6 +62,15 @@ static inline int au_test_nfs(struct super_block *sb __maybe_unused)
#endif
}
/*
 * Test whether @sb carries the FUSE superblock magic number.
 * The comparison is compiled in only when CONFIG_FUSE_FS is enabled;
 * otherwise the result is always 0 and @sb is not touched.
 */
static inline int au_test_fuse(struct super_block *sb __maybe_unused)
{
	int is_fuse = 0;

#if IS_ENABLED(CONFIG_FUSE_FS)
	is_fuse = (sb->s_magic == FUSE_SUPER_MAGIC);
#endif
	return is_fuse;
}
static inline int au_test_xfs(struct super_block *sb __maybe_unused)
{
#if IS_ENABLED(CONFIG_XFS_FS)
......@@ -262,6 +271,20 @@ static inline int au_test_fs_remote(struct super_block *sb)
* filesystems under linux/fs. it means we have to do so in every update...
*/
/*
 * Some filesystems need a getattr call to refresh their inode attributes
 * before those attributes are referenced.
 * In most cases the inode attributes of NFS (or of any remote fs) can be
 * relied upon, and the work is left to d_revalidate().
 *
 * Returns 1 when au_test_nfs() or au_test_fuse() matches @sb, 0 otherwise.
 */
static inline int au_test_fs_refresh_iattr(struct super_block *sb)
{
	if (au_test_nfs(sb))
		return 1;
	if (au_test_fuse(sb))
		return 1;
	/* au_test_btrfs(sb) is intentionally not consulted: untested */
	return 0;
}
/*
* filesystems which don't maintain i_size or i_blocks.
*/
......@@ -304,6 +327,7 @@ static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
/*
 * Returns 1 when any of au_test_nfs(), au_test_fuse() or au_test_ubifs()
 * matches @sb, 0 otherwise.
 * NOTE(review): judging by the name, these are the branch filesystems whose
 * timestamps get special treatment -- confirm against the callers.
 */
static inline int au_test_fs_notime(struct super_block *sb)
{
	if (au_test_nfs(sb) || au_test_fuse(sb))
		return 1;
	return !!au_test_ubifs(sb);
}
......
......@@ -354,9 +354,11 @@ int aufs_unlink(struct inode *dir, struct dentry *dentry)
epilog(dir, dentry, bindex);
/* update target timestamps */
if (bindex == btop)
if (bindex == btop) {
vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
/*ignore*/
inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
else
} else
/* todo: this timestamp may be reverted later */
inode->i_ctime = h_dir->i_ctime;
goto out_unpin; /* success */
......
......@@ -420,8 +420,15 @@ static int do_rename(struct au_ren_args *a)
goto out_rename;
/* update target timestamps */
if (a->exchange) {
AuDebugOn(au_dbtop(a->dst_dentry) != a->btgt);
a->h_path.dentry = au_h_dptr(a->dst_dentry, a->btgt);
vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
a->dst_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
}
AuDebugOn(au_dbtop(a->src_dentry) != a->btgt);
a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
a->src_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
if (!a->exchange) {
......
# SPDX-License-Identifier: GPL-2.0
# defined in ${srctree}/fs/fuse/inode.c
# tristate
ifdef CONFIG_FUSE_FS
ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
endif
# defined in ${srctree}/fs/xfs/xfs_sb.h
# tristate
ifdef CONFIG_XFS_FS
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2019 Junjiro R. Okajima
*/
/*
* poll operation
* There is only one filesystem which implements ->poll operation, currently.
*/
#include "aufs.h"
/*
 * ->poll() for aufs files.
 *
 * Takes the aufs superblock read lock, obtains a file through au_read_pre()
 * (presumably the file on the branch -- confirm in au_read_pre()) and
 * forwards the request to it with vfs_poll().
 *
 * Returns the mask reported by vfs_poll(), or EPOLLERR when au_read_pre()
 * fails.
 */
__poll_t aufs_poll(struct file *file, struct poll_table_struct *pt)
{
	struct super_block *sb = file->f_path.dentry->d_sb;
	struct file *h_file;
	__poll_t mask;

	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
	if (!IS_ERR(h_file)) {
		mask = vfs_poll(h_file, pt);
		/* drop the reference here instead of calling au_read_post() */
		fput(h_file);
	} else {
		/* We should pretend an error happened. */
		mask = EPOLLERR /* | EPOLLIN | EPOLLOUT */;
		AuDbg("h_file %ld\n", PTR_ERR(h_file));
	}
	si_read_unlock(sb);

	/* log error masks, whether ours or the one vfs_poll() returned */
	if (mask & EPOLLERR)
		AuDbg("mask 0x%x\n", mask);
	return mask;
}
......@@ -7,11 +7,23 @@
* sub-routines for VFS
*/
#include <linux/mnt_namespace.h>
#include <linux/namei.h>
#include <linux/nsproxy.h>
#include <linux/security.h>
#include <linux/splice.h>
#include "aufs.h"
#ifdef CONFIG_AUFS_BR_FUSE
/*
 * Mount-namespace check for FUSE branches.
 *
 * Returns -EACCES when @h_sb is a FUSE superblock, au_userns is set and
 * is_current_mnt_ns(@mnt) is false. Returns 0 in every other case.
 */
int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb)
{
	if (au_test_fuse(h_sb) && au_userns && !is_current_mnt_ns(mnt))
		return -EACCES;
	return 0;
}
#endif
int vfsub_sync_filesystem(struct super_block *h_sb, int wait)
{
int err;
......@@ -27,6 +39,28 @@ int vfsub_sync_filesystem(struct super_block *h_sb, int wait)
/* ---------------------------------------------------------------------- */
/*
 * Refresh the inode attributes behind @h_path by issuing a getattr, but only
 * for filesystems that need it.
 *
 * - for a remote fs, the work is left to its getattr or d_revalidate
 * - for a fs with bad i_attr, they are handled in aufs_getattr()
 * - some fs may still acquire i_mutex; those have to be skipped
 *
 * @did: optional (may be NULL); set to 1 when a getattr was issued,
 *       to 0 otherwise.
 *
 * Returns 0 when no getattr was issued, otherwise the value returned by
 * vfsub_getattr().
 */
int vfsub_update_h_iattr(struct path *h_path, int *did)
{
	struct super_block *h_sb = h_path->dentry->d_sb;
	struct kstat st;
	int refresh;

	refresh = !au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb);
	if (did)
		*did = refresh;
	if (!refresh)
		return 0;

	return vfsub_getattr(h_path, &st);
}
/* ---------------------------------------------------------------------- */
struct file *vfsub_dentry_open(struct path *path, int flags)
{
struct file *file;
......@@ -49,7 +83,11 @@ struct file *vfsub_filp_open(const char *path, int oflags, int mode)
oflags /* | __FMODE_NONOTIFY */,
mode);
lockdep_on();
if (IS_ERR(file))
goto out;
vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
out:
return file;
}
......@@ -116,7 +154,8 @@ int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
int err;
err = kern_path(name, flags, path);
/* add more later */
if (!err && d_is_positive(path->dentry))
vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
return err;
}
......@@ -128,6 +167,12 @@ struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
};
path.dentry = lookup_one_len_unlocked(name, parent, len);
if (IS_ERR(path.dentry))
goto out;
if (d_is_positive(path.dentry))
vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
out:
AuTraceErrPtr(path.dentry);
return path.dentry;
}
......@@ -145,6 +190,8 @@ struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
path.dentry = lookup_one_len(name, parent, len);
if (IS_ERR(path.dentry))
goto out;
if (d_is_positive(path.dentry))
vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
out:
AuTraceErrPtr(path.dentry);
......@@ -204,6 +251,17 @@ int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl)
lockdep_off();
err = vfs_create(dir, path->dentry, mode, want_excl);
lockdep_on();
if (!err) {
struct path tmp = *path;
int did;
vfsub_update_h_iattr(&tmp, &did);
if (did) {
tmp.dentry = path->dentry->d_parent;
vfsub_update_h_iattr(&tmp, /*did*/NULL);
}
/*ignore*/
}
out:
return err;
......@@ -226,6 +284,17 @@ int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
lockdep_off();
err = vfs_symlink(dir, path->dentry, symname);
lockdep_on();
if (!err) {
struct path tmp = *path;
int did;
vfsub_update_h_iattr(&tmp, &did);
if (did) {
tmp.dentry = path->dentry->d_parent;
vfsub_update_h_iattr(&tmp, /*did*/NULL);
}
/*ignore*/
}
out:
return err;
......@@ -248,6 +317,17 @@ int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
lockdep_off();
err = vfs_mknod(dir, path->dentry, mode, dev);
lockdep_on();
if (!err) {
struct path tmp = *path;
int did;
vfsub_update_h_iattr(&tmp, &did);
if (did) {
tmp.dentry = path->dentry->d_parent;
vfsub_update_h_iattr(&tmp, /*did*/NULL);
}
/*ignore*/
}
out:
return err;
......@@ -286,6 +366,20 @@ int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
lockdep_off();
err = vfs_link(src_dentry, dir, path->dentry, delegated_inode);
lockdep_on();
if (!err) {
struct path tmp = *path;
int did;
/* fuse has different memory inode for the same inumber */
vfsub_update_h_iattr(&tmp, &did);
if (did) {
tmp.dentry = path->dentry->d_parent;
vfsub_update_h_iattr(&tmp, /*did*/NULL);
tmp.dentry = src_dentry;
vfsub_update_h_iattr(&tmp, /*did*/NULL);
}
/*ignore*/
}
out:
return err;
......@@ -316,6 +410,19 @@ int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
err = vfs_rename(src_dir, src_dentry, dir, path->dentry,
delegated_inode, flags);
lockdep_on();
if (!err) {
int did;
tmp.dentry = d->d_parent;
vfsub_update_h_iattr(&tmp, &did);
if (did) {
tmp.dentry = src_dentry;
vfsub_update_h_iattr(&tmp, /*did*/NULL);
tmp.dentry = src_dentry->d_parent;
vfsub_update_h_iattr(&tmp, /*did*/NULL);
}
/*ignore*/
}
out:
return err;
......@@ -338,6 +445,17 @@ int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
lockdep_off();
err = vfs_mkdir(dir, path->dentry, mode);
lockdep_on();
if (!err) {
struct path tmp = *path;
int did;
vfsub_update_h_iattr(&tmp, &did);
if (did) {
tmp.dentry = path->dentry->d_parent;
vfsub_update_h_iattr(&tmp, /*did*/NULL);
}
/*ignore*/
}
out:
return err;
......@@ -360,6 +478,14 @@ int vfsub_rmdir(struct inode *dir, struct path *path)
lockdep_off();
err = vfs_rmdir(dir, path->dentry);
lockdep_on();
if (!err) {
struct path tmp = {
.dentry = path->dentry->d_parent,
.mnt = path->mnt
};
vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
}
out:
return err;
......@@ -376,6 +502,8 @@ ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
lockdep_off();
err = vfs_read(file, ubuf, count, ppos);
lockdep_on();
if (err >= 0)
vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
return err;
}
......@@ -406,6 +534,8 @@ ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
lockdep_off();
err = vfs_write(file, ubuf, count, ppos);
lockdep_on();
if (err >= 0)
vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
return err;
}
......@@ -439,6 +569,9 @@ int vfsub_flush(struct file *file, fl_owner_t id)
err = file->f_op->flush(file, id);
lockdep_on();
}
if (!err)
vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
/*ignore*/
}
return err;
}
......@@ -452,6 +585,8 @@ int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
lockdep_off();
err = iterate_dir(file, ctx);
lockdep_on();
if (err >= 0)
vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
return err;
}
......@@ -466,6 +601,8 @@ long vfsub_splice_to(struct file *in, loff_t *ppos,
err = do_splice_to(in, ppos, pipe, len, flags);
lockdep_on();
file_accessed(in);
if (err >= 0)
vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
return err;
}
......@@ -477,6 +614,8 @@ long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
lockdep_off();
err = do_splice_from(pipe, out, ppos, len, flags);
lockdep_on();
if (err >= 0)
vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
return err;
}
......@@ -488,6 +627,13 @@ int vfsub_fsync(struct file *file, struct path *path, int datasync)
lockdep_off();
err = vfs_fsync(file, datasync);
lockdep_on();
if (!err) {
if (!path) {
AuDebugOn(!file);
path = &file->f_path;
}
vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
}
return err;
}
......@@ -622,6 +768,8 @@ static void call_notify_change(void *args)
*a->errp = notify_change(a->path->dentry, a->ia,
a->delegated_inode);
lockdep_on();
if (!*a->errp)
vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
}
AuTraceErr(*a->errp);
}
......@@ -696,6 +844,13 @@ static void call_unlink(void *args)
lockdep_off();
*a->errp = vfs_unlink(a->dir, d, a->delegated_inode);
lockdep_on();
if (!*a->errp) {
struct path tmp = {
.dentry = d->d_parent,
.mnt = a->path->mnt
};
vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
}
if (!stop_sillyrename)
dput(d);
......
......@@ -65,10 +65,17 @@ static inline int vfsub_native_ro(struct inode *inode)
|| IS_IMMUTABLE(inode);
}
#ifdef CONFIG_AUFS_BR_FUSE
int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb);
#else
AuStubInt0(vfsub_test_mntns, struct vfsmount *mnt, struct super_block *h_sb);
#endif
int vfsub_sync_filesystem(struct super_block *h_sb, int wait);
/* ---------------------------------------------------------------------- */
int vfsub_update_h_iattr(struct path *h_path, int *did);
struct file *vfsub_dentry_open(struct path *path, int flags);
struct file *vfsub_filp_open(const char *path, int oflags, int mode);
struct au_branch;
......@@ -202,6 +209,7 @@ static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
.mnt = h_mnt
};
touch_atime(&h_path);
vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
}
#endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment