Commit 9dc58c2b authored by J. R. Okajima's avatar J. R. Okajima
Browse files

aufs: hnotify 1/3, headers



This is the hardest test to support UDBA (users' direct branch access).
It uses 'fsnotify' internally.  On detecting UDBA, aufs decrements the
generation of the cached aufs objects.  On the next access to the file,
aufs detects that the generation is obsolete and tries to refresh it.
Eventually the aufs cache will be updated to the latest status.

The fsnotify is set on the cached dirs on the non-RR branches.
The RR (real readonly) branches will never be modified and it is
unnecessary to set fsnotify for them.

This commit is for the declarations mainly, and the body parts will be
in succeeding commits.

This feature is compiled only when CONFIG_AUFS_HNOTIFY is enabled.
See also the document in this commit.
Signed-off-by: default avatarJ. R. Okajima <hooanon05g@gmail.com>
parent a030bc2a
......@@ -15,3 +15,57 @@ When a branch is a remote filesystem, aufs basically relies upon its
them.
For d_revalidate, aufs implements three levels of revalidate tests. See
"Revalidate Dentry and UDBA" in detail.
Revalidate Dentry and UDBA (User's Direct Branch Access)
----------------------------------------------------------------------
Generally VFS helpers re-validate a dentry as a part of lookup.
0. digging down the directory hierarchy.
1. lock the parent dir by its i_mutex.
2. lookup the final (child) entry.
3. revalidate it.
4. call the actual operation (create, unlink, etc.)
5. unlock the parent dir
If the filesystem implements its ->d_revalidate() (step 3), then it is
called. Actually aufs implements it and checks the dentry on a branch is
still valid.
But it is not enough. Because aufs has to release the lock for the
parent dir on a branch at the end of ->lookup() (step 2) and
->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
held by VFS.
If the file on a branch is changed directly, e.g. bypassing aufs, after
aufs released the lock, then the subsequent operation may cause
an unpleasant result.
This situation is a result of the VFS architecture, in which ->lookup() and
->d_revalidate() are separated. But I never say it is wrong. It is a good
design from VFS's point of view. It is just not suitable for the sub-VFS
character of aufs.
Aufs supports such a case with three levels of revalidation, which are
selectable by the user.
1. Simple Revalidate
In addition to the native VFS flow, confirm the child-parent
relationship on the branch just after locking the parent dir on the
branch in the "actual operation" (step 4). When this validation
fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
checks the validity of the dentry on branches.
2. Monitor Changes Internally by Inotify/Fsnotify
In addition to the above, in the "actual operation" (step 4) aufs looks up
the dentry on the branch again, and returns EBUSY if it finds a different
dentry.
Additionally, aufs sets an inotify/fsnotify watch on every dir on branches
while it is in cache. When an event is notified, aufs registers a
function to kernel 'events' thread by schedule_work(). And the
function sets some special status to the cached aufs dentry and inode
private data. If they are not cached, then aufs has nothing to
do. When the same file is accessed through aufs (step 0-3) later,
aufs will detect the status and refresh all necessary data.
In this mode, aufs has to ignore the event which is fired by aufs
itself.
3. No Extra Validation
This is the simplest test: it doesn't add any additional revalidation
test, and skips the revalidation in step 4. It is useful and improves
aufs performance when the system reliably hides the aufs branches from users,
by over-mounting something (or by another method).
......@@ -52,6 +52,24 @@ config AUFS_SBILIST
Automatic configuration for internal use.
When aufs supports Magic SysRq or /proc, enabled automatically.
config AUFS_HNOTIFY
bool "Detect direct branch access (bypassing aufs)"
help
If you want to modify files on branches directly, e.g. bypassing aufs,
and want aufs to fully detect those changes, then enable this
option and use the 'udba=notify' mount option.
Currently there is only one available configuration, "fsnotify".
It will have a negative impact on performance.
See aufs.5 for details.
choice
prompt "method" if AUFS_HNOTIFY
default AUFS_HFSNOTIFY
config AUFS_HFSNOTIFY
bool "fsnotify"
select FSNOTIFY
endchoice
config AUFS_EXPORT
bool "NFS-exportable aufs"
depends on EXPORTFS = y
......
......@@ -59,6 +59,13 @@ struct au_wbr {
/* ext2 has 3 types of operations at least, ext3 has 4 */
#define AuBrDynOp (AuDyLast * 4)
#ifdef CONFIG_AUFS_HFSNOTIFY
/* support for asynchronous destruction */
struct au_br_hfsnotify {
/* fsnotify group held by the branch (see au_branch.br_hfsn) */
struct fsnotify_group *hfsn_group;
};
#endif
/* sysfs entries */
struct au_brsysfs {
char name[16];
......@@ -86,6 +93,10 @@ struct au_branch {
struct au_wbr *br_wbr;
#ifdef CONFIG_AUFS_HFSNOTIFY
struct au_br_hfsnotify *br_hfsn;
#endif
#ifdef CONFIG_SYSFS
/* entries under sysfs per mount-point */
struct au_brsysfs br_sysfs[AuBrSysfs_Last];
......@@ -116,6 +127,15 @@ static inline int au_br_rdonly(struct au_branch *br)
? -EROFS : 0;
}
/*
 * Tell whether a branch with permission bits @brperm may be watched by
 * hnotify.  Only branches without AuBrPerm_RR qualify, and only when
 * CONFIG_AUFS_HNOTIFY is enabled; otherwise the answer is always 0.
 */
static inline int au_br_hnotifyable(int brperm __maybe_unused)
{
	int notifyable = 0;

#ifdef CONFIG_AUFS_HNOTIFY
	notifyable = !(brperm & AuBrPerm_RR);
#endif
	return notifyable;
}
static inline int au_br_test_oflag(int oflag, struct au_branch *br)
{
int err, exec_flag;
......
......@@ -214,5 +214,21 @@ static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
au_di(dentry)->di_bdiropq = bindex;
}
/* ---------------------------------------------------------------------- */
#ifdef CONFIG_AUFS_HNOTIFY
/*
 * Atomically decrement the generation stored in the aufs private data of
 * dentry @d, so that it no longer matches its previous value.
 */
static inline void au_digen_dec(struct dentry *d)
{
atomic_dec(&au_di(d)->di_generation);
}
/* Reset the dentry's filesystem-private pointer (d_fsdata) to NULL. */
static inline void au_hn_di_reinit(struct dentry *dentry)
{
dentry->d_fsdata = NULL;
}
#else
AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
#endif /* CONFIG_AUFS_HNOTIFY */
#endif /* __KERNEL__ */
#endif /* __AUFS_DENTRY_H__ */
......@@ -10,7 +10,6 @@
#ifdef CONFIG_AUFS_DEBUG
#include <linux/migrate.h>
#endif
#include <linux/fsnotify.h>
#include <linux/pagemap.h>
#include "aufs.h"
......
......@@ -7,7 +7,6 @@
* inode functions
*/
#include <linux/cred.h>
#include "aufs.h"
struct inode *au_igrab(struct inode *inode)
......
......@@ -12,14 +12,28 @@
#ifdef __KERNEL__
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include "rwsem.h"
struct vfsmount;
/*
 * Notification state attached to an au_hinode through its hi_notify
 * pointer.  The struct has no members when CONFIG_AUFS_HNOTIFY is disabled.
 */
struct au_hnotify {
#ifdef CONFIG_AUFS_HNOTIFY
#ifdef CONFIG_AUFS_HFSNOTIFY
/* never use fsnotify_add_vfsmount_mark() */
struct fsnotify_mark hn_mark;
#endif
struct inode *hn_aufs_inode; /* no get/put */
/* NOTE(review): presumably used to defer freeing via RCU -- confirm */
struct rcu_head rcu;
#endif
} ____cacheline_aligned_in_smp;
struct au_hinode {
struct inode *hi_inode;
aufs_bindex_t hi_id;
#ifdef CONFIG_AUFS_HNOTIFY
struct au_hnotify *hi_notify;
#endif
/* reference to the copied-up whiteout with get/put */
struct dentry *hi_whdentry;
......@@ -116,12 +130,18 @@ unsigned int au_hi_flags(struct inode *inode, int isdir);
/* hinode flags */
#define AuHi_XINO 1
#define AuHi_HNOTIFY (1 << 1)
#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
#define au_fset_hi(flags, name) \
do { (flags) |= AuHi_##name; } while (0)
#define au_fclr_hi(flags, name) \
do { (flags) &= ~AuHi_##name; } while (0)
/*
 * Without hnotify support the flag collapses to 0, so au_ftest_hi() on it
 * is always false and au_fset_hi()/au_fclr_hi() leave the flags unchanged.
 */
#ifndef CONFIG_AUFS_HNOTIFY
#undef AuHi_HNOTIFY
#define AuHi_HNOTIFY 0
#endif
void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
struct inode *h_inode, unsigned int flags);
......@@ -388,5 +408,115 @@ static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
}
#endif
/* ---------------------------------------------------------------------- */
struct au_branch;
#ifdef CONFIG_AUFS_HNOTIFY
/*
 * Operations supplied by an hnotify backend; hfsnotify.c provides the
 * instance named au_hnotify_op.
 */
struct au_hnotify_op {
void (*ctl)(struct au_hinode *hinode, int do_set);
int (*alloc)(struct au_hinode *hinode);
/*
 * if it returns true, the caller should free hinode->hi_notify,
 * otherwise ->free() frees it.
 */
int (*free)(struct au_hinode *hinode,
struct au_hnotify *hn) __must_check;
void (*fin)(void);
int (*init)(void);
int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
void (*fin_br)(struct au_branch *br);
int (*init_br)(struct au_branch *br, int perm);
};
/* hnotify.c */
int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
void au_hn_free(struct au_hinode *hinode);
void au_hn_ctl(struct au_hinode *hinode, int do_set);
void au_hn_reset(struct inode *inode, unsigned int flags);
int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
struct qstr *h_child_qstr, struct inode *h_child_inode);
int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
int au_hnotify_init_br(struct au_branch *br, int perm);
void au_hnotify_fin_br(struct au_branch *br);
int __init au_hnotify_init(void);
void au_hnotify_fin(void);
/* hfsnotify.c */
extern const struct au_hnotify_op au_hnotify_op;
/* Start @hinode out with no notification state attached. */
static inline void au_hn_init(struct au_hinode *hinode)
{
	hinode->hi_notify = NULL;
}
/* Accessor for the notification state attached to @hinode (may be NULL). */
static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
{
	struct au_hnotify *hn = hinode->hi_notify;

	return hn;
}
#else
AuStub(int, au_hn_alloc, return -EOPNOTSUPP,
struct au_hinode *hinode __maybe_unused,
struct inode *inode __maybe_unused)
AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode)
AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
int do_set __maybe_unused)
AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
unsigned int flags __maybe_unused)
AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
struct au_branch *br __maybe_unused,
int perm __maybe_unused)
AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
int perm __maybe_unused)
AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
AuStubInt0(__init au_hnotify_init, void)
AuStubVoid(au_hnotify_fin, void)
AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
#endif /* CONFIG_AUFS_HNOTIFY */
/* Suspend hnotify on @hdir, i.e. au_hn_ctl() with do_set == 0. */
static inline void au_hn_suspend(struct au_hinode *hdir)
{
	const int do_set = 0;

	au_hn_ctl(hdir, do_set);
}
/* Resume hnotify on @hdir, i.e. au_hn_ctl() with do_set == 1. */
static inline void au_hn_resume(struct au_hinode *hdir)
{
	const int do_set = 1;

	au_hn_ctl(hdir, do_set);
}
/*
 * Lock the inode of the branch dir @hdir, then suspend hnotify on it.
 * Pair with au_hn_inode_unlock(), which undoes both in reverse order.
 * NOTE(review): the suspend presumably keeps aufs from reacting to events
 * fired by its own operations on the branch -- confirm in hnotify.c.
 */
static inline void au_hn_inode_lock(struct au_hinode *hdir)
{
inode_lock(hdir->hi_inode);
au_hn_suspend(hdir);
}
/*
 * Same as au_hn_inode_lock(), but takes the inode lock through
 * inode_lock_nested() with the subclass @sc before suspending hnotify.
 */
static inline void au_hn_inode_lock_nested(struct au_hinode *hdir,
unsigned int sc __maybe_unused)
{
inode_lock_nested(hdir->hi_inode, sc);
au_hn_suspend(hdir);
}
#if 0 /* unused */
#include "vfsub.h"
static inline void au_hn_inode_lock_shared_nested(struct au_hinode *hdir,
unsigned int sc)
{
inode_lock_shared_nested(hdir->hi_inode, sc);
au_hn_suspend(hdir);
}
#endif
/*
 * Resume hnotify on the branch dir @hdir, then release its inode lock;
 * the resume happens while the lock is still held.
 */
static inline void au_hn_inode_unlock(struct au_hinode *hdir)
{
au_hn_resume(hdir);
inode_unlock(hdir->hi_inode);
}
#endif /* __KERNEL__ */
#endif /* __AUFS_INODE_H__ */
......@@ -104,6 +104,7 @@ AuStubVoid(au_procfs_fin, void);
/* indices into the au_cache[] array of kmem caches */
enum {
AuCache_DINFO,
AuCache_ICNTNR,
/*
 * NOTE(review): presumably kept last because its cache helpers exist only
 * under CONFIG_AUFS_HNOTIFY -- confirm against the cache setup code.
 */
AuCache_HNOTIFY, /* must be last */
AuCache_Last
};
......@@ -135,6 +136,9 @@ extern struct kmem_cache *au_cache[AuCache_Last];
AuCacheFuncs(dinfo, DINFO);
AuCacheFuncs(icntnr, ICNTNR);
#ifdef CONFIG_AUFS_HNOTIFY
AuCacheFuncs(hnotify, HNOTIFY);
#endif
#endif /* __KERNEL__ */
#endif /* __AUFS_MODULE_H__ */
......@@ -24,14 +24,21 @@ struct file;
#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */
#define AuOpt_UDBA_NONE (1 << 2) /* users direct branch access */
#define AuOpt_UDBA_REVAL (1 << 3)
#define AuOpt_UDBA_HNOTIFY (1 << 4)
#define AuOpt_PLINK (1 << 6) /* pseudo-link */
#define AuOpt_DIO (1 << 14) /* direct io */
/*
 * Without hnotify support the option bit collapses to 0, so
 * au_opt_test() on it is always false.
 */
#ifndef CONFIG_AUFS_HNOTIFY
#undef AuOpt_UDBA_HNOTIFY
#define AuOpt_UDBA_HNOTIFY 0
#endif
#define AuOpt_Def (AuOpt_XINO \
| AuOpt_UDBA_REVAL \
| AuOpt_PLINK)
#define AuOptMask_UDBA (AuOpt_UDBA_NONE \
| AuOpt_UDBA_REVAL)
| AuOpt_UDBA_REVAL \
| AuOpt_UDBA_HNOTIFY)
#define au_opt_test(flags, name) (flags & AuOpt_##name)
#define au_opt_set(flags, name) do { \
......
......@@ -7,7 +7,6 @@
* pseudo-link
*/
#include <linux/cred.h>
#include "aufs.h"
/*
......
......@@ -8,7 +8,6 @@
*/
#include <linux/namei.h>
#include <linux/cred.h>
#include <linux/security.h>
#include <linux/uaccess.h>
#include "aufs.h"
......
......@@ -7,7 +7,6 @@
* whiteout for logical deletion and opaque directory
*/
#include <linux/cred.h>
#include "aufs.h"
#define WH_MASK 0444
......
......@@ -9,7 +9,6 @@
*/
#include <linux/module.h>
#include <linux/sched/signal.h>
#include "aufs.h"
/* internal workqueue named AUFS_WKQ_NAME */
......
......@@ -22,7 +22,6 @@
* + remount with xino/noxino options
*/
#include <linux/sched/signal.h>
#include <linux/seq_file.h>
#include <linux/statfs.h>
#include <linux/uaccess.h>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment