Commit c7ae8357 authored by J. R. Okajima's avatar J. R. Okajima
Browse files

aufs: pseudo-link and procfs support



Aufs pseudo-link (plink) represents a virtual hardlink across the
branches. To implement the plink maintenance mode, aufs uses procfs.
See also the document in this commit.

There is an external user-space utility called 'auplink' in
aufs-util.git, which has these features.
- 'list' shows the pseudo-linked inode numbers and filenames.
- 'cpup' copies-up all pseudo-links to the writable branch.
- 'flush' calls 'cpup', and then 'mount -o remount,clean_plink=inum'
Signed-off-by: default avatarJ. R. Okajima <hooanon05g@gmail.com>
parent a35f55f4
...@@ -90,6 +90,31 @@ dynamically, a branch has its own id. When the branch order changes, ...@@ -90,6 +90,31 @@ dynamically, a branch has its own id. When the branch order changes,
aufs finds the new index by searching the branch id. aufs finds the new index by searching the branch id.
Pseudo-link
----------------------------------------------------------------------
Assume "fileA" exists on the lower readonly branch only and it is
hardlinked to "fileB" on the branch. When you write something to fileA,
aufs copies-up it to the upper writable branch. Additionally aufs
creates a hardlink under the Pseudo-link Directory of the writable
branch. The inode of a pseudo-link is kept in aufs super_block as a
simple list. If fileB is read after unlinking fileA, aufs returns
filedata from the pseudo-link instead of the lower readonly
branch. Because the pseudo-link is based upon the inode, to keep the
inode number by xino (see above) is essentially necessary.
All the hardlinks under the Pseudo-link Directory of the writable branch
should be restored in a proper location later. Aufs provides a utility
to do this. The userspace helpers are executed at remounting and unmounting
aufs by default.
While this utility is running, it puts aufs into the pseudo-link
maintenance mode. In this mode, only the process which began the
maintenance mode (and its child processes) is allowed to operate in
aufs. Some other processes which are not related to the pseudo-link will
be allowed to run too, but the rest have to return an error or wait
until the maintenance mode ends. If a process already holds an inode
mutex (in VFS), it has to return an error.
XIB(external inode number bitmap) XIB(external inode number bitmap)
---------------------------------------------------------------------- ----------------------------------------------------------------------
Addition to the xino file per a branch, aufs has an external inode number Addition to the xino file per a branch, aufs has an external inode number
......
...@@ -17,5 +17,6 @@ aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \ ...@@ -17,5 +17,6 @@ aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
iinfo.o inode.o iinfo.o inode.o
# all are boolean # all are boolean
aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
aufs-$(CONFIG_SYSFS) += sysfs.o aufs-$(CONFIG_SYSFS) += sysfs.o
aufs-$(CONFIG_AUFS_DEBUG) += debug.o aufs-$(CONFIG_AUFS_DEBUG) += debug.o
...@@ -204,6 +204,13 @@ static inline int au_test_fs_bad_iattr_size(struct super_block *sb) ...@@ -204,6 +204,13 @@ static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
; ;
} }
/*
 * Test whether @sb is one of the filesystems (tmpfs or ubifs) which don't
 * check i_nlink in link(2).
 * Returns 1 if so, otherwise 0.
 */
static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
{
	if (au_test_tmpfs(sb))
		return 1;
	return au_test_ubifs(sb) ? 1 : 0;
}
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
/* /*
......
...@@ -38,6 +38,7 @@ struct au_iinfo { ...@@ -38,6 +38,7 @@ struct au_iinfo {
struct au_icntnr { struct au_icntnr {
struct au_iinfo iinfo; struct au_iinfo iinfo;
struct inode vfs_inode; struct inode vfs_inode;
struct hlist_bl_node plink;
struct rcu_head rcu; struct rcu_head rcu;
} ____cacheline_aligned_in_smp; } ____cacheline_aligned_in_smp;
...@@ -78,6 +79,37 @@ int au_iinfo_init(struct inode *inode); ...@@ -78,6 +79,37 @@ int au_iinfo_init(struct inode *inode);
void au_iinfo_fin(struct inode *inode); void au_iinfo_fin(struct inode *inode);
int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink); int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink);
#ifdef CONFIG_PROC_FS
/* plink.c */
int au_plink_maint(struct super_block *sb, int flags);
struct au_sbinfo;
void au_plink_maint_leave(struct au_sbinfo *sbinfo);
int au_plink_maint_enter(struct super_block *sb);
#ifdef CONFIG_AUFS_DEBUG
void au_plink_list(struct super_block *sb);
#else
AuStubVoid(au_plink_list, struct super_block *sb)
#endif
int au_plink_test(struct inode *inode);
struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
struct dentry *h_dentry);
void au_plink_put(struct super_block *sb, int verbose);
void au_plink_clean(struct super_block *sb, int verbose);
#else
AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
AuStubInt0(au_plink_maint_enter, struct super_block *sb);
AuStubVoid(au_plink_list, struct super_block *sb);
AuStubInt0(au_plink_test, struct inode *inode);
AuStub(struct dentry *, au_plink_lkup, return NULL,
struct inode *inode, aufs_bindex_t bindex);
AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
struct dentry *h_dentry);
AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
#endif /* CONFIG_PROC_FS */
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
/* lock subclass for iinfo */ /* lock subclass for iinfo */
......
...@@ -164,9 +164,12 @@ static int __init aufs_init(void) ...@@ -164,9 +164,12 @@ static int __init aufs_init(void)
err = sysaufs_init(); err = sysaufs_init();
if (unlikely(err)) if (unlikely(err))
goto out; goto out;
err = au_wkq_init(); err = au_procfs_init();
if (unlikely(err)) if (unlikely(err))
goto out_sysaufs; goto out_sysaufs;
err = au_wkq_init();
if (unlikely(err))
goto out_procfs;
err = au_cache_init(); err = au_cache_init();
if (unlikely(err)) if (unlikely(err))
goto out_wkq; goto out_wkq;
...@@ -177,6 +180,8 @@ static int __init aufs_init(void) ...@@ -177,6 +180,8 @@ static int __init aufs_init(void)
out_wkq: out_wkq:
au_wkq_fin(); au_wkq_fin();
out_procfs:
au_procfs_fin();
out_sysaufs: out_sysaufs:
sysaufs_fin(); sysaufs_fin();
out: out:
......
...@@ -87,6 +87,15 @@ static inline int au_kmidx_sub(size_t sz, size_t new_sz) ...@@ -87,6 +87,15 @@ static inline int au_kmidx_sub(size_t sz, size_t new_sz)
int au_seq_path(struct seq_file *seq, struct path *path); int au_seq_path(struct seq_file *seq, struct path *path);
#ifdef CONFIG_PROC_FS
/* procfs.c */
int __init au_procfs_init(void);
void au_procfs_fin(void);
#else
AuStubInt0(au_procfs_init, void);
AuStubVoid(au_procfs_fin, void);
#endif
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
/* kmem cache */ /* kmem cache */
......
...@@ -22,6 +22,7 @@ enum { ...@@ -22,6 +22,7 @@ enum {
Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino, Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
Opt_trunc_xino_path, Opt_itrunc_xino, Opt_trunc_xino_path, Opt_itrunc_xino,
Opt_trunc_xib, Opt_notrunc_xib, Opt_trunc_xib, Opt_notrunc_xib,
Opt_plink, Opt_noplink, Opt_list_plink,
Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
}; };
...@@ -40,6 +41,18 @@ static match_table_t options = { ...@@ -40,6 +41,18 @@ static match_table_t options = {
{Opt_trunc_xib, "trunc_xib"}, {Opt_trunc_xib, "trunc_xib"},
{Opt_notrunc_xib, "notrunc_xib"}, {Opt_notrunc_xib, "notrunc_xib"},
#ifdef CONFIG_PROC_FS
{Opt_plink, "plink"},
#else
{Opt_ignore_silent, "plink"},
#endif
{Opt_noplink, "noplink"},
#ifdef CONFIG_AUFS_DEBUG
{Opt_list_plink, "list_plink"},
#endif
/* internal use for the scripts */ /* internal use for the scripts */
{Opt_ignore_silent, "si=%s"}, {Opt_ignore_silent, "si=%s"},
...@@ -239,6 +252,15 @@ static void dump_opts(struct au_opts *opts) ...@@ -239,6 +252,15 @@ static void dump_opts(struct au_opts *opts)
case Opt_notrunc_xib: case Opt_notrunc_xib:
AuLabel(notrunc_xib); AuLabel(notrunc_xib);
break; break;
case Opt_plink:
AuLabel(plink);
break;
case Opt_noplink:
AuLabel(noplink);
break;
case Opt_list_plink:
AuLabel(list_plink);
break;
default: default:
BUG(); BUG();
} }
...@@ -462,6 +484,9 @@ int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts) ...@@ -462,6 +484,9 @@ int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
case Opt_noxino: case Opt_noxino:
case Opt_trunc_xib: case Opt_trunc_xib:
case Opt_notrunc_xib: case Opt_notrunc_xib:
case Opt_plink:
case Opt_noplink:
case Opt_list_plink:
err = 0; err = 0;
opt->type = token; opt->type = token;
break; break;
...@@ -514,6 +539,19 @@ static int au_opt_simple(struct super_block *sb, struct au_opt *opt, ...@@ -514,6 +539,19 @@ static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
err = 1; /* handled */ err = 1; /* handled */
sbinfo = au_sbi(sb); sbinfo = au_sbi(sb);
switch (opt->type) { switch (opt->type) {
case Opt_plink:
au_opt_set(sbinfo->si_mntflags, PLINK);
break;
case Opt_noplink:
if (au_opt_test(sbinfo->si_mntflags, PLINK))
au_plink_put(sb, /*verbose*/1);
au_opt_clr(sbinfo->si_mntflags, PLINK);
break;
case Opt_list_plink:
if (au_opt_test(sbinfo->si_mntflags, PLINK))
au_plink_list(sb);
break;
case Opt_trunc_xino: case Opt_trunc_xino:
au_opt_set(sbinfo->si_mntflags, TRUNC_XINO); au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
break; break;
......
...@@ -22,8 +22,10 @@ struct file; ...@@ -22,8 +22,10 @@ struct file;
#define AuOpt_XINO 1 /* external inode number bitmap #define AuOpt_XINO 1 /* external inode number bitmap
and translation table */ and translation table */
#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */ #define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */
#define AuOpt_PLINK (1 << 6) /* pseudo-link */
#define AuOpt_Def AuOpt_XINO #define AuOpt_Def (AuOpt_XINO \
| AuOpt_PLINK)
#define au_opt_test(flags, name) (flags & AuOpt_##name) #define au_opt_test(flags, name) (flags & AuOpt_##name)
#define au_opt_set(flags, name) do { \ #define au_opt_set(flags, name) do { \
...@@ -33,6 +35,15 @@ struct file; ...@@ -33,6 +35,15 @@ struct file;
((flags) &= ~AuOpt_##name); \ ((flags) &= ~AuOpt_##name); \
} while (0) } while (0)
/*
 * Filter the mount flags for the pseudo-link option.
 * With CONFIG_PROC_FS the flags are returned untouched, without it the
 * AuOpt_PLINK bit is always cleared.
 */
static inline unsigned int au_opts_plink(unsigned int mntflags)
{
#ifndef CONFIG_PROC_FS
	mntflags &= ~AuOpt_PLINK;
#endif
	return mntflags;
}
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
struct au_opt_add { struct au_opt_add {
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2019 Junjiro R. Okajima
*/
/*
* pseudo-link
*/
#include <linux/cred.h>
#include "aufs.h"
/*
 * the pseudo-link maintenance mode.
 * while a user process maintains the pseudo-links,
 * adding a new plink and branch manipulation are prohibited.
 *
 * Flags
 * NOPLM:
 * For entry functions which will handle plink, and i_mutex is already held
 * in VFS.
 * They cannot wait and should return an error at once.
 * Callers have to check the error.
 * NOPLMW:
 * For entry functions which will handle plink, but i_mutex is not held
 * in VFS.
 * They can wait for the plink maintenance mode to finish.
 *
 * They behave like F_SETLK and F_SETLKW.
 * If the caller never handles plink, then both flags are unnecessary.
 *
 * Returns 0 when the caller may go on, or -EAGAIN when the maintenance mode
 * is owned by an unrelated process and NOPLM (without NOPLMW) is given.
 */
int au_plink_maint(struct super_block *sb, int flags)
{
int err;
pid_t pid, ppid;
struct task_struct *parent, *prev;
struct au_sbinfo *sbi;
SiMustAnyLock(sb);
err = 0;
/* nothing to do when the plink option is off */
if (!au_opt_test(au_mntflags(sb), PLINK))
goto out;
sbi = au_sbi(sb);
pid = sbi->si_plink_maint_pid;
/* nobody is in the maintenance mode, or the current task owns it */
if (!pid || pid == current->pid)
goto out;
/* todo: it highly depends upon /sbin/mount.aufs */
/*
 * walk up the real_parent chain of the current task.
 * when one of the ancestors is the maintainer, let the caller pass.
 * the walk ends when real_parent stops advancing (parent == prev).
 */
prev = NULL;
parent = current;
ppid = 0;
rcu_read_lock();
while (1) {
parent = rcu_dereference(parent->real_parent);
if (parent == prev)
break;
ppid = task_pid_vnr(parent);
if (pid == ppid) {
rcu_read_unlock();
goto out;
}
prev = parent;
}
rcu_read_unlock();
/* the current task is unrelated to the maintainer */
if (au_ftest_lock(flags, NOPLMW)) {
/* if there is no i_mutex lock in VFS, we don't need to wait */
/* AuDebugOn(!lockdep_depth(current)); */
/*
 * sleep until the maintenance pid is cleared.
 * the sbinfo lock is released via si_read_unlock() while sleeping and
 * taken again via si_noflush_read_lock() before the pid is re-tested.
 */
while (sbi->si_plink_maint_pid) {
si_read_unlock(sb);
/* gave up wake_up_bit() */
wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
if (au_ftest_lock(flags, FLUSH))
au_nwt_flush(&sbi->si_nowait);
si_noflush_read_lock(sb);
}
} else if (au_ftest_lock(flags, NOPLM)) {
/* cannot wait, fail at once */
AuDbg("ppid %d, pid %d\n", ppid, pid);
err = -EAGAIN;
}
out:
return err;
}
/*
 * leave the pseudo-link maintenance mode.
 * the recorded pid is cleared under si_plink_maint_lock, and then all tasks
 * sleeping on si_plink_wq are woken up.
 */
void au_plink_maint_leave(struct au_sbinfo *sbinfo)
{
spin_lock(&sbinfo->si_plink_maint_lock);
sbinfo->si_plink_maint_pid = 0;
spin_unlock(&sbinfo->si_plink_maint_lock);
/* wake up after the pid is cleared, the sleepers re-test it */
wake_up_all(&sbinfo->si_plink_wq);
}
/*
 * enter the pseudo-link maintenance mode on behalf of the current task.
 * Returns 0 on success (also when the plink option is off, in which case
 * nothing is recorded), or -EBUSY when another pid is already recorded.
 */
int au_plink_maint_enter(struct super_block *sb)
{
	int err = 0;
	struct au_sbinfo *sbi = au_sbi(sb);

	/* make sure i am the only one in this fs */
	si_write_lock(sb, AuLock_FLUSH);
	if (!au_opt_test(au_mntflags(sb), PLINK))
		goto out;

	spin_lock(&sbi->si_plink_maint_lock);
	if (sbi->si_plink_maint_pid)
		err = -EBUSY;
	else
		sbi->si_plink_maint_pid = current->pid;
	spin_unlock(&sbi->si_plink_maint_lock);

out:
	si_write_unlock(sb);
	return err;
}
/* ---------------------------------------------------------------------- */
#ifdef CONFIG_AUFS_DEBUG
/*
 * debugging aid: print the aufs inode number of every entry in the
 * pseudo-link hash table, one bucket after another.
 */
void au_plink_list(struct super_block *sb)
{
	int bucket;
	struct au_sbinfo *sbi;
	struct hlist_bl_head *head;
	struct hlist_bl_node *node;
	struct au_icntnr *entry;

	SiMustAnyLock(sb);

	sbi = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	for (bucket = 0; bucket < AuPlink_NHASH; bucket++) {
		head = &sbi->si_plink[bucket];
		hlist_bl_lock(head);
		hlist_bl_for_each_entry(entry, node, head, plink) {
			AuDbg("%lu\n", entry->vfs_inode.i_ino);
		}
		hlist_bl_unlock(head);
	}
}
#endif
/* is the inode pseudo-linked? */
int au_plink_test(struct inode *inode)
{
int found, i;
struct au_sbinfo *sbinfo;
struct hlist_bl_head *hbl;
struct hlist_bl_node *pos;
struct au_icntnr *icntnr;
sbinfo = au_sbi(inode->i_sb);
AuRwMustAnyLock(&sbinfo->si_rwsem);
AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
found = 0;
i = au_plink_hash(inode->i_ino);
hbl = sbinfo->si_plink + i;
hlist_bl_lock(hbl);
hlist_bl_for_each_entry(icntnr, pos, hbl, plink)
if (&icntnr->vfs_inode == inode) {
found = 1;
break;
}
hlist_bl_unlock(hbl);
return found;
}
/* ---------------------------------------------------------------------- */
/*
 * generate a name for plink.
 * the file will be stored under AUFS_WH_PLINKDIR.
 * the name is "<aufs inode number>.<inode number on the branch>", and the
 * return value is the one from snprintf().
 */
/* 20 is max digits length of ulong 64 */
#define PLINK_NAME_LEN	((20 + 1) * 2)

/* re-commit later */ __maybe_unused
static int plink_name(char *name, int len, struct inode *inode,
		      aufs_bindex_t bindex)
{
	struct inode *h_inode = au_h_iptr(inode, bindex);

	return snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
}
/* arguments to run au_do_plink_lkup() via au_call_do_plink_lkup() */
struct au_do_plink_lkup_args {
/* where the dentry returned by au_do_plink_lkup() is stored */
struct dentry **errp;
/* passed to au_do_plink_lkup() as they are */
struct qstr *tgtname;
struct dentry *h_parent;
struct au_branch *br;
};
/*
 * lookup @tgtname under @h_parent while holding the shared inode lock of
 * the parent directory. @br is not referred to here.
 * Returns what vfsub_lkup_one() returns.
 */
static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
				       struct dentry *h_parent,
				       struct au_branch *br)
{
	struct inode *h_dir = d_inode(h_parent);
	struct dentry *found;

	inode_lock_shared_nested(h_dir, AuLsc_I_CHILD2);
	found = vfsub_lkup_one(tgtname, h_parent);
	inode_unlock_shared(h_dir);

	return found;
}
/* adaptor to call au_do_plink_lkup() with a single opaque argument */
static void au_call_do_plink_lkup(void *args)
{
	struct au_do_plink_lkup_args *lkup = args;

	*lkup->errp = au_do_plink_lkup(lkup->tgtname, lkup->h_parent,
				       lkup->br);
}
/*
 * lookup the plink-ed @inode under the branch at @bindex.
 * when the fsuid of the current task is not root, the lookup is handed to
 * au_wkq_wait(), otherwise it is done directly.
 * Returns the found dentry, or ERR_PTR() when au_wkq_wait() fails.
 *
 * NOTE(review): h_parent is a NULL placeholder for now ("re-commit later"),
 * so au_do_plink_lkup() looks like it would dereference NULL if this were
 * called at this stage -- confirm there is no caller yet.
 */
struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
{
	struct dentry *h_dentry, *h_parent;
	struct au_branch *br;
	int wkq_err;
	char name[PLINK_NAME_LEN];
	struct qstr tgtname = QSTR_INIT(name, 0);

	AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));

	br = au_sbr(inode->i_sb, bindex);
	/* h_parent = br->br_wbr->wbr_plink; */ /* re-commit later */
	h_parent = NULL;
	tgtname.len = plink_name(name, sizeof(name), inode, bindex);

	if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
		h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
	} else {
		struct au_do_plink_lkup_args args = {
			.errp		= &h_dentry,
			.tgtname	= &tgtname,
			.h_parent	= h_parent,
			.br		= br
		};

		wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
		if (unlikely(wkq_err))
			h_dentry = ERR_PTR(wkq_err);
	}

	return h_dentry;
}
/*
 * create a pseudo-link.
 * make the name @tgt under @h_parent a hardlink to @h_dentry.
 * - when the name exists and refers to another inode, unlink it and
 *   look it up again.
 * - when the name doesn't exist, link @h_dentry to it.
 * - when the name already refers to the inode of @h_dentry, do nothing.
 * the inode lock of the parent directory is held during the operation.
 * Returns 0 on success, or a negative error code.
 */
/* re-commit later */ __maybe_unused
static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
struct dentry *h_dentry, struct au_branch *br)
{
int err;
struct path h_path = {
.mnt = au_br_mnt(br)
};
struct inode *h_dir, *delegated;
h_dir = d_inode(h_parent);
inode_lock_nested(h_dir, AuLsc_I_CHILD2);
again:
h_path.dentry = vfsub_lkup_one(tgt, h_parent);
err = PTR_ERR(h_path.dentry);
if (IS_ERR(h_path.dentry))
goto out;
err = 0;
/* wh.plink dir is not monitored */
/* todo: is it really safe? */
/* a stale entry which refers to a different inode, remove it first */
if (d_is_positive(h_path.dentry)
&& d_inode(h_path.dentry) != d_inode(h_dentry)) {
delegated = NULL;
err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
if (unlikely(err == -EWOULDBLOCK)) {
pr_warn("cannot retry for NFSv4 delegation"
" for an internal unlink\n");
iput(delegated);
}
dput(h_path.dentry);
/* the dentry is already put, the dput() at the end gets NULL */
h_path.dentry = NULL;
if (!err)
goto again;
}
/* h_path.dentry is tested only when err is zero, i.e. it is not NULL */
if (!err && d_is_negative(h_path.dentry)) {
delegated = NULL;
err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
if (unlikely(err == -EWOULDBLOCK)) {
pr_warn("cannot retry for NFSv4 delegation"
" for an internal link\n");
iput(delegated);
}
}
dput(h_path.dentry);
out:
inode_unlock(h_dir);
return err;
}
/* arguments to run do_whplink() via call_do_whplink() */
struct do_whplink_args {
/* where the return value of do_whplink() is stored */
int *errp;
/* passed to do_whplink() as they are */
struct qstr *tgt;
struct dentry *h_parent;
struct dentry *h_dentry;
struct au_branch *br;
};
/* adaptor to call do_whplink() with a single opaque argument */
/* re-commit later */ __maybe_unused
static void call_do_whplink(void *args)
{
	struct do_whplink_args *whplink = args;

	*whplink->errp = do_whplink(whplink->tgt, whplink->h_parent,
				    whplink->h_dentry, whplink->br);
}
/*
 * create a new pseudo-link for @h_dentry on @bindex.
 * the linked inode is held in aufs @inode.
 *
 * when @inode is not in the pseudo-link hash table yet, it is added to the
 * head of its bucket and the reference taken by au_igrab() is kept.
 * otherwise the table is left untouched.
 * creating the hardlink on the branch is disabled for now (re-commit
 * later), so @bindex and @h_dentry are unused at this stage.
 */
void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
struct dentry *h_dentry)
{
struct super_block *sb;
struct au_sbinfo *sbinfo;
struct hlist_bl_head *hbl;
struct hlist_bl_node *pos;
struct au_icntnr *icntnr;
int found, err, cnt, i;
sb = inode->i_sb;
sbinfo = au_sbi(sb);
AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
/* the first test, which takes and releases the bucket lock by itself */
found = au_plink_test(inode);
if (found)
return;
i = au_plink_hash(inode->i_ino);
hbl = sbinfo->si_plink + i;
/* the reference for the hash table entry */
au_igrab(inode);
/*
 * test again under the bucket lock.
 * presumably this is for the case where another task appended the same
 * inode after au_plink_test() above -- TODO confirm.
 */
hlist_bl_lock(hbl);
hlist_bl_for_each_entry(icntnr, pos, hbl, plink) {
if (&icntnr->vfs_inode == inode) {
found = 1;
break;
}
}
if (!found) {
icntnr = container_of(inode, struct au_icntnr, vfs_inode);
hlist_bl_add_head(&icntnr->plink, hbl);
}
hlist_bl_unlock(hbl);
if (!found) {
cnt = au_hbl_count(hbl);
#define msg "unexpectedly unbalanced or too many pseudo-links"
if (cnt > AUFS_PLINK_WARN)
AuWarn1(msg ", %d\n", cnt);
#undef msg
/* err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex)); re-commit later */
/* err is always zero for now, so the rollback below is not taken */
err = 0;
if (unlikely(err)) {
pr_warn("err %d, damaged pseudo link.\n", err);
au_hbl_del(&icntnr->plink, hbl);
iput(&icntnr->vfs_inode);
}
} else
/* already in the table, drop the reference taken by au_igrab() above */
iput(&icntnr->vfs_inode);
}
/*
 * free all plinks.
 * drop the inode reference of every entry in the pseudo-link hash table
 * and re-initialize all the buckets as empty.
 * sbinfo has to be write-locked by the caller.
 * @verbose: when non-zero, warn (once) if any bucket is not empty.
 */
void au_plink_put(struct super_block *sb, int verbose)
{
	int i, warned;
	struct au_sbinfo *sbinfo;
	struct hlist_bl_head *hbl;
	struct hlist_bl_node *pos, *tmp;
	struct au_icntnr *icntnr;

	SiMustWriteLock(sb);

	sbinfo = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	/* no spin_lock since sbinfo is write-locked */
	warned = 0;
	for (i = 0; i < AuPlink_NHASH; i++) {
		hbl = sbinfo->si_plink + i;
		if (!warned && verbose && !hlist_bl_empty(hbl)) {
			/* terminate the line as the other messages here do */
			pr_warn("pseudo-link is not flushed\n");
			warned = 1;
		}
		/* the entries are embedded in the inodes, only put them */
		hlist_bl_for_each_entry_safe(icntnr, pos, tmp, hbl, plink)
			iput(&icntnr->vfs_inode);
		INIT_HLIST_BL_HEAD(hbl);
	}
}
/*
 * write-lock sbinfo (waiting for the plink maintenance mode to end) and
 * free all plinks when the plink option is on.
 */
void au_plink_clean(struct super_block *sb, int verbose)
{
	int plink_on;

	si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); /* replaced later */
	plink_on = !!au_opt_test(au_mntflags(sb), PLINK);
	if (plink_on)
		au_plink_put(sb, verbose);
	si_write_unlock(sb);
}
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2010-2019 Junjiro R. Okajima
*/
/*
* procfs interfaces
*/
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
#include "aufs.h"
/*
 * ->release() of the plink maintenance file.
 * when an sbinfo was attached to this file, leave the maintenance mode
 * and drop the kobject reference kept in the file.
 * Always returns 0.
 */
static int au_procfs_plm_release(struct inode *inode, struct file *file)
{
	struct au_sbinfo *sbi = file->private_data;

	if (!sbi)
		return 0;

	au_plink_maint_leave(sbi);
	kobject_put(&sbi->si_kobj);
	return 0;
}
/*
 * handle "clean": free all plinks of the super block attached to @file.
 * nothing is done when no sbinfo is attached.
 */
static void au_procfs_plm_write_clean(struct file *file)
{
	struct au_sbinfo *sbi = file->private_data;

	if (!sbi)
		return;

	au_plink_clean(sbi->si_sb, /*verbose*/0);
}
static int au_procfs_plm_write_si(struct file *file, unsigned long id)
{
int err;
struct super_block *sb;
struct au_sbinfo *sbinfo;
struct hlist_bl_node *pos;
err = -EBUSY;
if (unlikely(file->private_data))
goto out;
sb = NULL;
/* don't use au_sbilist_lock() here */
hlist_bl_lock(&au_sbilist);
hlist_bl_for_each_entry(sbinfo, pos, &au_sbilist, si_list)
if (id == sysaufs_si_id(sbinfo)) {
kobject_get(&sbinfo->si_kobj);
sb = sbinfo->si_sb;
break;
}
hlist_bl_unlock(&au_sbilist);
err = -EINVAL;
if (unlikely(!sb))
goto out;
err = au_plink_maint_enter(sb);
if (!err)
/* keep kobject_get() */
file->private_data = sbinfo;
else
kobject_put(&sbinfo->si_kobj);
out:
return err;
}
/*
 * ->write() of the plink maintenance file.
 * Accept a valid "si=xxxx" only.
 * Once it is accepted successfully, accept "clean" too.
 *
 * Returns @count on success, -EACCES without CAP_SYS_ADMIN, -EINVAL for a
 * too long or malformed input, -EFAULT when copying from @ubuf fails, or
 * the error from kstrtoul() or au_procfs_plm_write_si().
 */
static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
				   size_t count, loff_t *ppos)
{
	ssize_t err;
	unsigned long id;
	/*
	 * "si=" + hex digits + the last newline (allowed) + the terminating
	 * NUL which is stored at buf[count] below.
	 */
	char buf[3 + sizeof(unsigned long) * 2 + 1 + 1];

	err = -EACCES;
	if (unlikely(!capable(CAP_SYS_ADMIN)))
		goto out;

	err = -EINVAL;
	/* keep one byte for the NUL, buf[count] must stay inside buf[] */
	if (unlikely(count >= sizeof(buf)))
		goto out;

	err = copy_from_user(buf, ubuf, count);
	if (unlikely(err)) {
		err = -EFAULT;
		goto out;
	}
	buf[count] = 0;

	err = -EINVAL;
	if (!strcmp("clean", buf)) {
		au_procfs_plm_write_clean(file);
		goto out_success;
	} else if (unlikely(strncmp("si=", buf, 3)))
		goto out;

	err = kstrtoul(buf + 3, 16, &id);
	if (unlikely(err))
		goto out;

	err = au_procfs_plm_write_si(file, id);
	if (unlikely(err))
		goto out;

out_success:
	err = count; /* success */
out:
	return err;
}
/* file operations of the plink maintenance file: write and release only */
static const struct file_operations au_procfs_plm_fop = {
	.owner		= THIS_MODULE,
	.write		= au_procfs_plm_write,
	.release	= au_procfs_plm_release
};
/* ---------------------------------------------------------------------- */
/* the directory AUFS_PLINK_MAINT_DIR, set by au_procfs_init() */
static struct proc_dir_entry *au_procfs_dir;
/* remove the plink maintenance file, and then its directory */
void au_procfs_fin(void)
{
remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
}
/*
 * create the directory AUFS_PLINK_MAINT_DIR and the write-only file
 * AUFS_PLINK_MAINT_NAME under it.
 * Returns 0 on success, or -ENOMEM when either creation fails (the
 * directory is removed again when the file cannot be created).
 */
int __init au_procfs_init(void)
{
	struct proc_dir_entry *entry;

	au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
	if (unlikely(!au_procfs_dir))
		return -ENOMEM;

	entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | 0200,
			    au_procfs_dir, &au_procfs_plm_fop);
	if (unlikely(!entry)) {
		remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
		return -ENOMEM;
	}

	return 0; /* success */
}
...@@ -15,9 +15,12 @@ ...@@ -15,9 +15,12 @@
*/ */
void au_si_free(struct kobject *kobj) void au_si_free(struct kobject *kobj)
{ {
int i;
struct au_sbinfo *sbinfo; struct au_sbinfo *sbinfo;
sbinfo = container_of(kobj, struct au_sbinfo, si_kobj); sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
for (i = 0; i < AuPlink_NHASH; i++)
AuDebugOn(!hlist_bl_empty(sbinfo->si_plink + i));
AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len)); AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
au_rw_write_lock(&sbinfo->si_rwsem); au_rw_write_lock(&sbinfo->si_rwsem);
...@@ -33,7 +36,7 @@ void au_si_free(struct kobject *kobj) ...@@ -33,7 +36,7 @@ void au_si_free(struct kobject *kobj)
int au_si_alloc(struct super_block *sb) int au_si_alloc(struct super_block *sb)
{ {
int err; int err, i;
struct au_sbinfo *sbinfo; struct au_sbinfo *sbinfo;
err = -ENOMEM; err = -ENOMEM;
...@@ -56,7 +59,7 @@ int au_si_alloc(struct super_block *sb) ...@@ -56,7 +59,7 @@ int au_si_alloc(struct super_block *sb)
sbinfo->si_bbot = -1; sbinfo->si_bbot = -1;
sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2; sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
sbinfo->si_mntflags = AuOpt_Def; sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
sbinfo->si_xino_jiffy = jiffies; sbinfo->si_xino_jiffy = jiffies;
sbinfo->si_xino_expire sbinfo->si_xino_expire
...@@ -64,6 +67,11 @@ int au_si_alloc(struct super_block *sb) ...@@ -64,6 +67,11 @@ int au_si_alloc(struct super_block *sb)
mutex_init(&sbinfo->si_xib_mtx); mutex_init(&sbinfo->si_xib_mtx);
/* leave si_xib_last_pindex and si_xib_next_bit */ /* leave si_xib_last_pindex and si_xib_next_bit */
for (i = 0; i < AuPlink_NHASH; i++)
INIT_HLIST_BL_HEAD(sbinfo->si_plink + i);
init_waitqueue_head(&sbinfo->si_plink_wq);
spin_lock_init(&sbinfo->si_plink_maint_lock);
/* leave other members for sysaufs and si_mnt. */ /* leave other members for sysaufs and si_mnt. */
sbinfo->si_sb = sb; sbinfo->si_sb = sb;
sb->s_fs_info = sbinfo; sb->s_fs_info = sbinfo;
...@@ -139,20 +147,31 @@ aufs_bindex_t au_new_br_id(struct super_block *sb) ...@@ -139,20 +147,31 @@ aufs_bindex_t au_new_br_id(struct super_block *sb)
/* it is ok that new 'nwt' tasks are appended while we are sleeping */ /* it is ok that new 'nwt' tasks are appended while we are sleeping */
int si_read_lock(struct super_block *sb, int flags) int si_read_lock(struct super_block *sb, int flags)
{ {
int err;
err = 0;
if (au_ftest_lock(flags, FLUSH)) if (au_ftest_lock(flags, FLUSH))
au_nwt_flush(&au_sbi(sb)->si_nowait); au_nwt_flush(&au_sbi(sb)->si_nowait);
si_noflush_read_lock(sb); si_noflush_read_lock(sb);
err = au_plink_maint(sb, flags);
if (unlikely(err))
si_read_unlock(sb);
return 0; /* re-commit later */ return err;
} }
int si_write_lock(struct super_block *sb, int flags) int si_write_lock(struct super_block *sb, int flags)
{ {
int err;
if (au_ftest_lock(flags, FLUSH)) if (au_ftest_lock(flags, FLUSH))
au_nwt_flush(&au_sbi(sb)->si_nowait); au_nwt_flush(&au_sbi(sb)->si_nowait);
si_noflush_write_lock(sb); si_noflush_write_lock(sb);
err = au_plink_maint(sb, flags);
if (unlikely(err))
si_write_unlock(sb);
return 0; /* re-commit later */ return err;
} }
...@@ -18,6 +18,12 @@ ...@@ -18,6 +18,12 @@
#include "rwsem.h" #include "rwsem.h"
#include "wkq.h" #include "wkq.h"
/* the number of buckets in the pseudo-link hash table */
#define AuPlink_NHASH	100

/* pick the pseudo-link hash bucket index for the inode number @ino */
static inline int au_plink_hash(ino_t ino)
{
	const ino_t bucket = ino % AuPlink_NHASH;

	return bucket;
}
struct au_branch; struct au_branch;
struct au_sbinfo { struct au_sbinfo {
/* nowait tasks in the system-wide workqueue */ /* nowait tasks in the system-wide workqueue */
...@@ -59,6 +65,12 @@ struct au_sbinfo { ...@@ -59,6 +65,12 @@ struct au_sbinfo {
/* reserved for future use */ /* reserved for future use */
/* unsigned long long si_xib_limit; */ /* Max xib file size */ /* unsigned long long si_xib_limit; */ /* Max xib file size */
/* pseudo_link list */
struct hlist_bl_head si_plink[AuPlink_NHASH];
wait_queue_head_t si_plink_wq;
spinlock_t si_plink_maint_lock;
pid_t si_plink_maint_pid;
/* /*
* sysfs and lifetime management. * sysfs and lifetime management.
* this is not a small structure and it may be a waste of memory in case * this is not a small structure and it may be a waste of memory in case
...@@ -82,6 +94,8 @@ struct au_sbinfo { ...@@ -82,6 +94,8 @@ struct au_sbinfo {
#define AuLock_IR (1 << 1) /* read-lock inode */ #define AuLock_IR (1 << 1) /* read-lock inode */
#define AuLock_IW (1 << 2) /* write-lock inode */ #define AuLock_IW (1 << 2) /* write-lock inode */
#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */ #define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */
#define AuLock_NOPLM (1 << 5) /* return err in plm mode */
#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */
#define au_ftest_lock(flags, name) ((flags) & AuLock_##name) #define au_ftest_lock(flags, name) ((flags) & AuLock_##name)
#define au_fset_lock(flags, name) \ #define au_fset_lock(flags, name) \
do { (flags) |= AuLock_##name; } while (0) do { (flags) |= AuLock_##name; } while (0)
......
...@@ -68,6 +68,46 @@ out: ...@@ -68,6 +68,46 @@ out:
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
/*
 * test whether one more hardlink to @inode is acceptable.
 * only for the filesystems which don't check i_nlink in link(2) by
 * themselves, refuse when i_nlink has reached the rough limit.
 * Returns 0 when acceptable, otherwise -EMLINK.
 */
static int au_test_nlink(struct inode *inode)
{
	const unsigned int link_max = UINT_MAX >> 1; /* rough margin */

	if (au_test_fs_no_limit_nlink(inode->i_sb)
	    && inode->i_nlink >= link_max)
		return -EMLINK;
	return 0;
}
/*
 * link @src_dentry to the dentry in @path under the directory @dir.
 * the link count is tested by au_test_nlink() and the security hook is
 * called before vfs_link(), which runs with lockdep turned off.
 * Returns 0 on success, or the error from any of these steps.
 */
int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
	       struct inode **delegated_inode)
{
	int err;
	struct dentry *new_dentry;

	IMustLock(dir);

	err = au_test_nlink(d_inode(src_dentry));
	if (unlikely(err))
		goto out;

	/* we don't call may_linkat() */
	/*
	 * point @path to the parent of the new dentry during the security
	 * hook only, and restore it right after.
	 */
	new_dentry = path->dentry;
	path->dentry = new_dentry->d_parent;
	err = security_path_link(src_dentry, path, new_dentry);
	path->dentry = new_dentry;
	if (unlikely(err))
		goto out;

	lockdep_off();
	err = vfs_link(src_dentry, dir, path->dentry, delegated_inode);
	lockdep_on();

out:
	return err;
}
/* ---------------------------------------------------------------------- */
struct unlink_args { struct unlink_args {
int *errp; int *errp;
struct inode *dir; struct inode *dir;
......
...@@ -51,6 +51,11 @@ static inline struct dentry *vfsub_lkup_one(struct qstr *name, ...@@ -51,6 +51,11 @@ static inline struct dentry *vfsub_lkup_one(struct qstr *name,
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
int vfsub_link(struct dentry *src_dentry, struct inode *dir,
struct path *path, struct inode **delegated_inode);
/* ---------------------------------------------------------------------- */
static inline loff_t vfsub_f_size_read(struct file *file) static inline loff_t vfsub_f_size_read(struct file *file)
{ {
return i_size_read(file_inode(file)); return i_size_read(file_inode(file));
......
...@@ -69,6 +69,12 @@ typedef int16_t aufs_bindex_t; ...@@ -69,6 +69,12 @@ typedef int16_t aufs_bindex_t;
#define AUFS_XINO_DEF_SEC 30 /* seconds */ #define AUFS_XINO_DEF_SEC 30 /* seconds */
#define AUFS_XINO_DEF_TRUNC 45 /* percentage */ #define AUFS_XINO_DEF_TRUNC 45 /* percentage */
#define AUFS_WKQ_NAME AUFS_NAME "d" #define AUFS_WKQ_NAME AUFS_NAME "d"
#define AUFS_PLINK_WARN 50 /* number of plinks in a single bucket */
/* pseudo-link maintenance under /proc */
#define AUFS_PLINK_MAINT_NAME "plink_maint"
#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME
#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
/* branch permissions and attributes */ /* branch permissions and attributes */
#define AUFS_BRPERM_RO "ro" #define AUFS_BRPERM_RO "ro"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment