// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2019 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* inode operations (except add/del/rename)
*/
#include
#include
#include
#include
#include
#include "aufs.h"
/*
 * test the access @mask against a single inode on a branch.
 * @h_inode: the inode on the branch
 * @mask: the requested MAY_* bits
 * @h_path: the branch path, consulted only for path_noexec()
 * @brperm: the branch permission bits, consulted only for au_br_writable()
 * returns zero when the access is granted, otherwise a negative errno.
 */
static int h_permission(struct inode *h_inode, int mask,
			struct path *h_path, int brperm)
{
	int err;
	const unsigned char wants_write = !!(mask & (MAY_WRITE | MAY_APPEND));
	unsigned char use_generic;

	/* never write to an immutable inode */
	if (wants_write && IS_IMMUTABLE(h_inode))
		return -EPERM;

	/* exec of a regular file needs an exec-able path and any x bit */
	if ((mask & MAY_EXEC) && S_ISREG(h_inode->i_mode)) {
		if (path_noexec(h_path) || !(h_inode->i_mode & 0111))
			return -EACCES;
	}

	/*
	 * generic_permission() replaces the lower fs ->permission() when
	 * - writing to a ro branch, to skip the lower fs test.
	 * - a write check without MAY_READ is made for an nfs dir. nfs
	 *   optimizes such check, but a policy for link/rename requires a
	 *   real check.
	 * - the lower fs has no ->permission().
	 */
	use_generic = wants_write && !au_br_writable(brperm);
	if (!use_generic)
		use_generic = au_test_nfs(h_inode->i_sb)
			&& S_ISDIR(h_inode->i_mode)
			&& wants_write
			&& !(mask & MAY_READ);
	if (!use_generic)
		use_generic = !h_inode->i_op->permission;

	if (!use_generic) {
		/* AuLabel(h_inode->permission); */
		err = h_inode->i_op->permission(h_inode, mask);
	} else {
		/* AuLabel(generic_permission); */
		/* AuDbg("get_acl %ps\n", h_inode->i_op->get_acl); */
		err = generic_permission(h_inode, mask);
		/*
		 * nfs always sets SB_POSIXACL regardless of its mount option
		 * 'noacl', and generic_permission() returns -EOPNOTSUPP in
		 * that case.
		 */
		if (err == -EOPNOTSUPP && au_test_nfs_noacl(h_inode))
			err = h_inode->i_op->permission(h_inode, mask);
	}
	AuTraceErr(err);

	if (!err)
		err = devcgroup_inode_permission(h_inode, mask);
	if (!err)
		err = security_inode_permission(h_inode, mask);

	return err;
}
/*
 * ->permission() for all aufs inodes.
 * a read-only access to a dir (unless the DIRPERM1 option is set) is tested
 * against every lower dir of @inode. any other access is tested against the
 * top lower inode only, and a write additionally requires a non-readonly
 * branch at or above it (except for special files).
 * returns zero or a negative errno; -ECHILD for MAY_NOT_BLOCK.
 */
static int aufs_permission(struct inode *inode, int mask)
{
	int err;
	aufs_bindex_t bindex, bbot;
	const unsigned char isdir = !!S_ISDIR(inode->i_mode);
	const unsigned char wants_write = !!(mask & (MAY_WRITE | MAY_APPEND));
	struct inode *h_inode;
	struct super_block *sb;
	struct au_branch *br;

	/* todo: support rcu-walk? */
	if (mask & MAY_NOT_BLOCK)
		return -ECHILD;

	sb = inode->i_sb;
	si_read_lock(sb, AuLock_FLUSH);
	ii_read_lock_child(inode);
#if 0 /* reserved for future use */
	/*
	 * This test may be rather 'too much' since the test is essentially done
	 * in the aufs_lookup(). Theoretically it is possible that the inode
	 * generation doesn't match to the superblock's here. But it isn't a
	 * big deal I suppose.
	 */
	err = au_iigen_test(inode, au_sigen(sb));
	if (unlikely(err))
		goto out;
#endif

	if (isdir
	    && !wants_write
	    && !au_opt_test(au_mntflags(sb), DIRPERM1)) {
		/* non-write to dir, walk all the lower dirs */
		err = 0;
		bbot = au_ibbot(inode);
		bindex = au_ibtop(inode);
		while (!err && bindex <= bbot) {
			h_inode = au_h_iptr(inode, bindex);
			if (h_inode) {
				err = au_busy_or_stale();
				if (unlikely(!S_ISDIR(h_inode->i_mode)))
					break;

				br = au_sbr(sb, bindex);
				err = h_permission(h_inode, mask,
						   &br->br_path, br->br_perm);
			}
			bindex++;
		}
		goto out;
	}

	/* test the top lower inode only */
	err = au_busy_or_stale();
	bindex = au_ibtop(inode);
	h_inode = au_h_iptr(inode, bindex);
	if (unlikely(!h_inode
		     || (h_inode->i_mode & S_IFMT)
		     != (inode->i_mode & S_IFMT)))
		goto out;

	br = au_sbr(sb, bindex);
	err = h_permission(h_inode, mask, &br->br_path, br->br_perm);
	if (!err
	    && wants_write
	    && !special_file(h_inode->i_mode)) {
		/* test whether the upper writable branch exists */
		err = -EROFS;
		while (bindex >= 0) {
			if (!au_br_rdonly(au_sbr(sb, bindex))) {
				err = 0;
				break;
			}
			bindex--;
		}
	}

out:
	ii_read_unlock(inode);
	si_read_unlock(sb);
	return err;
}
/* ---------------------------------------------------------------------- */
/*
 * ->lookup() for aufs dirs.
 * looks @dentry up on the branches via au_lkup_dentry(), starting from the
 * top branch index of its parent. when a positive entry was found, an aufs
 * inode is obtained by au_new_inode() and spliced into the dcache.
 * returns what d_splice_alias() returned in success, otherwise ERR_PTR():
 * -ECHILD for rcu-walk, -ENAMETOOLONG for a name longer than
 * AUFS_MAX_NAMELEN, or the error from the helpers called here.
 */
static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct dentry *ret, *parent;
struct inode *inode;
struct super_block *sb;
int err, npositive;
IMustLock(dir);
/* todo: support rcu-walk? */
ret = ERR_PTR(-ECHILD);
if (flags & LOOKUP_RCU)
goto out;
ret = ERR_PTR(-ENAMETOOLONG);
if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
goto out;
sb = dir->i_sb;
err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
ret = ERR_PTR(err);
if (unlikely(err))
goto out;
/*
 * NOTE(review): every later exit goes through di_write_unlock(dentry),
 * so au_di_init() presumably returns with the dinfo write-locked.
 */
err = au_di_init(dentry);
ret = ERR_PTR(err);
if (unlikely(err))
goto out_si;
inode = NULL;
npositive = 0; /* suppress a warning */
parent = dentry->d_parent; /* dir inode is locked */
di_read_lock_parent(parent, AuLock_IR);
/* the lookup is done only when both tests on the parent pass */
err = au_alive_dir(parent);
if (!err)
err = au_digen_test(parent, au_sigen(sb));
if (!err) {
/* regardless LOOKUP_CREATE, always ALLOW_NEG */
npositive = au_lkup_dentry(dentry, au_dbtop(parent),
AuLkup_ALLOW_NEG);
err = npositive;
}
di_read_unlock(parent, AuLock_IR);
ret = ERR_PTR(err);
if (unlikely(err < 0))
goto out_unlock;
/* a non-zero npositive means a positive entry was found on a branch */
if (npositive) {
inode = au_new_inode(dentry, /*must_new*/0);
if (IS_ERR(inode)) {
ret = (void *)inode;
inode = NULL;
goto out_unlock;
}
}
/* an extra reference for this function, dropped by iput() below */
if (inode)
atomic_inc(&inode->i_count);
ret = d_splice_alias(inode, dentry);
#if 0 /* reserved for future use */
if (unlikely(d_need_lookup(dentry))) {
spin_lock(&dentry->d_lock);
dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
spin_unlock(&dentry->d_lock);
} else
#endif
if (inode) {
if (!IS_ERR(ret)) {
iput(inode);
/*
 * another dentry was returned instead of @dentry.
 * NOTE(review): the inode is write-unlocked here, presumably
 * because au_new_inode() returned it locked and the
 * di_write_unlock(dentry) below does not cover it then.
 */
if (ret && ret != dentry)
ii_write_unlock(inode);
} else {
ii_write_unlock(inode);
iput(inode);
inode = NULL;
}
}
out_unlock:
di_write_unlock(dentry);
out_si:
si_read_unlock(sb);
out:
return ret;
}
/* ---------------------------------------------------------------------- */
/*
* very dirty and complicated aufs ->atomic_open().
* aufs_atomic_open()
* + au_aopen_or_create()
* + add_simple()
* + vfsub_atomic_open()
* + branch fs ->atomic_open()
* may call the actual 'open' for h_file
* + inc br_nfiles only if opened
* + au_aopen_no_open() or au_aopen_do_open()
*
* au_aopen_do_open()
* + finish_open()
* + au_do_aopen()
* + au_do_open() the body of all 'open'
* + au_do_open_nondir()
* set the passed h_file
*
* au_aopen_no_open()
* + finish_no_open()
*/
/*
 * an entry of the sbinfo si_aopen list, linked only while finish_open() is
 * running in au_aopen_do_open(). au_do_aopen() searches the list by @file
 * in order to find @h_file.
 */
struct aopen_node {
	struct hlist_bl_node	hblist;
	struct file		*file;
	struct file		*h_file;
};
static int au_do_aopen(struct inode *inode, struct file *file)
{
struct hlist_bl_head *aopen;
struct hlist_bl_node *pos;
struct aopen_node *node;
struct au_do_open_args args = {
.aopen = 1,
.open = au_do_open_nondir
};
aopen = &au_sbi(inode->i_sb)->si_aopen;
hlist_bl_lock(aopen);
hlist_bl_for_each_entry(node, pos, aopen, hblist)
if (node->file == file) {
args.h_file = node->h_file;
break;
}
hlist_bl_unlock(aopen);
/* AuDebugOn(!args.h_file); */
return au_do_open(file, &args);
}
/*
 * complete the aufs open of @file with au_do_aopen() as the 'open' callback
 * of finish_open(). @aopen_node is linked to the si_aopen list only while
 * finish_open() is running, so that the callback can find it.
 * returns the value of finish_open().
 */
static int au_aopen_do_open(struct file *file, struct dentry *dentry,
			    struct aopen_node *aopen_node)
{
	int err;
	struct hlist_bl_head *head = &au_sbi(dentry->d_sb)->si_aopen;

	AuLabel(here);
	au_hbl_add(&aopen_node->hblist, head);
	err = finish_open(file, dentry, au_do_aopen);
	au_hbl_del(&aopen_node->hblist, head);

	/* AuDbgFile(file); */
	AuDbg("%pd%s%s\n", dentry,
	      (file->f_mode & FMODE_CREATED) ? " created" : "",
	      (file->f_mode & FMODE_OPENED) ? " opened" : "");
	AuTraceErr(err);
	return err;
}
/*
 * tell VFS that @file was not opened here, by finish_no_open().
 * a reference to @dentry is taken before the call.
 * NOTE(review): presumably finish_no_open() consumes that reference, confirm
 * against the VFS documentation.
 * returns the value of finish_no_open().
 */
static int au_aopen_no_open(struct file *file, struct dentry *dentry)
{
	int err;

	AuLabel(here);
	err = finish_no_open(file, dget(dentry));
	AuTraceErr(err);

	return err;
}
/*
 * ->atomic_open() for aufs dirs.
 * unless a new entry has to be created (O_CREAT is set and @dentry is
 * negative, hashed and not unlinked), the open is left to the caller via
 * au_aopen_no_open(). otherwise au_aopen_or_create() is called with an
 * empty file, and when that file has FMODE_OPENED set afterwards, the aufs
 * open is completed by au_aopen_do_open() using it.
 * @dir must be locked by the caller (IMustLock).
 * returns the err set by the helpers; a negative value means an error.
 */
static int aufs_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned int open_flag,
umode_t create_mode)
{
int err, did_open;
unsigned int lkup_flags;
aufs_bindex_t bindex;
struct super_block *sb;
struct dentry *parent, *d;
struct vfsub_aopen_args args = {
.open_flag = open_flag,
.create_mode = create_mode
};
struct aopen_node aopen_node = {
.file = file
};
IMustLock(dir);
AuDbg("open_flag 0%o\n", open_flag);
AuDbgDentry(dentry);
err = 0;
/* @dentry has no aufs dentry info yet, do the lookup here first */
if (!au_di(dentry)) {
lkup_flags = LOOKUP_OPEN;
if (open_flag & O_CREAT)
lkup_flags |= LOOKUP_CREATE;
d = aufs_lookup(dir, dentry, lkup_flags);
if (IS_ERR(d)) {
err = PTR_ERR(d);
AuTraceErr(err);
goto out;
} else if (d) {
/*
 * obsoleted dentry found.
 * another error will be returned later.
 */
d_drop(d);
AuDbgDentry(d);
dput(d);
}
AuDbgDentry(dentry);
}
/* nothing to create here, no aufs lock has been taken at this point */
if (d_is_positive(dentry)
|| d_unhashed(dentry)
|| d_unlinked(dentry)
|| !(open_flag & O_CREAT)) {
err = au_aopen_no_open(file, dentry);
goto out; /* success */
}
err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
if (unlikely(err))
goto out;
sb = dentry->d_sb;
parent = dentry->d_parent; /* dir is locked */
di_write_lock_parent(parent);
/* lookup again from the first branch, under the locks */
err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
if (unlikely(err < 0))
goto out_parent;
AuDbgDentry(dentry);
if (d_is_positive(dentry)) {
err = au_aopen_no_open(file, dentry);
goto out_parent; /* success */
}
/* an empty file handed to au_aopen_or_create() via args */
args.file = alloc_empty_file(file->f_flags, current_cred());
err = PTR_ERR(args.file);
if (IS_ERR(args.file))
goto out_parent;
/* NOTE(review): bindex is assigned here but never read in this function */
bindex = au_dbtop(dentry);
err = au_aopen_or_create(dir, dentry, &args);
AuTraceErr(err);
AuDbgFile(args.file);
/* copy the resulting f_mode bits to @file, except FMODE_OPENED */
file->f_mode = args.file->f_mode & ~FMODE_OPENED;
did_open = !!(args.file->f_mode & FMODE_OPENED);
/* not opened on the branch, the empty file is not necessary any more */
if (!did_open) {
fput(args.file);
args.file = NULL;
}
di_write_unlock(parent);
di_write_unlock(dentry);
if (unlikely(err < 0)) {
if (args.file)
fput(args.file);
goto out_sb;
}
if (!did_open)
err = au_aopen_no_open(file, dentry);
else {
aopen_node.h_file = args.file;
err = au_aopen_do_open(file, dentry, &aopen_node);
}
/*
 * on failure, release the file opened on the branch and decrement
 * br_nfiles of the branch (args.br) where it was opened.
 */
if (unlikely(err < 0)) {
if (args.file)
fput(args.file);
if (did_open)
au_lcnt_dec(&args.br->br_nfiles);
}
goto out_sb; /* success */
out_parent:
di_write_unlock(parent);
di_write_unlock(dentry);
out_sb:
si_read_unlock(sb);
out:
AuTraceErr(err);
AuDbgFile(file);
return err;
}
/* ---------------------------------------------------------------------- */
/*
 * make the parent dir of @dentry exist on the branch @bcpup, by copying it
 * up (@btop > @bcpup) or down (@btop < @bcpup) when the parent has no lower
 * dentry there. when adding an entry (other than TMPFILE), the negative
 * lower dentry is looked up on @bcpup too.
 * @add_entry: the wrdir flags masked by ADD_ENTRY and TMPFILE (see
 *	au_wr_dir()). when non-zero, the parent inode must be locked by the
 *	caller. when zero, the parent dinfo is write-locked during this
 *	function.
 * @btop: the current top branch index of @dentry
 * returns @bcpup when no error happened, otherwise the err from the helpers.
 */
static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
const unsigned char add_entry, aufs_bindex_t bcpup,
aufs_bindex_t btop)
{
int err;
struct dentry *h_parent;
struct inode *h_dir;
if (add_entry)
IMustLock(d_inode(parent));
else
di_write_lock_parent(parent);
err = 0;
if (!au_h_dptr(parent, bcpup)) {
if (btop > bcpup)
err = au_cpup_dirs(dentry, bcpup);
else if (btop < bcpup)
err = au_cpdown_dirs(dentry, bcpup);
else
/* au_wr_dir() never calls this function when bcpup equals btop */
BUG();
}
if (!err && add_entry && !au_ftest_wrdir(add_entry, TMPFILE)) {
h_parent = au_h_dptr(parent, bcpup);
h_dir = d_inode(h_parent);
/* the lower parent dir is locked (shared) around the lookup only */
inode_lock_shared_nested(h_dir, AuLsc_I_PARENT);
err = au_lkup_neg(dentry, bcpup, /*wh*/0);
/* todo: no unlock here */
inode_unlock_shared(h_dir);
AuDbg("bcpup %d\n", bcpup);
if (!err) {
/* a negative @dentry drops its lower dentry on the old top branch */
if (d_really_is_negative(dentry))
au_set_h_dptr(dentry, btop, NULL);
au_update_dbrange(dentry, /*do_put_zero*/0);
}
}
if (!add_entry)
di_write_unlock(parent);
if (!err)
err = bcpup; /* success */
AuTraceErr(err);
return err;
}
/*
 * decide the branch and the parent dir where we will create a new entry.
 * returns new bindex or an error.
 * copyup the parent dir if needed.
 *
 * @dentry: the target entry
 * @src_dentry: may be NULL. when given and its top branch index is smaller
 *	than the one of @dentry, that branch becomes the candidate.
 * @args: ->force_btgt, when it is not negative, is taken as the branch
 *	without any test other than AuDebugOn(). ->flags are the wrdir flags
 *	(ADD_ENTRY, TMPFILE and ISDIR are tested here).
 */
int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
struct au_wr_dir_args *args)
{
int err;
unsigned int flags;
aufs_bindex_t bcpup, btop, src_btop;
/* non-zero when either ADD_ENTRY or TMPFILE is set */
const unsigned char add_entry
= au_ftest_wrdir(args->flags, ADD_ENTRY)
| au_ftest_wrdir(args->flags, TMPFILE);
struct super_block *sb;
struct dentry *parent;
struct au_sbinfo *sbinfo;
sb = dentry->d_sb;
sbinfo = au_sbi(sb);
parent = dget_parent(dentry);
btop = au_dbtop(dentry);
bcpup = btop;
if (args->force_btgt < 0) {
if (src_dentry) {
src_btop = au_dbtop(src_dentry);
if (src_btop < btop)
bcpup = src_btop;
} else if (add_entry) {
/* ask the 'create' policy for the branch */
flags = 0;
if (au_ftest_wrdir(args->flags, ISDIR))
au_fset_wbr(flags, DIR);
err = AuWbrCreate(sbinfo, dentry, flags);
bcpup = err;
}
/* no candidate or it is readonly, ask the 'copyup' policy */
if (bcpup < 0 || au_test_ro(sb, bcpup, d_inode(dentry))) {
if (add_entry)
err = AuWbrCopyup(sbinfo, dentry);
else {
/* the parent dinfo is read-locked around the call, except for root */
if (!IS_ROOT(dentry)) {
di_read_lock_parent(parent, !AuLock_IR);
err = AuWbrCopyup(sbinfo, dentry);
di_read_unlock(parent, !AuLock_IR);
} else
err = AuWbrCopyup(sbinfo, dentry);
}
bcpup = err;
if (unlikely(err < 0))
goto out;
}
} else {
bcpup = args->force_btgt;
AuDebugOn(au_test_ro(sb, bcpup, d_inode(dentry)));
}
AuDbg("btop %d, bcpup %d\n", btop, bcpup);
err = bcpup;
if (bcpup == btop)
goto out; /* success */
/* copyup the new parent into the branch we process */
err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, btop);
if (err >= 0) {
/* a negative @dentry now ranges over the new branch only */
if (d_really_is_negative(dentry)) {
au_set_h_dptr(dentry, btop, NULL);
au_set_dbtop(dentry, bcpup);
au_set_dbbot(dentry, bcpup);
}
AuDebugOn(add_entry
&& !au_ftest_wrdir(args->flags, TMPFILE)
&& !au_h_dptr(dentry, bcpup));
}
out:
dput(parent);
return err;
}
/* ---------------------------------------------------------------------- */
/* unlock the pinned lower parent dir, if there is any */
void au_pin_hdir_unlock(struct au_pin *p)
{
	if (!p->hdir)
		return;

	au_hn_inode_unlock(p->hdir);
}
/*
 * lock the pinned lower parent dir and verify that it is still the inode of
 * p->h_parent. when p->h_dentry is set, it is verified by au_h_verify() too.
 * returns zero in success (including the case of no hdir), -EBUSY when the
 * parent inode doesn't match, or the value of au_h_verify().
 * note that the lock is kept even when an error is returned.
 */
int au_pin_hdir_lock(struct au_pin *p)
{
	if (!p->hdir)
		return 0;

	/* even if an error happens later, keep this lock */
	au_hn_inode_lock_nested(p->hdir, p->lsc_hi);

	if (unlikely(p->hdir->hi_inode != d_inode(p->h_parent)))
		return -EBUSY;

	if (!p->h_dentry)
		return 0;

	return au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode,
			   p->h_parent, p->br);
}
int au_pin_hdir_relock(struct au_pin *p)
{
int err, i;
struct inode *h_i;
struct dentry *h_d[] = {
p->h_dentry,
p->h_parent
};
err = au_pin_hdir_lock(p);
if (unlikely(err))
goto out;
for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) {
if (!h_d[i])
continue;
if (d_is_positive(h_d[i])) {
h_i = d_inode(h_d[i]);
err = !h_i->i_nlink;
}
}
out:
return err;
}
static void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task)
{
atomic_long_set(&p->hdir->hi_inode->i_rwsem.owner, (long)task);
}
/*
 * when a lower parent dir is pinned, record the acquisition of its i_rwsem
 * in the lock dependency map (subclass p->lsc_hi), and set the current task
 * as the owner of the rwsem.
 */
void au_pin_hdir_acquire_nest(struct au_pin *p)
{
	if (!p->hdir)
		return;

	rwsem_acquire_nest(&p->hdir->hi_inode->i_rwsem.dep_map,
			   p->lsc_hi, 0, NULL, _RET_IP_);
	au_pin_hdir_set_owner(p, current);
}
/*
 * when a lower parent dir is pinned, set p->task (stored by au_pin_init())
 * as the owner of its i_rwsem, and record the release in the lock
 * dependency map.
 */
void au_pin_hdir_release(struct au_pin *p)
{
	if (!p->hdir)
		return;

	au_pin_hdir_set_owner(p, p->task);
	rwsem_release(&p->hdir->hi_inode->i_rwsem.dep_map, _RET_IP_);
}
/*
 * returns the lower dentry of the pinned parent on the pinned branch, or
 * NULL when @pin is NULL or has no parent.
 */
struct dentry *au_pinned_h_parent(struct au_pin *pin)
{
	if (!pin || !pin->parent)
		return NULL;

	return au_h_dptr(pin->parent, pin->bindex);
}
/*
 * undo au_do_pin(): unlock the lower parent dir, drop the write access to
 * the branch mount when MNT_WRITE is set, and then, only when a lower parent
 * dir was pinned, read-unlock the parent dinfo (unless DI_LOCKED), put the
 * lower dir inode and the parent dentry, and clear the pin.
 */
void au_unpin(struct au_pin *p)
{
	const int has_hdir = !!p->hdir;

	if (has_hdir)
		au_pin_hdir_unlock(p);
	if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
		vfsub_mnt_drop_write(p->h_mnt);

	if (has_hdir) {
		if (!au_ftest_pin(p->flags, DI_LOCKED))
			di_read_unlock(p->parent, AuLock_IR);
		iput(p->hdir->hi_inode);
		dput(p->parent);
		p->parent = NULL;
		p->hdir = NULL;
		p->h_mnt = NULL;
		/* do not clear p->task */
	}
}
/*
 * pin the parent of p->dentry on the branch p->bindex, using the parameters
 * stored by au_pin_init().
 * for the root dentry, only the write access to the branch mount is
 * acquired (when MNT_WRITE is set). for others, a reference to the parent
 * dentry is taken, the parent dinfo is read-locked unless DI_LOCKED is set,
 * the write access is acquired when MNT_WRITE is set, and the lower parent
 * dir is grabbed and locked by au_pin_hdir_lock().
 * returns zero in success. in any failure the original err is printed by
 * pr_err() and the value of au_busy_or_stale() is returned instead.
 */
int au_do_pin(struct au_pin *p)
{
int err;
struct super_block *sb;
struct inode *h_dir;
err = 0;
sb = p->dentry->d_sb;
p->br = au_sbr(sb, p->bindex);
/* root case, there is no parent dir to lock */
if (IS_ROOT(p->dentry)) {
if (au_ftest_pin(p->flags, MNT_WRITE)) {
p->h_mnt = au_br_mnt(p->br);
err = vfsub_mnt_want_write(p->h_mnt);
if (unlikely(err)) {
/* clear the flag so that au_unpin() won't drop the write access */
au_fclr_pin(p->flags, MNT_WRITE);
goto out_err;
}
}
goto out;
}
/* the lower dentry exists only within the range up to au_dbbot() */
p->h_dentry = NULL;
if (p->bindex <= au_dbbot(p->dentry))
p->h_dentry = au_h_dptr(p->dentry, p->bindex);
p->parent = dget_parent(p->dentry);
if (!au_ftest_pin(p->flags, DI_LOCKED))
di_read_lock(p->parent, AuLock_IR, p->lsc_di);
h_dir = NULL;
p->h_parent = au_h_dptr(p->parent, p->bindex);
p->hdir = au_hi(d_inode(p->parent), p->bindex);
if (p->hdir)
h_dir = p->hdir->hi_inode;
/*
 * udba case, or
 * if DI_LOCKED is not set, then p->parent may be different
 * and h_parent can be NULL.
 */
if (unlikely(!p->hdir || !h_dir || !p->h_parent)) {
err = -EBUSY;
/* undo the parent lock and reference by hand, au_unpin() is not called */
if (!au_ftest_pin(p->flags, DI_LOCKED))
di_read_unlock(p->parent, AuLock_IR);
dput(p->parent);
p->parent = NULL;
goto out_err;
}
if (au_ftest_pin(p->flags, MNT_WRITE)) {
p->h_mnt = au_br_mnt(p->br);
err = vfsub_mnt_want_write(p->h_mnt);
if (unlikely(err)) {
au_fclr_pin(p->flags, MNT_WRITE);
if (!au_ftest_pin(p->flags, DI_LOCKED))
di_read_unlock(p->parent, AuLock_IR);
dput(p->parent);
p->parent = NULL;
goto out_err;
}
}
/* this reference is dropped by iput() in au_unpin() */
au_igrab(h_dir);
err = au_pin_hdir_lock(p);
if (!err)
goto out; /* success */
/* au_pin_hdir_lock() keeps the lock even in error, au_unpin() releases all */
au_unpin(p);
out_err:
pr_err("err %d\n", err);
err = au_busy_or_stale();
out:
return err;
}
/*
 * initialize @p for au_do_pin().
 * the given parameters are stored as they are, the members filled later by
 * au_do_pin() are cleared, and the current task is recorded in p->task.
 */
void au_pin_init(struct au_pin *p, struct dentry *dentry,
		 aufs_bindex_t bindex, int lsc_di, int lsc_hi,
		 unsigned int udba, unsigned char flags)
{
	/* given by the caller */
	p->dentry = dentry;
	p->bindex = bindex;
	p->lsc_di = lsc_di;
	p->lsc_hi = lsc_hi;
	p->udba = udba;
	p->flags = flags;

	/* set by au_do_pin() */
	p->br = NULL;
	p->parent = NULL;
	p->h_parent = NULL;
	p->h_dentry = NULL;
	p->hdir = NULL;
	p->h_mnt = NULL;

	p->task = current;
}
/*
 * initialize @pin with the lock subclasses AuLsc_DI_PARENT and
 * AuLsc_I_PARENT2, and pin.
 * returns the value of au_do_pin().
 */
int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
	   unsigned int udba, unsigned char flags)
{
	int err;

	au_pin_init(pin, dentry, bindex,
		    /*lsc_di*/AuLsc_DI_PARENT, /*lsc_hi*/AuLsc_I_PARENT2,
		    udba, flags);
	err = au_do_pin(pin);

	return err;
}
/* ---------------------------------------------------------------------- */
/*
* ->setattr() and ->getattr() are called in various cases.
* chmod, stat: dentry is revalidated.
* fchmod, fstat: file and dentry are not revalidated, additionally they may be
* unhashed.
* for ->setattr(), ia->ia_file is passed from ftruncate only.
*/
/* todo: consolidate with do_refresh() and simple_reval_dpath() */
/*
 * refresh @dentry by au_refresh_dentry() when au_digen_test() against
 * @sigen fails. the parent dinfo is read-locked during the refresh.
 * returns zero when no refresh was necessary, otherwise the value of
 * au_refresh_dentry().
 */
int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
{
	int err = 0;
	struct dentry *parent;

	if (!au_digen_test(dentry, sigen))
		goto out; /* nothing to refresh */

	parent = dget_parent(dentry);
	di_read_lock_parent(parent, AuLock_IR);
	err = au_refresh_dentry(dentry, parent);
	di_read_unlock(parent, AuLock_IR);
	dput(parent);

out:
	AuTraceErr(err);
	return err;
}
/*
 * decide the target branch for changing an attribute of @dentry by
 * au_wr_dir(), pin the parent there, and copy the inode up when the target
 * differs from the current top branch of @dentry.
 * @ia: may be NULL. only ATTR_SIZE and ia_size are referred here.
 * @a: ->udba must be set by the caller. ->btgt, ->flags (DID_CPUP),
 *	->pin_flags, ->pin, ->h_path.dentry and ->h_inode are set here.
 * returns zero in success with a->h_inode locked (AuLsc_I_CHILD) and the
 * pin kept, otherwise an error with the pin released.
 */
int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
struct au_icpup_args *a)
{
int err;
loff_t sz;
aufs_bindex_t btop, ibtop;
struct dentry *hi_wh, *parent;
struct inode *inode;
struct au_wr_dir_args wr_dir_args = {
.force_btgt = -1,
.flags = 0
};
if (d_is_dir(dentry))
au_fset_wrdir(wr_dir_args.flags, ISDIR);
/* plink or hi_wh() case */
btop = au_dbtop(dentry);
inode = d_inode(dentry);
ibtop = au_ibtop(inode);
/* the inode is on another, writable branch: force that branch */
if (btop != ibtop && !au_test_ro(inode->i_sb, ibtop, inode))
wr_dir_args.force_btgt = ibtop;
err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
if (unlikely(err < 0))
goto out;
a->btgt = err;
if (err != btop)
au_fset_icpup(a->flags, DID_CPUP);
err = 0;
a->pin_flags = AuPin_MNT_WRITE;
parent = NULL;
/* for non-root, the parent dinfo is write-locked here, not by au_pin() */
if (!IS_ROOT(dentry)) {
au_fset_pin(a->pin_flags, DI_LOCKED);
parent = dget_parent(dentry);
di_write_lock_parent(parent);
}
err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
if (unlikely(err))
goto out_parent;
/*
 * sz is handed to the copy-up as .len. it stays -1 unless @ia shrinks
 * the file.
 * NOTE(review): -1 presumably means the whole file, confirm in cpup.c.
 */
sz = -1;
a->h_path.dentry = au_h_dptr(dentry, btop);
a->h_inode = d_inode(a->h_path.dentry);
if (ia && (ia->ia_valid & ATTR_SIZE)) {
inode_lock_shared_nested(a->h_inode, AuLsc_I_CHILD);
if (ia->ia_size < i_size_read(a->h_inode))
sz = ia->ia_size;
inode_unlock_shared(a->h_inode);
}
hi_wh = NULL;
/* copy-up of an unlinked entry: get the au_hi_wh() dentry, or create it */
if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
hi_wh = au_hi_wh(inode, a->btgt);
if (!hi_wh) {
struct au_cp_generic cpg = {
.dentry = dentry,
.bdst = a->btgt,
.bsrc = -1,
.len = sz,
.pin = &a->pin
};
err = au_sio_cpup_wh(&cpg, /*file*/NULL);
if (unlikely(err))
goto out_unlock;
hi_wh = au_hi_wh(inode, a->btgt);
/* todo: revalidate hi_wh? */
}
}
/*
 * the write lock of the parent is not necessary any more.
 * NOTE(review): au_pin_set_parent_lflag(.., 0) presumably clears
 * DI_LOCKED of the pin so that au_unpin() read-unlocks the parent.
 */
if (parent) {
au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
di_downgrade_lock(parent, AuLock_IR);
dput(parent);
parent = NULL;
}
if (!au_ftest_icpup(a->flags, DID_CPUP))
goto out; /* success */
/* a hashed entry is copied-up simply, and the new lower dentry is taken */
if (!d_unhashed(dentry)) {
struct au_cp_generic cpg = {
.dentry = dentry,
.bdst = a->btgt,
.bsrc = btop,
.len = sz,
.pin = &a->pin,
.flags = AuCpup_DTIME | AuCpup_HOPEN
};
err = au_sio_cpup_simple(&cpg);
if (!err)
a->h_path.dentry = au_h_dptr(dentry, a->btgt);
} else if (!hi_wh)
a->h_path.dentry = au_h_dptr(dentry, a->btgt);
else
a->h_path.dentry = hi_wh; /* do not dget here */
out_unlock:
/* h_path.dentry may have been replaced above */
a->h_inode = d_inode(a->h_path.dentry);
if (!err)
goto out; /* success */
au_unpin(&a->pin);
out_parent:
/* parent is non-NULL here only while it is still write-locked */
if (parent) {
di_write_unlock(parent);
dput(parent);
}
out:
/* in success, return with the lower inode locked */
if (!err)
inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
return err;
}
/*
 * ->setattr() for all aufs inodes.
 * applies @ia to the lower inode on the branch decided by
 * au_pin_and_icpup(), by vfsub_trunc() for ATTR_SIZE or
 * vfsub_notify_change() for others, and then refreshes the aufs inode by
 * au_cpup_attr_changeable().
 * ia->ia_valid and ia->ia_file may be modified during the operation.
 * ia_file and ATTR_FILE are restored before returning when ATTR_FILE was
 * given.
 * returns zero or negative (an error).
 */
static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
{
int err;
struct inode *inode, *delegated;
struct super_block *sb;
struct file *file;
struct au_icpup_args *a;
inode = d_inode(dentry);
IMustLock(inode);
err = setattr_prepare(dentry, ia);
if (unlikely(err))
goto out;
err = -ENOMEM;
a = kzalloc(sizeof(*a), GFP_NOFS);
if (unlikely(!a))
goto out;
/* when SUID/SGID is going to be killed, ATTR_MODE is not passed down */
if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
ia->ia_valid &= ~ATTR_MODE;
file = NULL;
sb = dentry->d_sb;
err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
if (unlikely(err))
goto out_si_skip;
if (ia->ia_valid & ATTR_FILE) {
/* currently ftruncate(2) only */
AuDebugOn(!d_is_reg(dentry));
file = ia->ia_file;
err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1,
/*fi_lsc*/0);
if (unlikely(err))
goto out_si;
/* replace ia_file with au_hf_top(file) while operating on the branch */
ia->ia_file = au_hf_top(file);
a->udba = AuOpt_UDBA_NONE;
} else {
/* fchmod() doesn't pass ia_file */
a->udba = au_opt_udba(sb);
di_write_lock_child(dentry);
/* no d_unlinked(), to set UDBA_NONE for root */
if (d_unhashed(dentry))
a->udba = AuOpt_UDBA_NONE;
if (a->udba != AuOpt_UDBA_NONE) {
AuDebugOn(IS_ROOT(dentry));
err = au_reval_for_attr(dentry, au_sigen(sb));
if (unlikely(err))
goto out_dentry;
}
}
/* in success, a->h_inode is locked and the parent is pinned */
err = au_pin_and_icpup(dentry, ia, a);
if (unlikely(err < 0))
goto out_dentry;
/* after a copy-up, ia_file is not passed down any more */
if (au_ftest_icpup(a->flags, DID_CPUP)) {
ia->ia_file = NULL;
ia->ia_valid &= ~ATTR_FILE;
}
a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
/* the security hooks for chmod/chown, against the lower path */
if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
== (ATTR_MODE | ATTR_CTIME)) {
err = security_path_chmod(&a->h_path, ia->ia_mode);
if (unlikely(err))
goto out_unlock;
} else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
&& (ia->ia_valid & ATTR_CTIME)) {
err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
if (unlikely(err))
goto out_unlock;
}
if (ia->ia_valid & ATTR_SIZE) {
struct file *f;
if (ia->ia_size < i_size_read(inode))
/* unmap only */
truncate_setsize(inode, ia->ia_size);
f = NULL;
if (ia->ia_valid & ATTR_FILE)
f = ia->ia_file;
/* the lower inode is unlocked around vfsub_trunc() and relocked after it */
inode_unlock(a->h_inode);
err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
} else {
delegated = NULL;
/* when a delegated inode is returned, wait for its break and retry */
while (1) {
err = vfsub_notify_change(&a->h_path, ia, &delegated);
if (delegated) {
err = break_deleg_wait(&delegated);
if (!err)
continue;
}
break;
}
}
/*
 * regardless aufs 'acl' option setting.
 * why don't all acl-aware fs call this func from their ->setattr()?
 */
if (!err && (ia->ia_valid & ATTR_MODE))
err = vfsub_acl_chmod(a->h_inode, ia->ia_mode);
if (!err)
au_cpup_attr_changeable(inode);
out_unlock:
inode_unlock(a->h_inode);
au_unpin(&a->pin);
if (unlikely(err))
au_update_dbtop(dentry);
out_dentry:
di_write_unlock(dentry);
/* give the original ia_file and ATTR_FILE back to the caller */
if (file) {
fi_write_unlock(file);
ia->ia_file = file;
ia->ia_valid |= ATTR_FILE;
}
out_si:
si_read_unlock(sb);
out_kfree:
au_kfree_rcu(a);
out:
AuTraceErr(err);
return err;
}
#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
/*
 * prepare the lower path for setting an xattr or an acl.
 * decides a->udba, revalidates @dentry unless it is UDBA_NONE, and calls
 * au_pin_and_icpup() without iattr. in success, @h_path is filled with the
 * lower dentry and the mount of the target branch.
 * returns the value of au_reval_for_attr() or au_pin_and_icpup().
 */
static int au_h_path_to_set_attr(struct dentry *dentry,
				 struct au_icpup_args *a, struct path *h_path)
{
	int err;
	struct super_block *sb = dentry->d_sb;

	/* no d_unlinked(), to set UDBA_NONE for root */
	a->udba = d_unhashed(dentry) ? AuOpt_UDBA_NONE : au_opt_udba(sb);
	if (a->udba != AuOpt_UDBA_NONE) {
		AuDebugOn(IS_ROOT(dentry));
		err = au_reval_for_attr(dentry, au_sigen(sb));
		if (unlikely(err))
			return err;
	}

	err = au_pin_and_icpup(dentry, /*ia*/NULL, a);
	if (unlikely(err < 0))
		return err;

	h_path->dentry = a->h_path.dentry;
	h_path->mnt = au_sbr_mnt(sb, a->btgt);

	return err;
}
/*
 * common routine to set an xattr (AU_XATTR_SET) or an acl (AU_ACL_SET) on
 * the lower inode, selected by arg->type.
 * @inode must be locked by the caller (IMustLock).
 * returns zero or negative (an error). -EOPNOTSUPP for AU_ACL_SET when the
 * lower inode has no ->set_acl().
 * NOTE(review): the switch has no default. any other arg->type leaves err
 * zero and is handled as a success.
 */
ssize_t au_sxattr(struct dentry *dentry, struct inode *inode,
struct au_sxattr *arg)
{
int err;
struct path h_path;
struct super_block *sb;
struct au_icpup_args *a;
struct inode *h_inode;
IMustLock(inode);
err = -ENOMEM;
a = kzalloc(sizeof(*a), GFP_NOFS);
if (unlikely(!a))
goto out;
sb = dentry->d_sb;
err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
if (unlikely(err))
goto out_kfree;
h_path.dentry = NULL; /* silence gcc */
di_write_lock_child(dentry);
err = au_h_path_to_set_attr(dentry, a, &h_path);
if (unlikely(err))
goto out_di;
/* au_pin_and_icpup() returned with a->h_inode locked, unlock it here */
inode_unlock(a->h_inode);
switch (arg->type) {
case AU_XATTR_SET:
AuDebugOn(d_is_negative(h_path.dentry));
err = vfsub_setxattr(h_path.dentry,
arg->u.set.name, arg->u.set.value,
arg->u.set.size, arg->u.set.flags);
break;
case AU_ACL_SET:
err = -EOPNOTSUPP;
h_inode = d_inode(h_path.dentry);
if (h_inode->i_op->set_acl)
/* this will call posix_acl_update_mode */
err = h_inode->i_op->set_acl(h_inode,
arg->u.acl_set.acl,
arg->u.acl_set.type);
break;
}
if (!err)
au_cpup_attr_timesizes(inode);
au_unpin(&a->pin);
if (unlikely(err))
au_update_dbtop(dentry);
out_di:
di_write_unlock(dentry);
si_read_unlock(sb);
out_kfree:
au_kfree_rcu(a);
out:
AuTraceErr(err);
return err;
}
#endif
/*
 * refresh the attributes of the aufs @inode by @st, the result of getattr
 * on the lower inode.
 * @nlink: i_nlink of the lower inode. for a dir, it is subtracted from the
 *	aufs i_nlink (after au_cpup_attr_nlink()) and st->nlink is added
 *	instead.
 */
static void au_refresh_iattr(struct inode *inode, struct kstat *st,
			     unsigned int nlink)
{
	inode->i_mode = st->mode;
	/* don't i_[ug]id_write() here */
	inode->i_uid = st->uid;
	inode->i_gid = st->gid;
	inode->i_atime = st->atime;
	inode->i_mtime = st->mtime;
	inode->i_ctime = st->ctime;

	au_cpup_attr_nlink(inode, /*force*/0);
	if (S_ISDIR(inode->i_mode)) {
		unsigned int adjusted = inode->i_nlink - nlink + st->nlink;

		smp_mb(); /* for i_nlink */
		/* 0 can happen */
		set_nlink(inode, adjusted);
	}

	/* i_blocks and i_size are updated together under i_lock */
	spin_lock(&inode->i_lock);
	inode->i_blocks = st->blocks;
	i_size_write(inode, st->size);
	spin_unlock(&inode->i_lock);
}
/*
 * common routine for aufs_getattr() and au_getxattr().
 * returns zero or negative (an error).
 * @dentry will be read-locked in success.
 *
 * @force: when zero, and the UDBA_NONE option is set, and
 *	au_test_fs_bad_iattr() is false for the branch fs, the lower dentry
 *	is not looked for and h_path->dentry stays NULL.
 * @h_path: the result. ->mnt is the mount of the top branch of the inode.
 *	->dentry may be NULL even in success, and no reference is held on
 *	it on return.
 * @locked: non-zero when the caller has already locked the dinfo, the
 *	locking and the revalidation are skipped then.
 */
int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path,
int locked)
{
int err;
unsigned int mnt_flags, sigen;
unsigned char udba_none;
aufs_bindex_t bindex;
struct super_block *sb, *h_sb;
struct inode *inode;
h_path->mnt = NULL;
h_path->dentry = NULL;
err = 0;
sb = dentry->d_sb;
mnt_flags = au_mntflags(sb);
udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
if (unlikely(locked))
goto body; /* skip locking dinfo */
/* support fstat(2) */
if (!d_unlinked(dentry) && !udba_none) {
sigen = au_sigen(sb);
err = au_digen_test(dentry, sigen);
if (!err) {
/* the generation test passed, a read lock is enough */
di_read_lock_child(dentry, AuLock_IR);
err = au_dbrange_test(dentry);
if (unlikely(err)) {
di_read_unlock(dentry, AuLock_IR);
goto out;
}
} else {
/* revalidate under the write lock, then downgrade to the read lock */
AuDebugOn(IS_ROOT(dentry));
di_write_lock_child(dentry);
err = au_dbrange_test(dentry);
if (!err)
err = au_reval_for_attr(dentry, sigen);
if (!err)
di_downgrade_lock(dentry, AuLock_IR);
else {
di_write_unlock(dentry);
goto out;
}
}
} else
di_read_lock_child(dentry, AuLock_IR);
body:
inode = d_inode(dentry);
bindex = au_ibtop(inode);
h_path->mnt = au_sbr_mnt(sb, bindex);
h_sb = h_path->mnt->mnt_sb;
if (!force
&& !au_test_fs_bad_iattr(h_sb)
&& udba_none)
goto out; /* success */
/*
 * the lower dentry is taken from @dentry when its top branch is the same
 * as the one of the inode, otherwise from the plink when the inode is
 * pseudo-linked.
 */
if (au_dbtop(dentry) == bindex)
h_path->dentry = au_h_dptr(dentry, bindex);
else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
h_path->dentry = au_plink_lkup(inode, bindex);
if (IS_ERR(h_path->dentry))
/* pretending success */
h_path->dentry = NULL;
else
/* the reference is dropped at once, the pointer is still returned */
dput(h_path->dentry);
}
out:
return err;
}
/*
 * ->getattr() for all aufs inodes.
 * when au_h_path_getattr() gives a positive lower dentry, vfs_getattr() is
 * called for it and the aufs inode is refreshed by the result. then @st is
 * filled from the aufs inode by generic_fillattr(). when no lower dentry is
 * given or it is negative, @st is filled from the aufs inode as it is.
 * returns zero or negative (an error).
 */
static int aufs_getattr(const struct path *path, struct kstat *st,
			u32 request, unsigned int query)
{
	int err;
	struct path h_path;
	struct dentry *dentry = path->dentry;
	struct inode *inode = d_inode(dentry);
	struct super_block *sb = dentry->d_sb;

	err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
	if (unlikely(err))
		goto out;
	err = au_h_path_getattr(dentry, /*force*/0, &h_path, /*locked*/0);
	if (unlikely(err))
		goto out_si;

	/*
	 * no lower dentry means illegally overlapped or something,
	 * pretending success.
	 */
	if (likely(h_path.dentry) && d_is_positive(h_path.dentry)) {
		/* no vfsub version */
		err = vfs_getattr(&h_path, st, request, query);
		if (err) {
			AuTraceErr(err);
			goto out_di;
		}
		au_refresh_iattr(inode, st,
				 d_inode(h_path.dentry)->i_nlink);
	}
	generic_fillattr(inode, st);

out_di:
	di_read_unlock(dentry, AuLock_IR);
out_si:
	si_read_unlock(sb);
out:
	AuTraceErr(err);
	return err;
}
/* ---------------------------------------------------------------------- */
/*
 * ->get_link() for aufs symlinks.
 * gets the link body from the lower symlink by vfs_get_link().
 * @dentry: a NULL @dentry is refused by -ECHILD
 * @inode: the given value is not used, it is overwritten by
 *	d_inode(@dentry)
 * @done: passed to vfs_get_link() as it is
 * returns the string from vfs_get_link() in success, otherwise ERR_PTR():
 * -EINVAL when the lower inode has no ->get_link(), -EBUSY when no lower
 * dentry is found, or the error from the helpers.
 */
static const char *aufs_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *done)
{
const char *ret;
struct dentry *h_dentry;
struct inode *h_inode;
int err;
aufs_bindex_t bindex;
ret = NULL; /* suppress a warning */
err = -ECHILD;
if (!dentry)
goto out;
err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
if (unlikely(err))
goto out;
err = au_d_hashed_positive(dentry);
if (unlikely(err))
goto out_unlock;
err = -EINVAL;
inode = d_inode(dentry);
bindex = au_ibtop(inode);
h_inode = au_h_iptr(inode, bindex);
if (unlikely(!h_inode->i_op->get_link))
goto out_unlock;
err = -EBUSY;
h_dentry = NULL;
/* prefer the lower dentry of @dentry on the top branch of the inode */
if (au_dbtop(dentry) <= bindex) {
h_dentry = au_h_dptr(dentry, bindex);
if (h_dentry)
dget(h_dentry);
}
/* otherwise any alias of the lower inode will do */
if (!h_dentry) {
h_dentry = d_find_any_alias(h_inode);
if (IS_ERR(h_dentry)) {
err = PTR_ERR(h_dentry);
goto out_unlock;
}
}
if (unlikely(!h_dentry))
goto out_unlock;
err = 0;
AuDbg("%ps\n", h_inode->i_op->get_link);
AuDbgDentry(h_dentry);
ret = vfs_get_link(h_dentry, done);
/* drop the reference taken by dget() or d_find_any_alias() above */
dput(h_dentry);
if (IS_ERR(ret))
err = PTR_ERR(ret);
out_unlock:
aufs_read_unlock(dentry, AuLock_IR);
out:
if (unlikely(err))
ret = ERR_PTR(err);
AuTraceErrPtr(ret);
return ret;
}
/* ---------------------------------------------------------------------- */
/*
 * test whether @inode is a special file, i.e. a block/char device, a fifo
 * or a socket.
 * S_IFBLK, S_IFCHR, S_IFIFO and S_IFSOCK are values of the file type field
 * (S_IFMT) in i_mode, they are not independent bits. OR-ing them together
 * makes the whole S_IFMT mask, and masking i_mode by it is non-zero for
 * every file type including regular files and dirs. So the type field has
 * to be compared, which special_file() does.
 * returns non-zero when @inode is a special file, otherwise zero.
 */
static int au_is_special(struct inode *inode)
{
	return special_file(inode->i_mode);
}
/*
 * ->update_time() for aufs inodes.
 * when the top branch of @inode is not readonly, the time is updated on the
 * lower inode by vfsub_update_time() under the write access to the branch
 * mount. on a readonly branch nothing is updated: a warning is printed when
 * au_is_special() is true for the lower inode, and otherwise -EIO is
 * returned for any flag other than S_ATIME.
 * in success, the aufs inode is refreshed by au_cpup_attr_timesizes(), and
 * its i_version is incremented when S_VERSION is set.
 * returns zero or negative (an error).
 */
static int aufs_update_time(struct inode *inode, struct timespec64 *ts,
int flags)
{
int err;
aufs_bindex_t bindex;
struct super_block *sb;
struct inode *h_inode;
struct vfsmount *h_mnt;
sb = inode->i_sb;
WARN_ONCE((flags & S_ATIME) && !IS_NOATIME(inode),
"unexpected s_flags 0x%lx", sb->s_flags);
/* mmap_sem might be acquired already, cf. aufs_mmap() */
lockdep_off();
/* the return value of si_read_lock() is not tested here */
si_read_lock(sb, AuLock_FLUSH);
ii_write_lock_child(inode);
err = 0;
bindex = au_ibtop(inode);
h_inode = au_h_iptr(inode, bindex);
if (!au_test_ro(sb, bindex, inode)) {
h_mnt = au_sbr_mnt(sb, bindex);
err = vfsub_mnt_want_write(h_mnt);
if (!err) {
err = vfsub_update_time(h_inode, ts, flags);
vfsub_mnt_drop_write(h_mnt);
}
} else if (au_is_special(h_inode)) {
/*
 * Never copy-up here.
 * These special files may already be opened and used for
 * communicating. If we copied it up, then the communication
 * would be corrupted.
 */
AuWarn1("timestamps for i%lu are ignored "
"since it is on readonly branch (hi%lu).\n",
inode->i_ino, h_inode->i_ino);
} else if (flags & ~S_ATIME) {
/* only S_ATIME is ignored silently on a readonly branch */
err = -EIO;
AuIOErr1("unexpected flags 0x%x\n", flags);
AuDebugOn(1);
}
if (!err)
au_cpup_attr_timesizes(inode);
ii_write_unlock(inode);
si_read_unlock(sb);
lockdep_on();
if (!err && (flags & S_VERSION))
inode_inc_iversion(inode);
return err;
}
/* ---------------------------------------------------------------------- */
/*
 * the inode operations of aufs, indexed by AuIop_SYMLINK, AuIop_DIR and
 * AuIop_OTHER.
 * aufs_iop_nogetattr[] has no initializer here.
 * no getattr version will be set by module.c:aufs_init()
 */
struct inode_operations aufs_iop_nogetattr[AuIop_Last],
aufs_iop[] = {
/* for symlinks, ->update_time() is intentionally left unset (commented out) */
[AuIop_SYMLINK] = {
.permission = aufs_permission,
#ifdef CONFIG_FS_POSIX_ACL
.get_acl = aufs_get_acl,
.set_acl = aufs_set_acl, /* unsupport for symlink? */
#endif
.setattr = aufs_setattr,
.getattr = aufs_getattr,
#ifdef CONFIG_AUFS_XATTR
.listxattr = aufs_listxattr,
#endif
.get_link = aufs_get_link,
/* .update_time = aufs_update_time */
},
/* for dirs, the namespace operations are set in addition to the common ones */
[AuIop_DIR] = {
.create = aufs_create,
.lookup = aufs_lookup,
.link = aufs_link,
.unlink = aufs_unlink,
.symlink = aufs_symlink,
.mkdir = aufs_mkdir,
.rmdir = aufs_rmdir,
.mknod = aufs_mknod,
.rename = aufs_rename,
.permission = aufs_permission,
#ifdef CONFIG_FS_POSIX_ACL
.get_acl = aufs_get_acl,
.set_acl = aufs_set_acl,
#endif
.setattr = aufs_setattr,
.getattr = aufs_getattr,
#ifdef CONFIG_AUFS_XATTR
.listxattr = aufs_listxattr,
#endif
.update_time = aufs_update_time,
.atomic_open = aufs_atomic_open,
.tmpfile = aufs_tmpfile
},
/* for any other type of inode */
[AuIop_OTHER] = {
.permission = aufs_permission,
#ifdef CONFIG_FS_POSIX_ACL
.get_acl = aufs_get_acl,
.set_acl = aufs_set_acl,
#endif
.setattr = aufs_setattr,
.getattr = aufs_getattr,
#ifdef CONFIG_AUFS_XATTR
.listxattr = aufs_listxattr,
#endif
.update_time = aufs_update_time
}
};