Commit 6fe05098 authored by J. R. Okajima
Browse files

aufs: xino 1/2, core



XINO and XIB files are to maintain the inode numbers in aufs
(cf. struct.txt and aufs manual in aufs-util.git).

An XINO file contains just a sequence of inode numbers; the offset of
each entry in the file is real_inum x sizeof(inum).  So the size is
limited by s_maxbytes of the filesystem where the XINO file is located.
In order to support larger inums, aufs stores XINO files as an internal
array.

Sometimes the size of an XINO file can be a problem, i.e. too big,
particularly when XINO files are located on tmpfs. In this case, another
separate patch, tmpfs-ino.patch in aufs4-standalone.git, is recommended
(as well as vfs-ino.patch). The patch makes tmpfs maintain inode
numbers within itself and suppresses their discontiguous distribution.

See also the document in next commit.
Signed-off-by: J. R. Okajima <hooanon05g@gmail.com>
parent f04356cb
......@@ -10,7 +10,7 @@ ccflags-y += -DDEBUG
ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h
obj-$(CONFIG_AUFS_FS) += aufs.o
aufs-y := module.o sbinfo.o super.o branch.o opts.o \
aufs-y := module.o sbinfo.o super.o branch.o xino.o opts.o \
wkq.o vfsub.o dcsub.o \
cpup.o \
dinfo.o \
......
......@@ -27,6 +27,7 @@
#include "dcsub.h"
#include "dentry.h"
#include "fstype.h"
#include "hbl.h"
#include "inode.h"
#include "lcnt.h"
#include "module.h"
......
......@@ -18,8 +18,30 @@
/* ---------------------------------------------------------------------- */
/*
 * a xino file
 *
 * One object of this type hangs off a branch (au_branch.br_xino) and
 * carries the array of XINO files together with its bookkeeping.
 */
struct au_xino {
/* array of xi_nfile file pointers; individual slots may be NULL */
struct file **xi_file;
unsigned int xi_nfile;
struct {
spinlock_t spin;
ino_t *array;
/* number of elements allocated for array */
int total;
/* reserved for future use */
/* unsigned long *bitmap; */
wait_queue_head_t wqh;
} xi_nondir;
struct mutex xi_mtx; /* protects xi_file array */
/* inum pairs queued while their xino file does not exist yet */
struct hlist_bl_head xi_writing;
/* reference count, see au_xino_get() and au_xino_put() */
struct kref xi_kref;
};
/* protected by superblock rwsem */
struct au_branch {
struct au_xino *br_xino;
aufs_bindex_t br_id;
int br_perm;
......@@ -44,6 +66,28 @@ static inline struct super_block *au_br_sb(struct au_branch *br)
return au_br_mnt(br)->mnt_sb;
}
/* Take one more reference on the xino object of @br, if it has one. */
static inline void au_xino_get(struct au_branch *br)
{
	struct au_xino *xino = br->br_xino;

	if (!xino)
		return;
	kref_get(&xino->xi_kref);
}
/*
 * Report the current reference count of the xino object of @br.
 * A branch without a xino object reports 0.
 */
static inline int au_xino_count(struct au_branch *br)
{
	struct au_xino *xino = br->br_xino;

	if (!xino)
		return 0;
	return kref_read(&xino->xi_kref);
}
/* ---------------------------------------------------------------------- */
/* branch.c */
......@@ -53,6 +97,56 @@ int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
struct au_opt_add;
int au_br_add(struct super_block *sb, struct au_opt_add *add);
/* xino.c */
/* index of the branch whose root dentry is the parent of @dentry, or -1 */
aufs_bindex_t au_xi_root(struct super_block *sb, struct dentry *dentry);
/* create a new xino file at @fpath; returns the opened file or ERR_PTR */
struct file *au_xino_create(struct super_block *sb, char *fpath, int silent);
/* create a new xino file at the same place/path as @base */
struct file *au_xino_create2(struct super_block *sb, struct path *base,
struct file *copy_src);
/* parameter block for au_xi_new() */
struct au_xi_new {
struct au_xino *xi; /* switch between xino and xigen */
int idx; /* slot in the file array to fill */
struct path *base; /* handed to au_xino_create2() as @base */
struct file *copy_src; /* handed to au_xino_create2() as @copy_src */
};
struct file *au_xi_new(struct super_block *sb, struct au_xi_new *xinew);
/* look up / store the aufs inum that corresponds to @h_ino on branch @bindex */
int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
ino_t *ino);
int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
ino_t ino);
/* read/write a kernel buffer through @func, retrying on -EAGAIN/-EINTR */
ssize_t xino_fread(vfs_readf_t func, struct file *file, void *buf, size_t size,
loff_t *pos);
ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
size_t size, loff_t *pos);
/* allocate a xino object with room for @nfile files; NULL on failure */
struct au_xino *au_xino_alloc(unsigned int nfile);
/* detach the xino object from @br and drop one reference */
int au_xino_put(struct au_branch *br);
/* first non-NULL file in @xi, or NULL */
struct file *au_xino_file1(struct au_xino *xi);
/* get an unused inode number; 0 on I/O error */
ino_t au_xino_new_ino(struct super_block *sb);
/* for s_op->delete_inode() */
void au_xino_delete_inode(struct inode *inode, const int unlinked);
/* ---------------------------------------------------------------------- */
/*
 * Pick a file out of @xi.
 * @idx is signed: a non-negative value selects that slot (NULL when the slot
 * is beyond the array), a negative value such as -1 means the first file
 * that is set. A NULL @xi always yields NULL.
 */
static inline struct file *au_xino_file(struct au_xino *xi, int idx)
{
	if (!xi)
		return NULL;
	if (idx < 0)
		return au_xino_file1(xi);
	if (idx >= xi->xi_nfile)
		return NULL;
	return xi->xi_file[idx];
}
/* ---------------------------------------------------------------------- */
/* Superblock to branch */
......
......@@ -51,12 +51,19 @@ AuStubInt0(au_debug_test, void)
pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
} while (0)
#define AuLabel(l) AuDbg(#l "\n")
#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__)
#define AuWarn1(fmt, ...) do { \
static unsigned char _c; \
if (!_c++) \
pr_warn(fmt, ##__VA_ARGS__); \
} while (0)
#define AuIOErr1(fmt, ...) do { \
static unsigned char _c; \
if (!_c++) \
AuIOErr(fmt, ##__VA_ARGS__); \
} while (0)
#define AuTraceErr(e) do { \
if (unlikely((e) < 0)) \
AuDbg("err %d\n", (int)(e)); \
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2017-2019 Junjiro R. Okajima
*/
/*
* helpers for hlist_bl.h
*/
#ifndef __AUFS_HBL_H__
#define __AUFS_HBL_H__
#ifdef __KERNEL__
#include <linux/list_bl.h>
/* Put @node at the head of @hbl, holding the list lock around the insert. */
static inline void au_hbl_add(struct hlist_bl_node *node,
			      struct hlist_bl_head *hbl)
{
	hlist_bl_lock(hbl);
	hlist_bl_add_head(node, hbl);
	hlist_bl_unlock(hbl);
}
/* Unlink @node from its list, holding the lock of @hbl around the removal. */
static inline void au_hbl_del(struct hlist_bl_node *node,
			      struct hlist_bl_head *hbl)
{
	hlist_bl_lock(hbl);
	hlist_bl_del(node);
	hlist_bl_unlock(hbl);
}
/*
 * Walk the nodes of the hlist_bl list @head, using @pos as the cursor.
 * This macro takes no lock by itself; au_hbl_count() shows the expected
 * usage with hlist_bl_lock()/hlist_bl_unlock() around the loop.
 * @pos is parenthesized on every use so that any lvalue expression
 * (e.g. "*pp") works as the cursor, not only a plain identifier.
 */
#define au_hbl_for_each(pos, head) \
	for ((pos) = hlist_bl_first(head); \
	     (pos); \
	     (pos) = (pos)->next)
static inline unsigned long au_hbl_count(struct hlist_bl_head *hbl)
{
unsigned long cnt;
struct hlist_bl_node *pos;
cnt = 0;
hlist_bl_lock(hbl);
au_hbl_for_each(pos, hbl)
cnt++;
hlist_bl_unlock(hbl);
return cnt;
}
#endif /* __KERNEL__ */
#endif /* __AUFS_HBL_H__ */
......@@ -14,6 +14,24 @@
#include <linux/path.h>
/* ---------------------------------------------------------------------- */
/* mount flags */
#define AuOpt_XINO	1		/* external inode number bitmap
					   and translation table */

/* flags that are set by default */
#define AuOpt_Def	AuOpt_XINO

/*
 * Flag helpers. @name is the flag name without its AuOpt_ prefix.
 * au_opt_test() evaluates to the masked bits (non-zero when set);
 * au_opt_set()/au_opt_clr() modify @flags in place.
 * @flags is parenthesized everywhere so that an expression such as
 * "a | b" is masked as a whole.
 */
#define au_opt_test(flags, name)	((flags) & AuOpt_##name)
#define au_opt_set(flags, name) do { \
	((flags) |= AuOpt_##name); \
} while (0)
#define au_opt_clr(flags, name) do { \
	((flags) &= ~AuOpt_##name); \
} while (0)
/* ---------------------------------------------------------------------- */
struct au_opt_add {
aufs_bindex_t bindex;
char *pathname;
......
......@@ -42,6 +42,20 @@ struct au_sbinfo {
/* include/asm-ia64/siginfo.h defines a macro named si_flags */
unsigned int si_mntflags;
/* external inode number (bitmap and translation table) */
vfs_readf_t si_xread;
vfs_writef_t si_xwrite;
loff_t si_ximaxent; /* max entries in a xino */
struct file *si_xib;
struct mutex si_xib_mtx; /* protect xib members */
unsigned long *si_xib_buf;
unsigned long si_xib_last_pindex;
int si_xib_next_bit;
/* reserved for future use */
/* unsigned long long si_xib_limit; */ /* Max xib file size */
/*
* sysfs and lifetime management.
* this is not a small structure and it may be a waste of memory in case
......@@ -195,5 +209,11 @@ static inline struct au_branch *au_sbr(struct super_block *sb,
return au_sbi(sb)->si_branch[0 + bindex];
}
static inline loff_t au_xi_maxent(struct super_block *sb)
{
SiMustAnyLock(sb);
return au_sbi(sb)->si_ximaxent;
}
#endif /* __KERNEL__ */
#endif /* __AUFS_SUPER_H__ */
......@@ -8,8 +8,36 @@
*/
#include <linux/namei.h>
#include <linux/cred.h>
#include <linux/security.h>
#include "aufs.h"
/*
 * dentry_open() with the current credentials.
 * When the resulting file is opened for reading only, the read count of the
 * inode behind @path is incremented. The result of dentry_open() is returned
 * unchanged, error pointers included.
 */
struct file *vfsub_dentry_open(struct path *path, int flags)
{
	struct file *opened;

	opened = dentry_open(path, flags /* | __FMODE_NONOTIFY */,
			     current_cred());
	if (IS_ERR_OR_NULL(opened))
		return opened;

	if ((opened->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
		i_readcount_inc(d_inode(path->dentry));

	return opened;
}
/* filp_open() with lockdep switched off for the duration of the call. */
struct file *vfsub_filp_open(const char *path, int oflags, int mode)
{
	struct file *opened;

	lockdep_off();
	opened = filp_open(path, oflags /* | __FMODE_NONOTIFY */, mode);
	lockdep_on();

	return opened;
}
int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
{
int err;
......@@ -18,3 +46,96 @@ int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
/* add more later */
return err;
}
/*
 * lookup_one_len() wrapper. The inode of @parent must be locked by the
 * caller; the lookup result (dentry or error pointer) is traced and returned
 * as is.
 */
struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
				    int len)
{
	struct dentry *found;

	/* VFS checks it too, but by WARN_ON_ONCE() */
	IMustLock(d_inode(parent));

	found = lookup_one_len(name, parent, len);
	AuTraceErrPtr(found);
	return found;
}
/* ---------------------------------------------------------------------- */
/* argument block for call_unlink(), which takes a single void pointer */
struct unlink_args {
int *errp; /* where call_unlink() stores its result */
struct inode *dir; /* parent directory inode, must be locked */
struct path *path; /* its dentry is the one to unlink */
struct inode **delegated_inode; /* passed through to vfs_unlink() */
};
static void call_unlink(void *args)
{
struct unlink_args *a = args;
struct dentry *d = a->path->dentry;
struct inode *h_inode;
/* re-commit later */
const int stop_sillyrename = 0; /* (au_test_nfs(d->d_sb)
* && au_dcount(d) == 1); */
IMustLock(a->dir);
a->path->dentry = d->d_parent;
*a->errp = security_path_unlink(a->path, d);
a->path->dentry = d;
if (unlikely(*a->errp))
return;
if (!stop_sillyrename)
dget(d);
h_inode = NULL;
if (d_is_positive(d)) {
h_inode = d_inode(d);
ihold(h_inode);
}
lockdep_off();
*a->errp = vfs_unlink(a->dir, d, a->delegated_inode);
lockdep_on();
if (!stop_sillyrename)
dput(d);
if (h_inode)
iput(h_inode);
AuTraceErr(*a->errp);
}
/*
 * Unlink the dentry of @path from @dir.
 * @dir: must be locked.
 * @path: its dentry is the target.
 * @delegated_inode: passed through to vfs_unlink().
 * @force: when zero the unlink runs directly in the caller's context,
 *	otherwise it is executed through au_wkq_wait().
 * Returns the unlink result, or the workqueue error when the work could not
 * be run.
 */
int vfsub_unlink(struct inode *dir, struct path *path,
		 struct inode **delegated_inode, int force)
{
	int err, wkq_err;
	struct unlink_args args;

	args.errp = &err;
	args.dir = dir;
	args.path = path;
	args.delegated_inode = delegated_inode;

	if (force) {
		wkq_err = au_wkq_wait(call_unlink, &args);
		if (unlikely(wkq_err))
			err = wkq_err;
	} else
		call_unlink(&args);

	return err;
}
......@@ -15,11 +15,45 @@
#include <linux/fs.h>
#include "debug.h"
/* ---------------------------------------------------------------------- */
/* lock subclass for lower inode */
/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
/* reduce? gave up. */
/*
 * The values continue from I_MUTEX_PARENT2, i.e. they are numbered after
 * the subclasses the VFS defines itself.
 */
enum {
AuLsc_I_Begin = I_MUTEX_PARENT2, /* 5 */
AuLsc_I_PARENT, /* lower inode, parent first */
AuLsc_I_PARENT2, /* copyup dirs */
AuLsc_I_PARENT3, /* copyup wh */
AuLsc_I_CHILD,
AuLsc_I_CHILD2,
AuLsc_I_End /* one past the last subclass */
};
/* to debug easier, do not make them inlined functions */
/* debug assertions: the mutex / the inode lock must currently be held */
#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx))
#define IMustLock(i) AuDebugOn(!inode_is_locked(i))
/* ---------------------------------------------------------------------- */
/* dentry_open() with current_cred(); bumps i_readcount for read-only opens */
struct file *vfsub_dentry_open(struct path *path, int flags);
/* filp_open() with lockdep turned off around the call */
struct file *vfsub_filp_open(const char *path, int oflags, int mode);
int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
/* lookup_one_len(); the inode of @parent must be locked */
struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
int len);
/* ---------------------------------------------------------------------- */
/* Size of the inode behind @file, as read by i_size_read(). */
static inline loff_t vfsub_f_size_read(struct file *file)
{
	struct inode *inode = file_inode(file);

	return i_size_read(inode);
}
/* ---------------------------------------------------------------------- */
/*
 * unlink the dentry of @path from the locked directory @dir;
 * a non-zero @force runs the unlink through au_wkq_wait()
 */
int vfsub_unlink(struct inode *dir, struct path *path,
struct inode **delegated_inode, int force);
#endif /* __KERNEL__ */
#endif /* __AUFS_VFSUB_H__ */
......@@ -33,6 +33,7 @@ typedef void (*au_wkq_func_t)(void *args);
/* wkq flags */
/* bit values, combined in a flags word */
#define AuWkq_WAIT 1
#define AuWkq_NEST (1 << 1)
/* test / set a flag; @name is the flag name without the AuWkq_ prefix */
#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
#define au_fset_wkq(flags, name) \
do { (flags) |= AuWkq_##name; } while (0)
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2019 Junjiro R. Okajima
*/
/*
* external inode number translation table and bitmap
*
* things to consider
* - the lifetime
* + au_xino object
* + XINO files (xino, xib, xigen)
* + dynamic debugfs entries (xiN)
* + static debugfs entries (xib, xigen)
* + static sysfs entry (xi_path)
* - several entry points to handle them.
* + mount(2) without xino option (default)
* + mount(2) with xino option
* + mount(2) with noxino option
* + umount(2)
* + remount with add/del branches
* + remount with xino/noxino options
*/
#include <linux/file.h>
#include <linux/sched/signal.h>
#include <linux/statfs.h>
#include <linux/uaccess.h>
#include "aufs.h"
/*
 * stop unnecessary notify events at creating xino files
 *
 * Search the branches of @sb for the one whose root dentry is the parent of
 * @dentry. Returns its branch index, or -1 when there is none.
 */
aufs_bindex_t au_xi_root(struct super_block *sb, struct dentry *dentry)
{
	aufs_bindex_t hit = -1, i, last;
	struct dentry *parent;

	parent = dentry->d_parent; /* safe d_parent access */
	last = au_sbbot(sb);
	for (i = 0; i <= last; i++) {
		if (au_br_dentry(au_sbr(sb, i)) != parent)
			continue;
		hit = i;
		break;
	}

	AuDbg("bfound b%d\n", hit);
	return hit;
}
/*
 * What au_xino_lock_dir() locked, to be undone by au_xino_unlock_dir().
 * Either hdir is set (branch root case), or parent/dir are (other case).
 */
struct au_xino_lock_dir {
struct au_hinode *hdir; /* non-NULL when a branch root was locked */
struct dentry *parent; /* referenced parent dentry, otherwise */
struct inode *dir; /* inode of parent, locked */
};
/*
 * Get a reference on the parent of @dentry and lock the parent's inode
 * with lockdep subclass @lsc.
 * The caller unlocks the inode and dput()s the returned dentry.
 */
static struct dentry *au_dget_parent_lock(struct dentry *dentry,
unsigned int lsc)
{
struct dentry *parent;
struct inode *dir;
parent = dget_parent(dentry);
dir = d_inode(parent);
inode_lock_nested(dir, lsc);
/* disabled re-check that the parent did not change while locking */
#if 0 /* it should not happen */
spin_lock(&dentry->d_lock);
if (unlikely(dentry->d_parent != parent)) {
spin_unlock(&dentry->d_lock);
inode_unlock(dir);
dput(parent);
parent = NULL;
goto out;
}
spin_unlock(&dentry->d_lock);
out:
#endif
return parent;
}
/*
 * Lock the directory that holds the xino file @xipath and record in @ldir
 * what was locked. When the directory is the root of a branch, the lower
 * inode of the aufs root for that branch is locked; otherwise the parent
 * dentry is referenced and its inode locked.
 */
static void au_xino_lock_dir(struct super_block *sb, struct path *xipath,
			     struct au_xino_lock_dir *ldir)
{
	aufs_bindex_t bindex;

	ldir->hdir = NULL;
	bindex = au_xi_root(sb, xipath->dentry);
	if (bindex < 0) {
		/* other */
		ldir->parent = au_dget_parent_lock(xipath->dentry,
						   AuLsc_I_PARENT);
		ldir->dir = d_inode(ldir->parent);
		return;
	}

	/* rw branch root */
	ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
	inode_lock_nested(ldir->hdir->hi_inode, AuLsc_I_PARENT);
}
/* Undo au_xino_lock_dir() according to what @ldir recorded. */
static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
{
	if (!ldir->hdir) {
		inode_unlock(ldir->dir);
		dput(ldir->parent);
		return;
	}

	inode_unlock(ldir->hdir->hi_inode);
}
/* ---------------------------------------------------------------------- */
/*
 * create and set a new xino file
 *
 * The file is created exclusively at @fpath and then unlinked right away
 * while it stays open, so only the returned struct file keeps it.
 * @silent suppresses the error messages.
 * Returns the opened file, or an ERR_PTR: the open error, the unlink error,
 * or -EINVAL when the file turned out to live on @sb itself.
 */
struct file *au_xino_create(struct super_block *sb, char *fpath, int silent)
{
struct file *file;
struct dentry *h_parent, *d;
struct inode *h_dir, *inode;
int err;
/*
 * at mount-time, and the xino file is the default path,
 * hnotify is disabled so we have no notify events to ignore.
 * when a user specified the xino, we cannot get au_hdir to be ignored.
 */
file = vfsub_filp_open(fpath, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
/* | __FMODE_NONOTIFY */,
0666);
if (IS_ERR(file)) {
if (!silent)
pr_err("open %s(%ld)\n", fpath, PTR_ERR(file));
return file;
}
/* keep file count */
err = 0;
d = file->f_path.dentry;
/* the parent dir stays locked across the unlink */
h_parent = au_dget_parent_lock(d, AuLsc_I_PARENT);
/* mnt_want_write() is unnecessary here */
h_dir = d_inode(h_parent);
inode = file_inode(file);
/* no delegation since it is just created */
if (inode->i_nlink)
err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
/*force*/0);
inode_unlock(h_dir);
dput(h_parent);
if (unlikely(err)) {
if (!silent)
pr_err("unlink %s(%d)\n", fpath, err);
goto out;
}
/* the xino file must not be placed inside this aufs mount */
err = -EINVAL;
if (unlikely(sb == d->d_sb)) {
if (!silent)
pr_err("%s must be outside\n", fpath);
goto out;
}
#if 0 /* re-commit later */
if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
if (!silent)
pr_err("xino doesn't support %s(%s)\n",
fpath, au_sbtype(d->d_sb));
goto out;
}
#endif
return file; /* success */
out:
/* failure after a successful open: drop the file, return the error */
fput(file);
file = ERR_PTR(err);
return file;
}
/*
 * create a new xinofile at the same place/path as @base.
 *
 * The directory of @base is locked, a file with the name of @base is
 * created and opened there, and then unlinked again while open.
 * Returns the opened file or an ERR_PTR.
 * @copy_src is accepted, but the copy itself is commented out in this
 * version.
 */
struct file *au_xino_create2(struct super_block *sb, struct path *base,
struct file *copy_src)
{
struct file *file;
struct dentry *dentry, *parent;
struct inode *dir, *delegated;
struct qstr *name;
struct path path;
int err, do_unlock;
struct au_xino_lock_dir ldir;
/* remembers whether the exit path still has to unlock the dir */
do_unlock = 1;
au_xino_lock_dir(sb, base, &ldir);
dentry = base->dentry;
parent = dentry->d_parent; /* dir inode is locked */
dir = d_inode(parent);
IMustLock(dir);
name = &dentry->d_name;
path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
if (IS_ERR(path.dentry)) {
/* hand the lookup error back through the file pointer */
file = (void *)path.dentry;
pr_err("%pd lookup err %ld\n", dentry, PTR_ERR(path.dentry));
goto out;
}
/* no need to mnt_want_write() since we call dentry_open() later */
err = vfs_create(dir, path.dentry, 0666, NULL);
if (unlikely(err)) {
file = ERR_PTR(err);
pr_err("%pd create err %d\n", dentry, err);
goto out_dput;
}
path.mnt = base->mnt;
file = vfsub_dentry_open(&path,
O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
/* | __FMODE_NONOTIFY */);
if (IS_ERR(file)) {
pr_err("%pd open err %ld\n", dentry, PTR_ERR(file));
goto out_dput;
}
delegated = NULL;
err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
/* the dir lock is dropped here regardless of the unlink result */
au_xino_unlock_dir(&ldir);
do_unlock = 0;
if (unlikely(err == -EWOULDBLOCK)) {
pr_warn("cannot retry for NFSv4 delegation"
" for an internal unlink\n");
iput(delegated);
}
if (unlikely(err)) {
pr_err("%pd unlink err %d\n", dentry, err);
goto out_fput;
}
if (copy_src) {
/* no one can touch copy_src xino */
/* the copy is disabled, so err is still 0 from the unlink above */
//err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
if (unlikely(err)) {
pr_err("%pd copy err %d\n", dentry, err);
goto out_fput;
}
}
goto out_dput; /* success */
out_fput:
fput(file);
file = ERR_PTR(err);
out_dput:
/* drop the lookup reference; reached on success as well */
dput(path.dentry);
out:
if (do_unlock)
au_xino_unlock_dir(&ldir);
return file;
}
/*
 * Return the first file that is set in the file array of @xi,
 * or NULL when every slot is empty.
 */
struct file *au_xino_file1(struct au_xino *xi)
{
	unsigned int i;
	const unsigned int count = xi->xi_nfile;
	struct file *f;

	for (i = 0; i < count; i++) {
		f = xi->xi_file[i];
		if (f)
			return f;
	}

	return NULL;
}
/*
 * Store @file into slot @idx of the file array of @xi.
 * A reference is taken on @file (when non-NULL). An existing slot has its
 * previous file released; a slot beyond the array makes the array grow to
 * @idx + 1 entries, which requires xi_mtx to be held.
 * Returns 0, or -ENOMEM when the array could not be enlarged (the reference
 * taken on @file is dropped again in that case).
 */
static int au_xino_file_set(struct au_xino *xi, int idx, struct file *file)
{
	struct file *old;
	void *grown;

	if (file)
		get_file(file);

	if (idx < xi->xi_nfile) {
		old = xi->xi_file[idx];
		if (old)
			fput(old);
		goto store;
	}

	grown = au_kzrealloc(xi->xi_file,
			     sizeof(*xi->xi_file) * xi->xi_nfile,
			     sizeof(*xi->xi_file) * (idx + 1),
			     GFP_NOFS, /*may_shrink*/0);
	if (!grown) {
		if (file)
			fput(file);
		return -ENOMEM;
	}
	MtxMustLock(&xi->xi_mtx);
	xi->xi_file = grown;
	xi->xi_nfile = idx + 1;

store:
	xi->xi_file[idx] = file;
	return 0;
}
/*
 * Create a new file as described by @xinew and install it.
 * if @xinew->xi is not set, then create new xigen file (not supported yet,
 * it hits BUG()).
 * The reference obtained from the creation is dropped before returning, so
 * the returned pointer is kept only by the reference held in the array.
 * Returns the file or an ERR_PTR.
 */
struct file *au_xi_new(struct super_block *sb, struct au_xi_new *xinew)
{
	struct file *created;
	int err;

	SiMustAnyLock(sb);

	created = au_xino_create2(sb, xinew->base, xinew->copy_src);
	if (IS_ERR(created)) {
		err = PTR_ERR(created);
		pr_err("%s[%d], err %d\n",
		       xinew->xi ? "xino" : "xigen",
		       xinew->idx, err);
		return created;
	}

	if (!xinew->xi) {
		BUG();
		/* todo: make xigen file an array */
		/* err = au_xigen_file_set(sb, xinew->idx, file); */
	} else
		err = au_xino_file_set(xinew->xi, xinew->idx, created);
	fput(created);
	if (unlikely(err))
		return ERR_PTR(err);

	return created;
}
/* ---------------------------------------------------------------------- */
/* where the entry of a lower inum lives, as computed by au_xi_calc() */
struct au_xi_calc {
int idx; /* index into the xino file array */
loff_t pos; /* byte offset within that file */
};
/*
 * Translate the lower inode number @h_ino into its location:
 * @calc->idx becomes h_ino / maxent and @calc->pos becomes
 * (h_ino % maxent) * sizeof(ino_t), maxent being au_xi_maxent(@sb).
 */
static void au_xi_calc(struct super_block *sb, ino_t h_ino,
		       struct au_xi_calc *calc)
{
	const loff_t per_file = au_xi_maxent(sb);

	calc->idx = div64_u64_rem(h_ino, per_file, &calc->pos);
	calc->pos *= sizeof(ino_t);
}
/*
 * Make sure the xino file for slot @calc->idx of @br exists.
 * Nothing is done when the branch has no xino object or the slot is already
 * filled. Otherwise a new file is created next to the first existing xino
 * file of the branch. xi_mtx is held meanwhile.
 * Returns 0 or the error of the creation.
 */
static int au_xino_do_new_async(struct super_block *sb, struct au_branch *br,
				struct au_xi_calc *calc)
{
	int err = 0;
	struct file *file;
	struct au_xino *xi = br->br_xino;
	struct au_xi_new xinew = {
		.xi = xi
	};

	SiMustAnyLock(sb);

	if (!xi)
		return 0;

	mutex_lock(&xi->xi_mtx);
	if (!au_xino_file(xi, calc->idx)) {
		file = au_xino_file(xi, /*idx*/-1);
		AuDebugOn(!file);
		xinew.idx = calc->idx;
		xinew.base = &file->f_path;
		/* xinew.copy_src = NULL; */
		file = au_xi_new(sb, &xinew);
		if (IS_ERR(file))
			err = PTR_ERR(file);
	}
	mutex_unlock(&xi->xi_mtx);

	return err;
}
/*
 * argument block for au_xino_call_do_new_async();
 * allocated by au_xino_new_async() and freed by the callback
 */
struct au_xino_do_new_async_args {
struct super_block *sb;
struct au_branch *br;
struct au_xi_calc calc; /* copy of the caller's location */
ino_t ino; /* the inum to write at that location */
};
/*
 * an inum pair kept on au_xino.xi_writing while the xino file that should
 * hold it has not been created yet
 */
struct au_xi_writing {
struct hlist_bl_node node;
ino_t h_ino, ino; /* lower inum and the aufs inum stored for it */
};
static int au_xino_do_write(vfs_writef_t write, struct file *file,
struct au_xi_calc *calc, ino_t ino);
/*
 * Workqueue callback scheduled by au_xino_new_async().
 * Creates the missing xino file, writes the pending inum into it and then
 * removes the matching entry from the xi_writing list.
 * @args is a struct au_xino_do_new_async_args and is freed here.
 */
static void au_xino_call_do_new_async(void *args)
{
struct au_xino_do_new_async_args *a = args;
struct au_branch *br;
struct super_block *sb;
struct au_sbinfo *sbi;
struct inode *root;
struct file *file;
struct au_xi_writing *del, *p;
struct hlist_bl_head *hbl;
struct hlist_bl_node *pos;
int err;
br = a->br;
sb = a->sb;
sbi = au_sbi(sb);
si_noflush_read_lock(sb);
root = d_inode(sb->s_root);
ii_read_lock_child(root);
err = au_xino_do_new_async(sb, br, &a->calc);
if (unlikely(err)) {
AuIOErr("err %d\n", err);
goto out;
}
file = au_xino_file(br->br_xino, a->calc.idx);
AuDebugOn(!file);
err = au_xino_do_write(sbi->si_xwrite, file, &a->calc, a->ino);
if (unlikely(err)) {
AuIOErr("err %d\n", err);
goto out;
}
/* the pair is on disk now; find its list entry by the aufs inum */
del = NULL;
hbl = &br->br_xino->xi_writing;
hlist_bl_lock(hbl);
au_hbl_for_each(pos, hbl) {
p = container_of(pos, struct au_xi_writing, node);
if (p->ino == a->ino) {
del = p;
hlist_bl_del(&p->node);
break;
}
}
hlist_bl_unlock(hbl);
/* NOTE(review): del stays NULL when no entry matched -- presumably au_kfree_rcu() accepts that; confirm */
au_kfree_rcu(del);
out:
/*
 * NOTE(review): on the error paths above the list entry is left on
 * xi_writing -- confirm this is intended.
 */
/* balances the au_lcnt_inc() done in au_xino_new_async() */
au_lcnt_dec(&br->br_count);
ii_read_unlock(root);
si_read_unlock(sb);
au_nwt_done(&sbi->si_nowait);
au_kfree_rcu(a);
}
/*
 * create a new xino file asynchronously
 *
 * Packs the arguments, takes a count on @br and queues
 * au_xino_call_do_new_async(), which writes @ino at @calc once the file
 * exists. Returns 0 when queued, -ENOMEM, or the workqueue error; on a
 * queueing error the branch count and the arguments are released here.
 */
static int au_xino_new_async(struct super_block *sb, struct au_branch *br,
			     struct au_xi_calc *calc, ino_t ino)
{
	int err;
	struct au_xino_do_new_async_args *job;

	job = kmalloc(sizeof(*job), GFP_NOFS);
	if (unlikely(!job))
		return -ENOMEM;

	job->sb = sb;
	job->br = br;
	job->calc = *calc;
	job->ino = ino;
	au_lcnt_inc(&br->br_count);
	err = au_wkq_nowait(au_xino_call_do_new_async, job, sb, AuWkq_NEST);
	if (unlikely(err)) {
		pr_err("wkq %d\n", err);
		au_lcnt_dec(&br->br_count);
		au_kfree_rcu(job);
	}

	return err;
}
/*
 * read @ino from xinofile for the specified branch{@sb, @bindex}
 * at the position of @h_ino.
 *
 * *@ino is 0 when there is no entry: xino disabled, the file is too short,
 * or the file does not exist and no pending pair for @h_ino is queued on
 * the xi_writing list. Returns 0 in all those cases and on success, a
 * negative read error, or -EIO for a short read.
 */
int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
		 ino_t *ino)
{
	ssize_t sz;
	struct au_xi_calc calc;
	struct file *file;
	struct au_xino *xi;
	struct hlist_bl_head *hbl;
	struct hlist_bl_node *pos;
	struct au_xi_writing *pending;

	*ino = 0;
	if (!au_opt_test(au_mntflags(sb), XINO))
		return 0; /* no xino */

	au_xi_calc(sb, h_ino, &calc);
	xi = au_sbr(sb, bindex)->br_xino;
	file = au_xino_file(xi, calc.idx);
	if (!file) {
		/* the file is not created yet, consult the queued pairs */
		hbl = &xi->xi_writing;
		hlist_bl_lock(hbl);
		au_hbl_for_each(pos, hbl) {
			pending = container_of(pos, struct au_xi_writing, node);
			if (pending->h_ino != h_ino)
				continue;
			AuDbg("hi%llu, i%llu, found\n",
			      (u64)pending->h_ino, (u64)pending->ino);
			*ino = pending->ino;
			break;
		}
		hlist_bl_unlock(hbl);
		return 0;
	}
	if (vfsub_f_size_read(file) < calc.pos + sizeof(*ino))
		return 0; /* no xino */

	sz = xino_fread(au_sbi(sb)->si_xread, file, ino, sizeof(*ino),
			&calc.pos);
	if (sz == sizeof(*ino))
		return 0; /* success */
	if (sz < 0)
		return sz;

	AuIOErr("xino read error (%zd)\n", sz);
	return -EIO;
}
/*
 * Write @ino into @file at @calc->pos by means of @write.
 * Returns 0 when the whole value was written, -EIO otherwise.
 */
static int au_xino_do_write(vfs_writef_t write, struct file *file,
			    struct au_xi_calc *calc, ino_t ino)
{
	ssize_t written;

	written = xino_fwrite(write, file, &ino, sizeof(ino), &calc->pos);
	if (written != sizeof(ino)) {
		AuIOErr("write failed (%zd)\n", written);
		return -EIO;
	}

	return 0; /* success */
}
/*
 * write @ino to the xinofile for the specified branch{@sb, @bindex}
 * at the position of @h_ino.
 * even if @ino is zero, it is written to the xinofile and means no entry.
 *
 * When the xino file for that position does not exist yet, the pair is
 * queued on xi_writing and the file is created and written asynchronously.
 * Returns 0 on success (or when xino is disabled) and -EIO on any failure.
 */
int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
ino_t ino)
{
int err;
unsigned int mnt_flags;
struct au_xi_calc calc;
struct file *file;
struct au_branch *br;
struct au_xino *xi;
struct au_xi_writing *p;
SiMustAnyLock(sb);
mnt_flags = au_mntflags(sb);
if (!au_opt_test(mnt_flags, XINO))
return 0;
au_xi_calc(sb, h_ino, &calc);
br = au_sbr(sb, bindex);
xi = br->br_xino;
file = au_xino_file(xi, calc.idx);
if (!file) {
/* store the inum pair into the list */
/* __GFP_NOFAIL: the result is used without a NULL check */
p = kmalloc(sizeof(*p), GFP_NOFS | __GFP_NOFAIL);
p->h_ino = h_ino;
p->ino = ino;
au_hbl_add(&p->node, &xi->xi_writing);
/* create and write a new xino file asynchronously */
err = au_xino_new_async(sb, br, &calc, ino);
if (!err)
return 0; /* success */
/*
 * NOTE(review): the pair added above is not taken off xi_writing
 * when scheduling fails -- confirm this is intended.
 */
goto out;
}
err = au_xino_do_write(au_sbi(sb)->si_xwrite, file, &calc, ino);
if (!err)
return 0; /* success */
out:
/* the original error is only logged; callers always see -EIO */
AuIOErr("write failed (%d)\n", err);
return -EIO;
}
static ssize_t xino_fread_wkq(vfs_readf_t func, struct file *file, void *buf,
size_t size, loff_t *pos);
/* todo: unnecessary to support mmap_sem since kernel-space? */
/*
 * Read @size bytes at *@pos from @file into the kernel buffer @kbuf by
 * calling @func with the address limit switched to KERNEL_DS.
 * The call is repeated while it returns -EAGAIN or -EINTR, bounded by
 * prevent_endless. Returns what @func (or the workqueue path) returned last.
 */
ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
loff_t *pos)
{
ssize_t err;
mm_segment_t oldfs;
/* hands the kernel buffer to @func, which takes a __user pointer */
union {
void *k;
char __user *u;
} buf;
int i;
const int prevent_endless = 10;
i = 0;
buf.k = kbuf;
oldfs = get_fs();
set_fs(KERNEL_DS);
do {
err = func(file, buf.u, size, pos);
/*
 * interrupted with a fatal signal pending and au_wkq_test() false:
 * redo the read through the workqueue, with the address limit
 * restored around that call
 */
if (err == -EINTR
&& !au_wkq_test()
&& fatal_signal_pending(current)) {
set_fs(oldfs);
err = xino_fread_wkq(func, file, kbuf, size, pos);
BUG_ON(err == -EINTR);
oldfs = get_fs();
set_fs(KERNEL_DS);
}
} while (i++ < prevent_endless
&& (err == -EAGAIN || err == -EINTR));
set_fs(oldfs);
#if 0 /* reserved for future use */
if (err > 0)
fsnotify_access(file->f_path.dentry);
#endif
return err;
}
/* argument block for call_xino_fread(): the xino_fread() parameters plus the result slot */
struct xino_fread_args {
ssize_t *errp; /* receives the return value of xino_fread() */
vfs_readf_t func;
struct file *file;
void *buf;
size_t size;
loff_t *pos;
};
static void call_xino_fread(void *args)
{
struct xino_fread_args *a = args;
*a->errp = xino_fread(a->func, a->file, a->buf, a->size, a->pos);
}
/*
 * Run xino_fread() through au_wkq_wait().
 * Returns the result of the read, or the workqueue error when the work
 * could not be run.
 */
static ssize_t xino_fread_wkq(vfs_readf_t func, struct file *file, void *buf,
			      size_t size, loff_t *pos)
{
	ssize_t err;
	int wkq_err;
	struct xino_fread_args args;

	args.errp = &err;
	args.func = func;
	args.file = file;
	args.buf = buf;
	args.size = size;
	args.pos = pos;

	wkq_err = au_wkq_wait(call_xino_fread, &args);
	if (unlikely(wkq_err))
		return wkq_err;

	return err;
}
static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
size_t size, loff_t *pos);
/*
 * Write @size bytes from the kernel buffer @kbuf to @file at *@pos by
 * calling @func with the address limit switched to KERNEL_DS.
 * The call is repeated while it returns -EAGAIN or -EINTR, bounded by
 * prevent_endless. Returns what @func (or the workqueue path) returned last.
 */
static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
size_t size, loff_t *pos)
{
ssize_t err;
mm_segment_t oldfs;
/* hands the kernel buffer to @func, which takes a __user pointer */
union {
void *k;
const char __user *u;
} buf;
int i;
const int prevent_endless = 10;
i = 0;
buf.k = kbuf;
oldfs = get_fs();
set_fs(KERNEL_DS);
do {
err = func(file, buf.u, size, pos);
/*
 * interrupted with a fatal signal pending and au_wkq_test() false:
 * redo the write through the workqueue, with the address limit
 * restored around that call
 */
if (err == -EINTR
&& !au_wkq_test()
&& fatal_signal_pending(current)) {
set_fs(oldfs);
err = xino_fwrite_wkq(func, file, kbuf, size, pos);
BUG_ON(err == -EINTR);
oldfs = get_fs();
set_fs(KERNEL_DS);
}
} while (i++ < prevent_endless
&& (err == -EAGAIN || err == -EINTR));
set_fs(oldfs);
#if 0 /* reserved for future use */
if (err > 0)
fsnotify_modify(file->f_path.dentry);
#endif
return err;
}
/* argument block for call_do_xino_fwrite(): the do_xino_fwrite() parameters plus the result slot */
struct do_xino_fwrite_args {
ssize_t *errp; /* receives the return value of do_xino_fwrite() */
vfs_writef_t func;
struct file *file;
void *buf;
size_t size;
loff_t *pos;
};
static void call_do_xino_fwrite(void *args)
{
struct do_xino_fwrite_args *a = args;
*a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
}
/*
 * Run do_xino_fwrite() through au_wkq_wait().
 * Returns the result of the write, or the workqueue error when the work
 * could not be run.
 */
static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
			       size_t size, loff_t *pos)
{
	ssize_t err;
	int wkq_err;
	struct do_xino_fwrite_args args;

	args.errp = &err;
	args.func = func;
	args.file = file;
	args.buf = buf;
	args.size = size;
	args.pos = pos;

	/*
	 * it breaks RLIMIT_FSIZE and normal user's limit,
	 * users should care about quota and real 'filesystem full.'
	 */
	wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
	if (unlikely(wkq_err))
		return wkq_err;

	return err;
}
/*
 * Write a kernel buffer to a xino file.
 * When the calling task has no RLIMIT_FSIZE limit the write is done
 * directly, otherwise it is passed to the workqueue variant. lockdep is
 * turned off around either path.
 */
ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
		    size_t size, loff_t *pos)
{
	ssize_t err;
	const int unlimited = (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY);

	lockdep_off();
	if (unlimited)
		err = do_xino_fwrite(func, file, buf, size, pos);
	else
		err = xino_fwrite_wkq(func, file, buf, size, pos);
	lockdep_on();

	return err;
}
/* ---------------------------------------------------------------------- */
/*
 * inode number bitmap
 */
/* number of bits, i.e. inode numbers, covered by one page of the bitmap */
static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
/*
 * Inode number represented by bit @bit of bitmap page @pindex;
 * bit 0 of page 0 stands for AUFS_FIRST_INO.
 */
static ino_t xib_calc_ino(unsigned long pindex, int bit)
{
	AuDebugOn(bit < 0 || page_bits <= bit);

	return AUFS_FIRST_INO + pindex * page_bits + bit;
}
/*
 * Inverse of xib_calc_ino(): split @ino into the bitmap page index
 * (*@pindex) and the bit within that page (*@bit).
 */
static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
{
	ino_t rel;

	AuDebugOn(ino < AUFS_FIRST_INO);

	rel = ino - AUFS_FIRST_INO;
	*pindex = rel / page_bits;
	*bit = rel % page_bits;
}
/*
 * Make bitmap page @pindex the one cached in si_xib_buf.
 * The currently cached page is written back to the xib file first. The
 * requested page is then read from the file, or, when the file does not
 * reach that far, zero-filled and written out. si_xib_mtx must be held.
 * Returns 0 on success, a negative I/O error, or -EIO for a short transfer.
 */
static int xib_pindex(struct super_block *sb, unsigned long pindex)
{
	loff_t pos;
	ssize_t sz;
	struct au_sbinfo *sbinfo = au_sbi(sb);
	struct file *xib;
	unsigned long *buf;

	MtxMustLock(&sbinfo->si_xib_mtx);
	AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
		  || !au_opt_test(sbinfo->si_mntflags, XINO));

	if (pindex == sbinfo->si_xib_last_pindex)
		return 0;

	xib = sbinfo->si_xib;
	buf = sbinfo->si_xib_buf;

	/* write back the page that is cached now */
	pos = sbinfo->si_xib_last_pindex;
	pos *= PAGE_SIZE;
	sz = xino_fwrite(sbinfo->si_xwrite, xib, buf, PAGE_SIZE, &pos);
	if (unlikely(sz != PAGE_SIZE))
		goto fail;

	/* bring in the requested page */
	pos = pindex;
	pos *= PAGE_SIZE;
	if (vfsub_f_size_read(xib) < pos + PAGE_SIZE) {
		memset(buf, 0, PAGE_SIZE);
		sz = xino_fwrite(sbinfo->si_xwrite, xib, buf, PAGE_SIZE, &pos);
	} else
		sz = xino_fread(sbinfo->si_xread, xib, buf, PAGE_SIZE, &pos);
	if (sz != PAGE_SIZE)
		goto fail;

	sbinfo->si_xib_last_pindex = pindex;
	return 0; /* success */

fail:
	AuIOErr1("write failed (%zd)\n", sz);
	if (sz >= 0)
		return -EIO;
	return sz;
}
/*
 * Mark the inode number of @inode as free in the bitmap and remember its
 * bit as the next candidate. An error from loading the bitmap page leaves
 * the bitmap untouched and is not reported.
 */
static void au_xib_clear_bit(struct inode *inode)
{
	int bit;
	unsigned long pindex;
	struct super_block *sb;
	struct au_sbinfo *sbinfo;

	AuDebugOn(inode->i_nlink);

	sb = inode->i_sb;
	xib_calc_bit(inode->i_ino, &pindex, &bit);
	AuDebugOn(page_bits <= bit);

	sbinfo = au_sbi(sb);
	mutex_lock(&sbinfo->si_xib_mtx);
	if (!xib_pindex(sb, pindex)) {
		clear_bit(bit, sbinfo->si_xib_buf);
		sbinfo->si_xib_next_bit = bit;
	}
	mutex_unlock(&sbinfo->si_xib_mtx);
}
/* ---------------------------------------------------------------------- */
/*
 * Allocate and initialize a xino object with a zeroed array of @nfile file
 * slots and an 8-entry nondir array. The reference count starts through
 * kref_init(). Returns NULL when any allocation fails.
 */
struct au_xino *au_xino_alloc(unsigned int nfile)
{
	struct au_xino *xi;

	xi = kzalloc(sizeof(*xi), GFP_NOFS);
	if (unlikely(!xi))
		return NULL;

	xi->xi_nfile = nfile;
	xi->xi_file = kcalloc(nfile, sizeof(*xi->xi_file), GFP_NOFS);
	if (unlikely(!xi->xi_file))
		goto err_xi;

	xi->xi_nondir.total = 8; /* initial size */
	xi->xi_nondir.array = kcalloc(xi->xi_nondir.total, sizeof(ino_t),
				      GFP_NOFS);
	if (unlikely(!xi->xi_nondir.array))
		goto err_file;

	spin_lock_init(&xi->xi_nondir.spin);
	init_waitqueue_head(&xi->xi_nondir.wqh);
	mutex_init(&xi->xi_mtx);
	INIT_HLIST_BL_HEAD(&xi->xi_writing);
	kref_init(&xi->xi_kref);
	return xi; /* success */

err_file:
	au_kfree_try_rcu(xi->xi_file);
err_xi:
	au_kfree_rcu(xi);
	return NULL;
}
/*
 * Give @br a fresh xino object sized for slot @idx and store @file (with an
 * extra reference, when non-NULL) in that slot. @br must not have a xino
 * object yet. Returns 0 or -ENOMEM.
 */
/* re-commit later */ __maybe_unused
static int au_xino_init(struct au_branch *br, int idx, struct file *file)
{
	struct au_xino *xi;

	xi = au_xino_alloc(idx + 1);
	if (unlikely(!xi))
		return -ENOMEM;

	if (file)
		get_file(file);
	xi->xi_file[idx] = file;
	AuDebugOn(br->br_xino);
	br->br_xino = xi;

	return 0;
}
/*
 * kref release callback: tear down the xino object embedding @kref.
 * All files are released, the nondir array is asserted to be all zero,
 * leftover xi_writing entries are reported and freed, and finally the
 * arrays and the object itself are freed.
 */
static void au_xino_release(struct kref *kref)
{
	struct au_xino *xi = container_of(kref, struct au_xino, xi_kref);
	struct hlist_bl_head *hbl = &xi->xi_writing;
	struct hlist_bl_node *pos, *n;
	struct au_xi_writing *p;
	unsigned long leftover;
	unsigned int u;
	int i;

	for (u = 0; u < xi->xi_nfile; u++)
		if (xi->xi_file[u])
			fput(xi->xi_file[u]);

	i = xi->xi_nondir.total;
	while (i-- > 0)
		AuDebugOn(xi->xi_nondir.array[i]);

	mutex_destroy(&xi->xi_mtx);

	leftover = au_hbl_count(hbl);
	if (unlikely(leftover)) {
		pr_warn("xi_writing %lu\n", leftover);
		hlist_bl_lock(hbl);
		hlist_bl_for_each_entry_safe (p, pos, n, hbl, node) {
			hlist_bl_del(&p->node);
			au_kfree_rcu(p);
		}
		hlist_bl_unlock(hbl);
	}

	au_kfree_try_rcu(xi->xi_file);
	au_kfree_try_rcu(xi->xi_nondir.array);
	au_kfree_rcu(xi);
}
/*
 * Detach the xino object from @br and drop one reference on it.
 * Returns what kref_put() returned, or 0 when @br had no xino object.
 */
int au_xino_put(struct au_branch *br)
{
	struct au_xino *xi = br->br_xino;

	if (!xi)
		return 0;

	br->br_xino = NULL;
	return kref_put(&xi->xi_kref, au_xino_release);
}
/* ---------------------------------------------------------------------- */
/*
 * get an unused inode number from bitmap
 *
 * Without the XINO option the number comes from iunique().
 * Otherwise the search order is: the remembered next bit, the cached bitmap
 * page, the pages below it in descending order, then the pages above it up
 * to the end of the xib file. The found bit is set and converted to an
 * inode number. Returns 0 when loading a bitmap page fails.
 */
ino_t au_xino_new_ino(struct super_block *sb)
{
ino_t ino;
unsigned long *p, pindex, ul, pend;
struct au_sbinfo *sbinfo;
struct file *file;
int free_bit, err;
if (!au_opt_test(au_mntflags(sb), XINO))
return iunique(sb, AUFS_FIRST_INO);
sbinfo = au_sbi(sb);
mutex_lock(&sbinfo->si_xib_mtx);
p = sbinfo->si_xib_buf;
/* fast path: the hint left by the previous allocation or free */
free_bit = sbinfo->si_xib_next_bit;
if (free_bit < page_bits && !test_bit(free_bit, p))
goto out; /* success */
free_bit = find_first_zero_bit(p, page_bits);
if (free_bit < page_bits)
goto out; /* success */
pindex = sbinfo->si_xib_last_pindex;
/*
 * walk down from the page before the cached one; ul is unsigned, so
 * decrementing past 0 wraps to ULONG_MAX and ends the loop
 */
for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
err = xib_pindex(sb, ul);
if (unlikely(err))
goto out_err;
free_bit = find_first_zero_bit(p, page_bits);
if (free_bit < page_bits)
goto out; /* success */
}
file = sbinfo->si_xib;
pend = vfsub_f_size_read(file) / PAGE_SIZE;
/* then walk up; page index pend itself is included */
for (ul = pindex + 1; ul <= pend; ul++) {
err = xib_pindex(sb, ul);
if (unlikely(err))
goto out_err;
free_bit = find_first_zero_bit(p, page_bits);
if (free_bit < page_bits)
goto out; /* success */
}
/* no free bit in any page */
BUG();
out:
set_bit(free_bit, p);
sbinfo->si_xib_next_bit = free_bit + 1;
/* re-read: xib_pindex() may have switched the cached page */
pindex = sbinfo->si_xib_last_pindex;
mutex_unlock(&sbinfo->si_xib_mtx);
ino = xib_calc_ino(pindex, free_bit);
AuDbg("i%lu\n", (unsigned long)ino);
return ino;
out_err:
mutex_unlock(&sbinfo->si_xib_mtx);
AuDbg("i0\n");
return 0;
}
/*
 * for s_op->delete_inode()
 *
 * Forget the inode number translation of @inode. When @unlinked is set, the
 * number is released in the bitmap as well. For every lower inode of @inode
 * (all of them when @unlinked, otherwise only those without links) a zero
 * is written to its xino entry; write errors are not propagated.
 * Nothing is done without the XINO option or for the aufs root inode.
 */
void au_xino_delete_inode(struct inode *inode, const int unlinked)
{
	int err;
	aufs_bindex_t bindex, bbot, bi;
	struct au_iinfo *iinfo;
	struct super_block *sb;
	struct au_hinode *hi;
	struct inode *h_inode;
	struct au_branch *br;
	vfs_writef_t xwrite;
	struct au_xi_calc calc;
	struct file *file;

	AuDebugOn(au_is_bad_inode(inode));

	sb = inode->i_sb;
	if (!au_opt_test(au_mntflags(sb), XINO)
	    || inode->i_ino == AUFS_ROOT_INO)
		return;

	if (unlinked)
		au_xib_clear_bit(inode);

	iinfo = au_ii(inode);
	bindex = iinfo->ii_btop;
	if (bindex < 0)
		return;

	xwrite = au_sbi(sb)->si_xwrite;
	hi = au_hinode(iinfo, bindex);
	bbot = iinfo->ii_bbot;
	while (bindex <= bbot) {
		h_inode = hi->hi_inode;
		if (!h_inode)
			goto next;
		if (!unlinked && h_inode->i_nlink)
			goto next;

		/* inode may not be revalidated */
		bi = au_br_index(sb, hi->hi_id);
		if (bi < 0)
			goto next;

		br = au_sbr(sb, bi);
		au_xi_calc(sb, h_inode->i_ino, &calc);
		file = au_xino_file(br->br_xino, calc.idx);
		if (IS_ERR_OR_NULL(file))
			goto next;

		/* the result is stored but not acted upon */
		err = au_xino_do_write(xwrite, file, &calc, /*ino*/0);
next:
		bindex++;
		hi++;
	}
}
......@@ -58,6 +58,9 @@ typedef int16_t aufs_bindex_t;
#define AUFS_FSTYPE AUFS_NAME
/* inode number of the aufs root; it has no xino entry to delete */
#define AUFS_ROOT_INO 2
/* inode number that bit 0 of the first xib bitmap page stands for */
#define AUFS_FIRST_INO 11
#define AUFS_WKQ_NAME AUFS_NAME "d"
/* branch permissions and attributes */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment