Commit a3387b95 authored by J. R. Okajima's avatar J. R. Okajima
Browse files

aufs: file op



Implement several f_op functions for non-dir.
Signed-off-by: default avatarJ. R. Okajima <hooanon05g@gmail.com>
parent 8d319094
......@@ -7,8 +7,10 @@
* file and vm operations
*/
#include <linux/aio.h>
#include <linux/fs_stack.h>
#include <linux/mman.h>
#include <linux/security.h>
#include "aufs.h"
int au_do_open_nondir(struct file *file, int flags)
......@@ -84,6 +86,58 @@ int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
/* ---------------------------------------------------------------------- */
/*
 * Flush the lower file obtained from au_hf_top(@file), if any.
 * Returns 0 when there is no lower file, otherwise whatever vfsub_flush()
 * returns for it.
 */
static int au_do_flush_nondir(struct file *file, fl_owner_t id)
{
	struct file *lower = au_hf_top(file);

	if (!lower)
		return 0;

	return vfsub_flush(lower, id);
}
/* ->flush for non-directories: hand au_do_flush_nondir() to au_do_flush(). */
static int aufs_flush_nondir(struct file *file, fl_owner_t id)
{
	int ret;

	ret = au_do_flush(file, id, au_do_flush_nondir);
	return ret;
}
/* ---------------------------------------------------------------------- */
/*
* read and write functions acquire [fdi]_rwsem once, but release them before
* mmap_sem. This is done to prevent a race condition with mmap(2).
* Releasing these aufs rwsems should be safe: no branch management can happen
* (si_rwsem is kept) and no harmful copy-up should happen. Actually, copy-up
* may happen in read functions after [fdi]_rwsem are released, but it should
* be harmless.
*/
/*
 * Prepare @file for a read-type operation.
 *
 * Calls au_reval_and_lock_fdi() (read lock, fi lock subclass @lsc), drops the
 * dentry-info lock again and returns the lower file from au_hf_top() with an
 * extra reference taken. Unless @keep_fi is non-zero the file-info lock is
 * released too; otherwise the caller has to fi_read_unlock() it.
 *
 * Returns an ERR_PTR() when au_reval_and_lock_fdi() fails.
 * Callers should call au_read_post() or fput() in the end.
 */
struct file *au_read_pre(struct file *file, int keep_fi, unsigned int lsc)
{
	struct file *lower;
	int rc;

	rc = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0, lsc);
	if (rc)
		return ERR_PTR(rc);

	di_read_unlock(file->f_path.dentry, AuLock_IR);
	lower = au_hf_top(file);
	get_file(lower);
	if (!keep_fi)
		fi_read_unlock(file);

	return lower;
}
/*
 * Finish a read-type operation started by au_read_pre(): copy atime from the
 * lower inode to @inode and drop the reference on @h_file.
 */
static void au_read_post(struct inode *inode, struct file *h_file)
{
	struct inode *lower_inode = file_inode(h_file);

	/* update without lock, I don't think it a problem */
	fsstack_copy_attr_atime(inode, lower_inode);
	fput(h_file);
}
struct au_write_pre {
/* input */
unsigned int lsc;
......@@ -142,6 +196,350 @@ out:
return h_file;
}
static void au_write_post(struct inode *inode, struct file *h_file,
struct au_write_pre *wpre)
{
struct inode *h_inode;
au_cpup_attr_timesizes(inode);
AuDebugOn(au_ibtop(inode) != wpre->btop);
h_inode = file_inode(h_file);
inode->i_mode = h_inode->i_mode;
ii_write_unlock(inode);
fput(h_file);
}
/*
 * ->read: forward the read to the lower file under si_rwsem (read).
 * Returns the result of vfsub_read_u(), or the error from au_read_pre().
 */
static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
			 loff_t *ppos)
{
	ssize_t ret;
	struct file *lower;
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;

	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);

	lower = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
	if (IS_ERR(lower)) {
		ret = PTR_ERR(lower);
	} else {
		/* filedata may be obsoleted by concurrent copyup, but no problem */
		ret = vfsub_read_u(lower, buf, count, ppos);
		/* todo: necessary? */
		/* file->f_ra = h_file->f_ra; */
		au_read_post(inode, lower);
	}

	si_read_unlock(sb);
	return ret;
}
/*
 * todo: very ugly
 * Safely take both the inode lock and si_rwsem (read).
 * If the plink maintenance mode continues forever (that is the problem),
 * this may loop forever.
 *
 * On return the caller holds inode_lock(@inode) and the si read lock.
 */
static void au_mtx_and_read_lock(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	for (;;) {
		inode_lock(inode);
		if (!si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM))
			return;

		/*
		 * The NOPLM attempt failed: drop the inode lock, take and
		 * release si_rwsem once with NOPLMW, then retry from scratch.
		 */
		inode_unlock(inode);
		si_read_lock(sb, AuLock_NOPLMW);
		si_read_unlock(sb);
	}
}
/*
 * ->write: forward the write to the lower file obtained from au_write_pre().
 * Runs with the inode lock and si_rwsem (read) held, both taken by
 * au_mtx_and_read_lock() and released before returning.
 */
static ssize_t aufs_write(struct file *file, const char __user *ubuf,
			  size_t count, loff_t *ppos)
{
	ssize_t ret;
	struct au_write_pre wpre;
	struct file *lower;
	struct inode *inode = file_inode(file);

	au_mtx_and_read_lock(inode);

	wpre.lsc = 0;
	lower = au_write_pre(file, /*do_ready*/1, &wpre);
	if (IS_ERR(lower)) {
		ret = PTR_ERR(lower);
	} else {
		ret = vfsub_write_u(lower, (char __user *)ubuf, count, ppos);
		au_write_post(inode, lower, &wpre);
	}

	si_read_unlock(inode->i_sb);
	inode_unlock(inode);
	return ret;
}
static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio,
struct iov_iter *iov_iter)
{
ssize_t err;
struct file *file;
ssize_t (*iter)(struct kiocb *, struct iov_iter *);
err = security_file_permission(h_file, rw);
if (unlikely(err))
goto out;
err = -ENOSYS;
iter = NULL;
if (rw == MAY_READ)
iter = h_file->f_op->read_iter;
else if (rw == MAY_WRITE)
iter = h_file->f_op->write_iter;
file = kio->ki_filp;
kio->ki_filp = h_file;
if (iter) {
lockdep_off();
err = iter(kio, iov_iter);
lockdep_on();
} else
/* currently there is no such fs */
WARN_ON_ONCE(1);
kio->ki_filp = file;
out:
return err;
}
/*
 * ->read_iter: forward to the lower file through au_do_iter(MAY_READ).
 * au_read_pre() is asked to keep the file-info lock, which is then released
 * here before the lower iterator runs.
 */
static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
{
	ssize_t ret;
	struct file *lower;
	struct file *file = kio->ki_filp;
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;

	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);

	lower = au_read_pre(file, /*keep_fi*/1, /*lsc*/0);
	if (IS_ERR(lower)) {
		ret = PTR_ERR(lower);
	} else {
		fi_read_unlock(file);

		ret = au_do_iter(lower, MAY_READ, kio, iov_iter);
		/* todo: necessary? */
		/* file->f_ra = h_file->f_ra; */
		au_read_post(inode, lower);
	}

	si_read_unlock(sb);
	return ret;
}
/*
 * ->write_iter: forward to the lower file through au_do_iter(MAY_WRITE),
 * holding the inode lock and si_rwsem (read) for the duration.
 */
static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter)
{
	ssize_t ret;
	struct au_write_pre wpre;
	struct file *lower;
	struct file *file = kio->ki_filp;
	struct inode *inode = file_inode(file);

	au_mtx_and_read_lock(inode);

	wpre.lsc = 0;
	lower = au_write_pre(file, /*do_ready*/1, &wpre);
	if (IS_ERR(lower)) {
		ret = PTR_ERR(lower);
	} else {
		ret = au_do_iter(lower, MAY_WRITE, kio, iov_iter);
		au_write_post(inode, lower, &wpre);
	}

	si_read_unlock(inode->i_sb);
	inode_unlock(inode);
	return ret;
}
/*
 * ->splice_read: splice from the lower file into @pipe via vfsub_splice_to(),
 * under si_rwsem (read).
 */
static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
				struct pipe_inode_info *pipe, size_t len,
				unsigned int flags)
{
	ssize_t ret;
	struct file *lower;
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;

	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);

	lower = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
	if (IS_ERR(lower)) {
		ret = PTR_ERR(lower);
	} else {
		ret = vfsub_splice_to(lower, ppos, pipe, len, flags);
		/* todo: necessary? */
		/* file->f_ra = h_file->f_ra; */
		au_read_post(inode, lower);
	}

	si_read_unlock(sb);
	return ret;
}
/*
 * ->splice_write: splice from @pipe into the lower file via
 * vfsub_splice_from(), holding the inode lock and si_rwsem (read).
 */
static ssize_t
aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
		  size_t len, unsigned int flags)
{
	ssize_t ret;
	struct au_write_pre wpre;
	struct file *lower;
	struct inode *inode = file_inode(file);

	au_mtx_and_read_lock(inode);

	wpre.lsc = 0;
	lower = au_write_pre(file, /*do_ready*/1, &wpre);
	if (IS_ERR(lower)) {
		ret = PTR_ERR(lower);
	} else {
		ret = vfsub_splice_from(pipe, lower, ppos, len, flags);
		au_write_post(inode, lower, &wpre);
	}

	si_read_unlock(inode->i_sb);
	inode_unlock(inode);
	return ret;
}
/*
 * ->fallocate: call vfs_fallocate() on the lower file (lockdep disabled
 * around the call), holding the inode lock and si_rwsem (read).
 */
static long aufs_fallocate(struct file *file, int mode, loff_t offset,
			   loff_t len)
{
	long ret;
	struct au_write_pre wpre;
	struct file *lower;
	struct inode *inode = file_inode(file);

	au_mtx_and_read_lock(inode);

	wpre.lsc = 0;
	lower = au_write_pre(file, /*do_ready*/1, &wpre);
	if (IS_ERR(lower)) {
		ret = PTR_ERR(lower);
	} else {
		lockdep_off();
		ret = vfs_fallocate(lower, mode, offset, len);
		lockdep_on();
		au_write_post(inode, lower, &wpre);
	}

	si_read_unlock(inode->i_sb);
	inode_unlock(inode);
	return ret;
}
/*
 * ->copy_file_range: copy between the lower files of @src and @dst.
 *
 * Both files must be regular files (-EINVAL otherwise) and their lower files
 * must live on the same lower super_block (-EXDEV otherwise). On success the
 * return value is that of vfsub_copy_file_range().
 *
 * Locking: the inode lock of @dst and si_rwsem (read) are taken first. The
 * src side is prepared with au_read_pre(keep_fi=1), so src's file-info read
 * lock stays held until it is explicitly released here; every exit path that
 * follows a successful au_read_pre() must therefore call fi_read_unlock(src).
 */
static ssize_t aufs_copy_file_range(struct file *src, loff_t src_pos,
				    struct file *dst, loff_t dst_pos,
				    size_t len, unsigned int flags)
{
	ssize_t err;
	struct au_write_pre wpre;
	enum { SRC, DST };
	struct {
		struct inode *inode;
		struct file *h_file;
		struct super_block *h_sb;
	} a[2];

#define a_src	a[SRC]
#define a_dst	a[DST]

	err = -EINVAL;
	a_src.inode = file_inode(src);
	if (unlikely(!S_ISREG(a_src.inode->i_mode)))
		goto out;
	a_dst.inode = file_inode(dst);
	if (unlikely(!S_ISREG(a_dst.inode->i_mode)))
		goto out;

	au_mtx_and_read_lock(a_dst.inode);
	/*
	 * in order to match the order in di_write_lock2_{child,parent}(),
	 * use f_path.dentry for this comparison.
	 */
	if (src->f_path.dentry < dst->f_path.dentry) {
		a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_1);
		err = PTR_ERR(a_src.h_file);
		if (IS_ERR(a_src.h_file))
			goto out_si;

		wpre.lsc = AuLsc_FI_2;
		a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre);
		err = PTR_ERR(a_dst.h_file);
		if (IS_ERR(a_dst.h_file)) {
			/*
			 * au_read_pre() was asked to keep src's fi lock and
			 * au_read_post() does not drop it, so release it here
			 * to avoid leaking the lock on this error path.
			 */
			fi_read_unlock(src);
			au_read_post(a_src.inode, a_src.h_file);
			goto out_si;
		}
	} else {
		wpre.lsc = AuLsc_FI_1;
		a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre);
		err = PTR_ERR(a_dst.h_file);
		if (IS_ERR(a_dst.h_file))
			goto out_si;

		a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_2);
		err = PTR_ERR(a_src.h_file);
		if (IS_ERR(a_src.h_file)) {
			/* au_read_pre() failed, so only dst needs undoing */
			au_write_post(a_dst.inode, a_dst.h_file, &wpre);
			goto out_si;
		}
	}

	/* the lower files must be on the same lower filesystem */
	err = -EXDEV;
	a_src.h_sb = file_inode(a_src.h_file)->i_sb;
	a_dst.h_sb = file_inode(a_dst.h_file)->i_sb;
	if (unlikely(a_src.h_sb != a_dst.h_sb)) {
		AuDbgFile(src);
		AuDbgFile(dst);
		goto out_file;
	}

	err = vfsub_copy_file_range(a_src.h_file, src_pos, a_dst.h_file,
				    dst_pos, len, flags);

out_file:
	au_write_post(a_dst.inode, a_dst.h_file, &wpre);
	fi_read_unlock(src);
	au_read_post(a_src.inode, a_src.h_file);
out_si:
	si_read_unlock(a_dst.inode->i_sb);
	inode_unlock(a_dst.inode);
out:
	return err;

#undef a_src
#undef a_dst
}
/* ---------------------------------------------------------------------- */
/*
* The locking order around current->mmap_sem.
* - in most and regular cases
......@@ -254,10 +652,120 @@ out:
/* ---------------------------------------------------------------------- */
/*
 * ->fsync for non-directories.
 *
 * A file not opened for write is reported as success without touching the
 * lower file. Otherwise vfsub_fsync() is called on the lower file while the
 * inode lock and si_rwsem (read) are held. @start and @end are not used.
 */
static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
			     int datasync)
{
	int ret;
	struct au_write_pre wpre;
	struct inode *inode;
	struct file *lower;

	/* 0 rather than -EBADF; posix? */
	if (unlikely(!(file->f_mode & FMODE_WRITE)))
		return 0;

	inode = file_inode(file);
	au_mtx_and_read_lock(inode);

	wpre.lsc = 0;
	lower = au_write_pre(file, /*do_ready*/1, &wpre);
	if (IS_ERR(lower)) {
		ret = PTR_ERR(lower);
	} else {
		ret = vfsub_fsync(lower, &lower->f_path, datasync);
		au_write_post(inode, lower, &wpre);
	}

	si_read_unlock(inode->i_sb);
	inode_unlock(inode);
	return ret;
}
/*
 * ->fasync: forward to the lower file's ->fasync when it has one.
 *
 * Returns the error from au_read_pre() on failure, the lower ->fasync result
 * when the lower file implements it, and 0 when it does not.
 */
static int aufs_fasync(int fd, struct file *file, int flag)
{
	int err;
	struct file *h_file;
	struct super_block *sb;

	sb = file->f_path.dentry->d_sb;
	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);

	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
	err = PTR_ERR(h_file);
	if (IS_ERR(h_file))
		goto out;

	/*
	 * h_file is a valid pointer here, so the PTR_ERR() value above is
	 * meaningless; reset it so that a lower file without ->fasync
	 * reports success instead of a truncated pointer value.
	 */
	err = 0;
	if (h_file->f_op->fasync)
		err = h_file->f_op->fasync(fd, h_file, flag);
	fput(h_file); /* instead of au_read_post() */

out:
	si_read_unlock(sb);
	return err;
}
/*
 * ->setfl: apply @arg to the lower file through setfl().
 * The FASYNC bit currently set on @file is OR-ed into @arg first so that the
 * lower file's ->fasync is not invoked from here.
 */
static int aufs_setfl(struct file *file, unsigned long arg)
{
	int ret;
	struct file *lower;
	struct super_block *sb = file->f_path.dentry->d_sb;

	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);

	lower = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
	if (IS_ERR(lower)) {
		ret = PTR_ERR(lower);
	} else {
		/* stop calling h_file->fasync */
		arg |= vfsub_file_flags(file) & FASYNC;
		ret = setfl(/*unused fd*/-1, lower, arg);
		fput(lower); /* instead of au_read_post() */
	}

	si_read_unlock(sb);
	return ret;
}
/* ---------------------------------------------------------------------- */
/* no one supports this operation, currently */
#if 0
static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
size_t len, loff_t *pos, int more)
{
}
#endif
/* ---------------------------------------------------------------------- */
const struct file_operations aufs_file_fop = {
.owner = THIS_MODULE,
.llseek = default_llseek,
.read = aufs_read,
.write = aufs_write,
.read_iter = aufs_read_iter,
.write_iter = aufs_write_iter,
.mmap = aufs_mmap,
.open = aufs_open_nondir,
.release = aufs_release_nondir
.flush = aufs_flush_nondir,
.release = aufs_release_nondir,
.fsync = aufs_fsync_nondir,
.fasync = aufs_fasync,
/* .sendpage = aufs_sendpage, */
.setfl = aufs_setfl,
.splice_write = aufs_splice_write,
.splice_read = aufs_splice_read,
#if 0
.aio_splice_write = aufs_aio_splice_write,
.aio_splice_read = aufs_aio_splice_read,
#endif
.fallocate = aufs_fallocate,
.copy_file_range = aufs_copy_file_range
};
......@@ -320,6 +320,30 @@ out:
/* ---------------------------------------------------------------------- */
/*
 * Common ->flush helper.
 *
 * Takes si (noflush read), file-info (read) and inode-info (read, child)
 * locks, runs @flush(@file, @id), calls au_cpup_attr_timesizes() on the
 * inode, and releases the locks in reverse order.
 * Returns whatever @flush returned.
 */
int au_do_flush(struct file *file, fl_owner_t id,
		int (*flush)(struct file *file, fl_owner_t id))
{
	int ret;
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;

	si_noflush_read_lock(sb);
	fi_read_lock(file);
	ii_read_lock_child(inode);

	ret = flush(file, id);
	au_cpup_attr_timesizes(inode);

	ii_read_unlock(inode);
	fi_read_unlock(file);
	si_read_unlock(sb);

	return ret;
}
/* ---------------------------------------------------------------------- */
static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
{
int err;
......
......@@ -70,11 +70,14 @@ struct au_pin;
int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
int wlock, unsigned int fi_lsc);
int au_do_flush(struct file *file, fl_owner_t id,
int (*flush)(struct file *file, fl_owner_t id));
/* f_op.c */
extern const struct file_operations aufs_file_fop;
int au_do_open_nondir(struct file *file, int flags);
int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
struct file *au_read_pre(struct file *file, int keep_fi, unsigned int lsc);
/* finfo.c */
void au_hfput(struct au_hfile *hf, int execed);
......
......@@ -748,7 +748,6 @@ static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
goto out_kfree;
if (ia->ia_valid & ATTR_FILE) {
#if 0 /* re-commit later */
/* currently ftruncate(2) only */
AuDebugOn(!d_is_reg(dentry));
file = ia->ia_file;
......@@ -758,10 +757,6 @@ static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
goto out_si;
ia->ia_file = au_hf_top(file);
a->udba = AuOpt_UDBA_NONE;
#else
err = -ENOSYS;
goto out_si;
#endif
} else {
/* fchmod() doesn't pass ia_file */
a->udba = au_opt_udba(sb);
......@@ -799,7 +794,6 @@ static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
}
if (ia->ia_valid & ATTR_SIZE) {
#if 0 /* re-commit later */
struct file *f;
if (ia->ia_size < i_size_read(inode))
......@@ -812,9 +806,6 @@ static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
inode_unlock(a->h_inode);
err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
#else
err = -ENOSYS;
#endif
} else {
delegated = NULL;
while (1) {
......
......@@ -9,6 +9,7 @@
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/splice.h>
#include "aufs.h"
int vfsub_sync_filesystem(struct super_block *h_sb, int wait)
......@@ -367,6 +368,23 @@ ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
return err;
}
/*
 * Call @file's ->flush if it has one; return 0 otherwise.
 * When au_test_nfs() reports true for the file's super_block, lockdep is
 * switched off around the call.
 */
int vfsub_flush(struct file *file, fl_owner_t id)
{
	int ret;
	int on_nfs;

	if (!file->f_op->flush)
		return 0;

	on_nfs = au_test_nfs(file->f_path.dentry->d_sb);
	if (on_nfs)
		lockdep_off();
	ret = file->f_op->flush(file, id);
	if (on_nfs)
		lockdep_on();

	return ret;
}
int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
{
int err;
......@@ -380,6 +398,75 @@ int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
return err;
}
/*
 * do_splice_to() with lockdep disabled, followed by file_accessed() on @in.
 * Returns the do_splice_to() result.
 */
long vfsub_splice_to(struct file *in, loff_t *ppos,
		     struct pipe_inode_info *pipe, size_t len,
		     unsigned int flags)
{
	long ret;

	lockdep_off();
	ret = do_splice_to(in, ppos, pipe, len, flags);
	lockdep_on();

	file_accessed(in);
	return ret;
}
/* do_splice_from() with lockdep disabled; returns its result unchanged. */
long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
		       loff_t *ppos, size_t len, unsigned int flags)
{
	long ret;

	lockdep_off();
	ret = do_splice_from(pipe, out, ppos, len, flags);
	lockdep_on();

	return ret;
}
/*
 * vfs_fsync() with lockdep disabled.
 * @path is accepted for the callers' convenience but not used here.
 */
int vfsub_fsync(struct file *file, struct path *path, int datasync)
{
	int ret;

	/* file can be NULL */
	lockdep_off();
	ret = vfs_fsync(file, datasync);
	lockdep_on();

	return ret;
}
/*
 * Truncate the lower object at @h_path to @length.
 * cf. open.c:do_sys_truncate() and do_sys_ftruncate()
 *
 * Without @h_file this is simply vfsub_truncate(). With @h_file, the
 * sequence is sb_start_write(), locks_verify_truncate(),
 * security_path_truncate(), do_truncate() and sb_end_write(); the first
 * failing step stops the chain and its error is returned.
 */
int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
		struct file *h_file)
{
	int ret;
	struct inode *lower_inode;
	struct super_block *lower_sb;

	if (!h_file)
		return vfsub_truncate(h_path, length);

	lower_inode = d_inode(h_path->dentry);
	lower_sb = lower_inode->i_sb;

	lockdep_off();
	sb_start_write(lower_sb);
	lockdep_on();

	ret = locks_verify_truncate(lower_inode, h_file, length);
	if (ret == 0)
		ret = security_path_truncate(h_path);
	if (ret == 0) {
		lockdep_off();
		ret = do_truncate(h_path->dentry, length, attr, h_file);
		lockdep_on();
	}

	lockdep_off();
	sb_end_write(lower_sb);
	lockdep_on();

	return ret;
}
/* ---------------------------------------------------------------------- */
struct au_vfsub_mkdir_args {
......
......@@ -98,6 +98,15 @@ static inline void vfsub_mnt_drop_write(struct vfsmount *mnt)
lockdep_on();
}
#if 0 /* reserved */
/*
 * mnt_drop_write_file() with lockdep disabled around the call.
 * Currently compiled out by the surrounding "#if 0" (reserved).
 */
static inline void vfsub_mnt_drop_write_file(struct file *file)
{
lockdep_off();
mnt_drop_write_file(file);
lockdep_on();
}
#endif
/* ---------------------------------------------------------------------- */
struct au_hinode;
......@@ -129,6 +138,7 @@ ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
loff_t *ppos);
ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
loff_t *ppos);
int vfsub_flush(struct file *file, fl_owner_t id);
int vfsub_iterate_dir(struct file *file, struct dir_context *ctx);
static inline loff_t vfsub_f_size_read(struct file *file)
......@@ -153,6 +163,14 @@ static inline int vfsub_file_execed(struct file *file)
return !!(vfsub_file_flags(file) & __FMODE_EXEC);
}
#if 0 /* reserved */
/*
 * Call file_accessed() on the lower file and then vfsub_update_h_iattr() on
 * its path; the latter's result is deliberately ignored.
 * Currently compiled out by the surrounding "#if 0" (reserved).
 */
static inline void vfsub_file_accessed(struct file *h_file)
{
file_accessed(h_file);
/* passes NULL for the "did" argument; the return value is not checked */
vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
}
#endif
#if 0 /* reserved */
static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
struct dentry *h_dentry)
......@@ -186,6 +204,26 @@ static inline int vfsub_acl_chmod(struct inode *h_inode, umode_t h_mode)
AuStubInt0(vfsub_acl_chmod, struct inode *h_inode, umode_t h_mode);
#endif
long vfsub_splice_to(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags);
/* vfs_truncate() with lockdep disabled; returns its result unchanged. */
static inline long vfsub_truncate(struct path *path, loff_t length)
{
	long ret;

	lockdep_off();
	ret = vfs_truncate(path, length);
	lockdep_on();

	return ret;
}
int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
struct file *h_file);
int vfsub_fsync(struct file *file, struct path *path, int datasync);
/*
* re-use branch fs's ioctl(FICLONE) while aufs itself doesn't support such
* ioctl.
......@@ -202,6 +240,20 @@ static inline loff_t vfsub_clone_file_range(struct file *src, struct file *dst,
return err;
}
/*
 * copy_file_range(2) is a systemcall.
 * This wrapper calls vfs_copy_file_range() with lockdep disabled and returns
 * its result unchanged.
 */
static inline ssize_t vfsub_copy_file_range(struct file *src, loff_t src_pos,
					    struct file *dst, loff_t dst_pos,
					    size_t len, unsigned int flags)
{
	ssize_t copied;

	lockdep_off();
	copied = vfs_copy_file_range(src, src_pos, dst, dst_pos, len, flags);
	lockdep_on();

	return copied;
}
/* ---------------------------------------------------------------------- */
static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment