// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2005-2020 Junjiro R. Okajima */ /* * policies for selecting one among multiple writable branches */ #include #include "aufs.h" /* subset of cpup_attr() */ static noinline_for_stack int au_cpdown_attr(struct path *h_path, struct dentry *h_src) { int err, sbits; struct iattr ia; struct inode *h_isrc; h_isrc = d_inode(h_src); ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID; ia.ia_mode = h_isrc->i_mode; ia.ia_uid = h_isrc->i_uid; ia.ia_gid = h_isrc->i_gid; sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID)); au_cpup_attr_flags(d_inode(h_path->dentry), h_isrc->i_flags); /* no delegation since it is just created */ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL); /* is this nfs only? */ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) { ia.ia_valid = ATTR_FORCE | ATTR_MODE; ia.ia_mode = h_isrc->i_mode; err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL); } return err; } #define AuCpdown_PARENT_OPQ 1 #define AuCpdown_WHED (1 << 1) #define AuCpdown_MADE_DIR (1 << 2) #define AuCpdown_DIROPQ (1 << 3) #define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name) #define au_fset_cpdown(flags, name) \ do { (flags) |= AuCpdown_##name; } while (0) #define au_fclr_cpdown(flags, name) \ do { (flags) &= ~AuCpdown_##name; } while (0) static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst, unsigned int *flags) { int err; struct dentry *opq_dentry; opq_dentry = au_diropq_create(dentry, bdst); err = PTR_ERR(opq_dentry); if (IS_ERR(opq_dentry)) goto out; dput(opq_dentry); au_fset_cpdown(*flags, DIROPQ); out: return err; } static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent, struct inode *dir, aufs_bindex_t bdst) { int err; struct path h_path; struct au_branch *br; br = au_sbr(dentry->d_sb, bdst); h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br); err = PTR_ERR(h_path.dentry); if (IS_ERR(h_path.dentry)) goto out; err = 0; if (d_is_positive(h_path.dentry)) { h_path.mnt = au_br_mnt(br); err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path, dentry); } dput(h_path.dentry); out: return err; } static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst, struct au_pin *pin, struct dentry *h_parent, void *arg) { int err, rerr; aufs_bindex_t bopq, btop; struct path h_path; struct dentry *parent; struct inode *h_dir, *h_inode, *inode, *dir; unsigned int *flags = arg; btop = au_dbtop(dentry); /* dentry is di-locked */ parent = dget_parent(dentry); dir = d_inode(parent); h_dir = d_inode(h_parent); AuDebugOn(h_dir != au_h_iptr(dir, bdst)); IMustLock(h_dir); err = au_lkup_neg(dentry, bdst, /*wh*/0); if (unlikely(err < 0)) goto out; h_path.dentry = au_h_dptr(dentry, bdst); h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst); err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path, 0755); if (unlikely(err)) goto out_put; au_fset_cpdown(*flags, MADE_DIR); bopq = au_dbdiropq(dentry); au_fclr_cpdown(*flags, WHED); au_fclr_cpdown(*flags, DIROPQ); if (au_dbwh(dentry) == bdst) au_fset_cpdown(*flags, WHED); if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst) au_fset_cpdown(*flags, PARENT_OPQ); h_inode = d_inode(h_path.dentry); inode_lock_nested(h_inode, AuLsc_I_CHILD); if (au_ftest_cpdown(*flags, WHED)) { err = au_cpdown_dir_opq(dentry, bdst, flags); if (unlikely(err)) { inode_unlock(h_inode); goto out_dir; } } err = au_cpdown_attr(&h_path, au_h_dptr(dentry, btop)); inode_unlock(h_inode); if (unlikely(err)) goto out_opq; if (au_ftest_cpdown(*flags, WHED)) { err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst); if (unlikely(err)) goto out_opq; } inode = d_inode(dentry); if (au_ibbot(inode) < bdst) au_set_ibbot(inode, bdst); au_set_h_iptr(inode, bdst, au_igrab(h_inode), au_hi_flags(inode, /*isdir*/1)); au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0); goto out; /* success */ /* revert */ out_opq: if (au_ftest_cpdown(*flags, DIROPQ)) { inode_lock_nested(h_inode, AuLsc_I_CHILD); rerr = au_diropq_remove(dentry, bdst); inode_unlock(h_inode); if (unlikely(rerr)) { AuIOErr("failed removing diropq for %pd b%d (%d)\n", dentry, bdst, rerr); err = -EIO; goto out; } } out_dir: if (au_ftest_cpdown(*flags, MADE_DIR)) { rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path); if (unlikely(rerr)) { AuIOErr("failed removing %pd b%d (%d)\n", dentry, bdst, rerr); err = -EIO; } } out_put: au_set_h_dptr(dentry, bdst, NULL); if (au_dbbot(dentry) == bdst) au_update_dbbot(dentry); out: dput(parent); return err; } int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst) { int err; unsigned int flags; flags = 0; err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags); return err; } /* ---------------------------------------------------------------------- */ /* policies for create */ int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex) { int err, i, j, ndentry; aufs_bindex_t bopq; struct au_dcsub_pages dpages; struct au_dpage *dpage; struct dentry **dentries, *parent, *d; err = au_dpages_init(&dpages, GFP_NOFS); if (unlikely(err)) goto out; parent = dget_parent(dentry); err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0); if (unlikely(err)) goto out_free; err = bindex; for (i = 0; i < dpages.ndpage; i++) { dpage = dpages.dpages + i; dentries = dpage->dentries; ndentry = dpage->ndentry; for (j = 0; j < ndentry; j++) { d = dentries[j]; di_read_lock_parent2(d, !AuLock_IR); bopq = au_dbdiropq(d); di_read_unlock(d, !AuLock_IR); if (bopq >= 0 && bopq < err) err = bopq; } } out_free: dput(parent); au_dpages_free(&dpages); out: return err; } static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex) { for (; bindex >= 0; bindex--) if (!au_br_rdonly(au_sbr(sb, bindex))) return bindex; return -EROFS; } /* top down parent */ static int au_wbr_create_tdp(struct dentry *dentry, unsigned int flags __maybe_unused) { int err; aufs_bindex_t btop, bindex; struct super_block *sb; struct dentry *parent, *h_parent; sb = dentry->d_sb; btop = au_dbtop(dentry); err = btop; if (!au_br_rdonly(au_sbr(sb, btop))) goto out; err = -EROFS; parent = dget_parent(dentry); for (bindex = au_dbtop(parent); bindex < btop; bindex++) { h_parent = au_h_dptr(parent, bindex); if (!h_parent || d_is_negative(h_parent)) continue; if (!au_br_rdonly(au_sbr(sb, bindex))) { err = bindex; break; } } dput(parent); /* bottom up here */ if (unlikely(err < 0)) { err = au_wbr_bu(sb, btop - 1); if (err >= 0) err = au_wbr_nonopq(dentry, err); } out: AuDbg("b%d\n", err); return err; } /* ---------------------------------------------------------------------- */ /* an exception for the policy other than tdp */ static int au_wbr_create_exp(struct dentry *dentry) { int err; aufs_bindex_t bwh, bdiropq; struct dentry *parent; err = -1; bwh = au_dbwh(dentry); parent = dget_parent(dentry); bdiropq = au_dbdiropq(parent); if (bwh >= 0) { if (bdiropq >= 0) err = min(bdiropq, bwh); else err = bwh; AuDbg("%d\n", err); } else if (bdiropq >= 0) { err = bdiropq; AuDbg("%d\n", err); } dput(parent); if (err >= 0) err = au_wbr_nonopq(dentry, err); if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err))) err = -1; AuDbg("%d\n", err); return err; } /* ---------------------------------------------------------------------- */ /* round robin */ static int au_wbr_create_init_rr(struct super_block *sb) { int err; err = au_wbr_bu(sb, au_sbbot(sb)); atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */ /* smp_mb(); */ AuDbg("b%d\n", err); return err; } static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags) { int err, nbr; unsigned int u; aufs_bindex_t bindex, bbot; struct super_block *sb; atomic_t *next; err = au_wbr_create_exp(dentry); if (err >= 0) goto out; sb = dentry->d_sb; next = &au_sbi(sb)->si_wbr_rr_next; bbot = au_sbbot(sb); nbr = bbot + 1; for (bindex = 0; bindex <= bbot; bindex++) { if (!au_ftest_wbr(flags, DIR)) { err = atomic_dec_return(next) + 1; /* modulo for 0 is meaningless */ if (unlikely(!err)) err = atomic_dec_return(next) + 1; } else err = atomic_read(next); AuDbg("%d\n", err); u = err; err = u % nbr; AuDbg("%d\n", err); if (!au_br_rdonly(au_sbr(sb, err))) break; err = -EROFS; } if (err >= 0) err = au_wbr_nonopq(dentry, err); out: AuDbg("%d\n", err); return err; } /* ---------------------------------------------------------------------- */ /* most free space */ static void au_mfs(struct dentry *dentry, struct dentry *parent) { struct super_block *sb; struct au_branch *br; struct au_wbr_mfs *mfs; struct dentry *h_parent; aufs_bindex_t bindex, bbot; int err; unsigned long long b, bavail; struct path h_path; /* reduce the stack usage */ struct kstatfs *st; st = kmalloc(sizeof(*st), GFP_NOFS); if (unlikely(!st)) { AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM); return; } bavail = 0; sb = dentry->d_sb; mfs = &au_sbi(sb)->si_wbr_mfs; MtxMustLock(&mfs->mfs_lock); mfs->mfs_bindex = -EROFS; mfs->mfsrr_bytes = 0; if (!parent) { bindex = 0; bbot = au_sbbot(sb); } else { bindex = au_dbtop(parent); bbot = au_dbtaildir(parent); } for (; bindex <= bbot; bindex++) { if (parent) { h_parent = au_h_dptr(parent, bindex); if (!h_parent || d_is_negative(h_parent)) continue; } br = au_sbr(sb, bindex); if (au_br_rdonly(br)) continue; /* sb->s_root for NFS is unreliable */ h_path.mnt = au_br_mnt(br); h_path.dentry = h_path.mnt->mnt_root; err = vfs_statfs(&h_path, st); if (unlikely(err)) { AuWarn1("failed statfs, b%d, %d\n", bindex, err); continue; } /* when the available size is equal, select the lower one */ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail) || sizeof(b) < sizeof(st->f_bsize)); b = st->f_bavail * st->f_bsize; br->br_wbr->wbr_bytes = b; if (b >= bavail) { bavail = b; mfs->mfs_bindex = bindex; mfs->mfs_jiffy = jiffies; } } mfs->mfsrr_bytes = bavail; AuDbg("b%d\n", mfs->mfs_bindex); au_kfree_rcu(st); } static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags) { int err; struct dentry *parent; struct super_block *sb; struct au_wbr_mfs *mfs; err = au_wbr_create_exp(dentry); if (err >= 0) goto out; sb = dentry->d_sb; parent = NULL; if (au_ftest_wbr(flags, PARENT)) parent = dget_parent(dentry); mfs = &au_sbi(sb)->si_wbr_mfs; mutex_lock(&mfs->mfs_lock); if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire) || mfs->mfs_bindex < 0 || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex))) au_mfs(dentry, parent); mutex_unlock(&mfs->mfs_lock); err = mfs->mfs_bindex; dput(parent); if (err >= 0) err = au_wbr_nonopq(dentry, err); out: AuDbg("b%d\n", err); return err; } static int au_wbr_create_init_mfs(struct super_block *sb) { struct au_wbr_mfs *mfs; mfs = &au_sbi(sb)->si_wbr_mfs; mutex_init(&mfs->mfs_lock); mfs->mfs_jiffy = 0; mfs->mfs_bindex = -EROFS; return 0; } static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused) { mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock); return 0; } /* ---------------------------------------------------------------------- */ /* top down regardless parent, and then mfs */ static int au_wbr_create_tdmfs(struct dentry *dentry, unsigned int flags __maybe_unused) { int err; aufs_bindex_t bwh, btail, bindex, bfound, bmfs; unsigned long long watermark; struct super_block *sb; struct au_wbr_mfs *mfs; struct au_branch *br; struct dentry *parent; sb = dentry->d_sb; mfs = &au_sbi(sb)->si_wbr_mfs; mutex_lock(&mfs->mfs_lock); if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire) || mfs->mfs_bindex < 0) au_mfs(dentry, /*parent*/NULL); watermark = mfs->mfsrr_watermark; bmfs = mfs->mfs_bindex; mutex_unlock(&mfs->mfs_lock); /* another style of au_wbr_create_exp() */ bwh = au_dbwh(dentry); parent = dget_parent(dentry); btail = au_dbtaildir(parent); if (bwh >= 0 && bwh < btail) btail = bwh; err = au_wbr_nonopq(dentry, btail); if (unlikely(err < 0)) goto out; btail = err; bfound = -1; for (bindex = 0; bindex <= btail; bindex++) { br = au_sbr(sb, bindex); if (au_br_rdonly(br)) continue; if (br->br_wbr->wbr_bytes > watermark) { bfound = bindex; break; } } err = bfound; if (err < 0) err = bmfs; out: dput(parent); AuDbg("b%d\n", err); return err; } /* ---------------------------------------------------------------------- */ /* most free space and then round robin */ static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags) { int err; struct au_wbr_mfs *mfs; err = au_wbr_create_mfs(dentry, flags); if (err >= 0) { mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs; mutex_lock(&mfs->mfs_lock); if (mfs->mfsrr_bytes < mfs->mfsrr_watermark) err = au_wbr_create_rr(dentry, flags); mutex_unlock(&mfs->mfs_lock); } AuDbg("b%d\n", err); return err; } static int au_wbr_create_init_mfsrr(struct super_block *sb) { int err; au_wbr_create_init_mfs(sb); /* ignore */ err = au_wbr_create_init_rr(sb); return err; } /* ---------------------------------------------------------------------- */ /* top down parent and most free space */ static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags) { int err, e2; unsigned long long b; aufs_bindex_t bindex, btop, bbot; struct super_block *sb; struct dentry *parent, *h_parent; struct au_branch *br; err = au_wbr_create_tdp(dentry, flags); if (unlikely(err < 0)) goto out; parent = dget_parent(dentry); btop = au_dbtop(parent); bbot = au_dbtaildir(parent); if (btop == bbot) goto out_parent; /* success */ e2 = au_wbr_create_mfs(dentry, flags); if (e2 < 0) goto out_parent; /* success */ /* when the available size is equal, select upper one */ sb = dentry->d_sb; br = au_sbr(sb, err); b = br->br_wbr->wbr_bytes; AuDbg("b%d, %llu\n", err, b); for (bindex = btop; bindex <= bbot; bindex++) { h_parent = au_h_dptr(parent, bindex); if (!h_parent || d_is_negative(h_parent)) continue; br = au_sbr(sb, bindex); if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) { b = br->br_wbr->wbr_bytes; err = bindex; AuDbg("b%d, %llu\n", err, b); } } if (err >= 0) err = au_wbr_nonopq(dentry, err); out_parent: dput(parent); out: AuDbg("b%d\n", err); return err; } /* ---------------------------------------------------------------------- */ /* * - top down parent * - most free space with parent * - most free space round-robin regardless parent */ static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags) { int err; unsigned long long watermark; struct super_block *sb; struct au_branch *br; struct au_wbr_mfs *mfs; err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT); if (unlikely(err < 0)) goto out; sb = dentry->d_sb; br = au_sbr(sb, err); mfs = &au_sbi(sb)->si_wbr_mfs; mutex_lock(&mfs->mfs_lock); watermark = mfs->mfsrr_watermark; mutex_unlock(&mfs->mfs_lock); if (br->br_wbr->wbr_bytes < watermark) /* regardless the parent dir */ err = au_wbr_create_mfsrr(dentry, flags); out: AuDbg("b%d\n", err); return err; } /* ---------------------------------------------------------------------- */ /* policies for copyup */ /* top down parent */ static int au_wbr_copyup_tdp(struct dentry *dentry) { return au_wbr_create_tdp(dentry, /*flags, anything is ok*/0); } /* bottom up parent */ static int au_wbr_copyup_bup(struct dentry *dentry) { int err; aufs_bindex_t bindex, btop; struct dentry *parent, *h_parent; struct super_block *sb; err = -EROFS; sb = dentry->d_sb; parent = dget_parent(dentry); btop = au_dbtop(parent); for (bindex = au_dbtop(dentry); bindex >= btop; bindex--) { h_parent = au_h_dptr(parent, bindex); if (!h_parent || d_is_negative(h_parent)) continue; if (!au_br_rdonly(au_sbr(sb, bindex))) { err = bindex; break; } } dput(parent); /* bottom up here */ if (unlikely(err < 0)) err = au_wbr_bu(sb, btop - 1); AuDbg("b%d\n", err); return err; } /* bottom up */ int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop) { int err; err = au_wbr_bu(dentry->d_sb, btop); AuDbg("b%d\n", err); if (err > btop) err = au_wbr_nonopq(dentry, err); AuDbg("b%d\n", err); return err; } static int au_wbr_copyup_bu(struct dentry *dentry) { int err; aufs_bindex_t btop; btop = au_dbtop(dentry); err = au_wbr_do_copyup_bu(dentry, btop); return err; } /* ---------------------------------------------------------------------- */ struct au_wbr_copyup_operations au_wbr_copyup_ops[] = { [AuWbrCopyup_TDP] = { .copyup = au_wbr_copyup_tdp }, [AuWbrCopyup_BUP] = { .copyup = au_wbr_copyup_bup }, [AuWbrCopyup_BU] = { .copyup = au_wbr_copyup_bu } }; struct au_wbr_create_operations au_wbr_create_ops[] = { [AuWbrCreate_TDP] = { .create = au_wbr_create_tdp }, [AuWbrCreate_RR] = { .create = au_wbr_create_rr, .init = au_wbr_create_init_rr }, [AuWbrCreate_MFS] = { .create = au_wbr_create_mfs, .init = au_wbr_create_init_mfs, .fin = au_wbr_create_fin_mfs }, [AuWbrCreate_MFSV] = { .create = au_wbr_create_mfs, .init = au_wbr_create_init_mfs, .fin = au_wbr_create_fin_mfs }, [AuWbrCreate_MFSRR] = { .create = au_wbr_create_mfsrr, .init = au_wbr_create_init_mfsrr, .fin = au_wbr_create_fin_mfs }, [AuWbrCreate_MFSRRV] = { .create = au_wbr_create_mfsrr, .init = au_wbr_create_init_mfsrr, .fin = au_wbr_create_fin_mfs }, [AuWbrCreate_TDMFS] = { .create = au_wbr_create_tdmfs, .init = au_wbr_create_init_mfs, .fin = au_wbr_create_fin_mfs }, [AuWbrCreate_TDMFSV] = { .create = au_wbr_create_tdmfs, .init = au_wbr_create_init_mfs, .fin = au_wbr_create_fin_mfs }, [AuWbrCreate_PMFS] = { .create = au_wbr_create_pmfs, .init = au_wbr_create_init_mfs, .fin = au_wbr_create_fin_mfs }, [AuWbrCreate_PMFSV] = { .create = au_wbr_create_pmfs, .init = au_wbr_create_init_mfs, .fin = au_wbr_create_fin_mfs }, [AuWbrCreate_PMFSRR] = { .create = au_wbr_create_pmfsrr, .init = au_wbr_create_init_mfsrr, .fin = au_wbr_create_fin_mfs }, [AuWbrCreate_PMFSRRV] = { .create = au_wbr_create_pmfsrr, .init = au_wbr_create_init_mfsrr, .fin = au_wbr_create_fin_mfs } };