diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-07 15:19:35 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-07 15:19:35 -0700 |
commit | ae8ac6b7dbfd67f883050421fd195c153d02f5f3 (patch) | |
tree | 162db451c57aa9be511aeb704f7a4301e3ea2225 /fs/quota/dquot.c | |
parent | 460352c2f18e18accc993a9127a03e903ed8e0ed (diff) | |
parent | 6c83fd5142c68294acb0e857b7bac2ce8a5077f7 (diff) | |
download | lwn-ae8ac6b7dbfd67f883050421fd195c153d02f5f3.tar.gz lwn-ae8ac6b7dbfd67f883050421fd195c153d02f5f3.zip |
Merge branch 'quota_scaling' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull quota scaling updates from Jan Kara:
"This contains changes to make the quota subsystem more scalable.
Reportedly it improves number of files created per second on ext4
filesystem on fast storage by about a factor of 2x"
* 'quota_scaling' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: (28 commits)
quota: Add lock annotations to struct members
quota: Reduce contention on dq_data_lock
fs: Provide __inode_get_bytes()
quota: Inline dquot_[re]claim_reserved_space() into callsite
quota: Inline inode_{incr,decr}_space() into callsites
quota: Inline functions into their callsites
ext4: Disable dirty list tracking of dquots when journalling quotas
quota: Allow disabling tracking of dirty dquots in a list
quota: Remove dq_wait_unused from dquot
quota: Move locking into clear_dquot_dirty()
quota: Do not dirty bad dquots
quota: Fix possible corruption of dqi_flags
quota: Propagate ->quota_read errors from v2_read_file_info()
quota: Fix error codes in v2_read_file_info()
quota: Push dqio_sem down to ->read_file_info()
quota: Push dqio_sem down to ->write_file_info()
quota: Push dqio_sem down to ->get_next_id()
quota: Push dqio_sem down to ->release_dqblk()
quota: Remove locking for writing to the old quota format
quota: Do not acquire dqio_sem for dquot overwrites in v2 format
...
Diffstat (limited to 'fs/quota/dquot.c')
-rw-r--r-- | fs/quota/dquot.c | 504 |
1 files changed, 259 insertions, 245 deletions
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 566e6ef99f07..8381db9db6d9 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -82,16 +82,19 @@ #include <linux/uaccess.h> /* - * There are three quota SMP locks. dq_list_lock protects all lists with quotas - * and quota formats. - * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and - * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes. - * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly - * in inode_add_bytes() and inode_sub_bytes(). dq_state_lock protects - * modifications of quota state (on quotaon and quotaoff) and readers who care - * about latest values take it as well. + * There are five quota SMP locks: + * * dq_list_lock protects all lists with quotas and quota formats. + * * dquot->dq_dqb_lock protects data from dq_dqb + * * inode->i_lock protects inode->i_blocks, i_bytes and also guards + * consistency of dquot->dq_dqb with inode->i_blocks, i_bytes so that + * dquot_transfer() can stabilize amount it transfers + * * dq_data_lock protects mem_dqinfo structures and modifications of dquot + * pointers in the inode + * * dq_state_lock protects modifications of quota state (on quotaon and + * quotaoff) and readers who care about latest values take it as well. * - * The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock, + * The spinlock ordering is hence: + * dq_data_lock > dq_list_lock > i_lock > dquot->dq_dqb_lock, * dq_list_lock > dq_state_lock * * Note that some things (eg. sb pointer, type, id) doesn't change during @@ -110,17 +113,14 @@ * sure they cannot race with quotaon which first sets S_NOQUOTA flag and * then drops all pointers to dquots from an inode. * - * Each dquot has its dq_lock mutex. Locked dquots might not be referenced - * from inodes (dquot_alloc_space() and such don't check the dq_lock). - * Currently dquot is locked only when it is being read to memory (or space for - * it is being allocated) on the first dqget() and when it is being released on - * the last dqput(). The allocation and release oparations are serialized by - * the dq_lock and by checking the use count in dquot_release(). Write - * operations on dquots don't hold dq_lock as they copy data under dq_data_lock - * spinlock to internal buffers before writing. + * Each dquot has its dq_lock mutex. Dquot is locked when it is being read to + * memory (or space for it is being allocated) on the first dqget(), when it is + * being written out, and when it is being released on the last dqput(). The + * allocation and release operations are serialized by the dq_lock and by + * checking the use count in dquot_release(). * * Lock ordering (including related VFS locks) is the following: - * s_umount > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex + * s_umount > i_mutex > journal_lock > dquot->dq_lock > dqio_sem */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); @@ -129,6 +129,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); EXPORT_SYMBOL(dq_data_lock); DEFINE_STATIC_SRCU(dquot_srcu); +static DECLARE_WAIT_QUEUE_HEAD(dquot_ref_wq); + void __quota_error(struct super_block *sb, const char *func, const char *fmt, ...) { @@ -247,6 +249,7 @@ struct dqstats dqstats; EXPORT_SYMBOL(dqstats); static qsize_t inode_get_rsv_space(struct inode *inode); +static qsize_t __inode_get_rsv_space(struct inode *inode); static int __dquot_initialize(struct inode *inode, int type); static inline unsigned int @@ -342,6 +345,12 @@ int dquot_mark_dquot_dirty(struct dquot *dquot) { int ret = 1; + if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) + return 0; + + if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY) + return test_and_set_bit(DQ_MOD_B, &dquot->dq_flags); + /* If quota is dirty already, we don't have to acquire dq_list_lock */ if (test_bit(DQ_MOD_B, &dquot->dq_flags)) return 1; @@ -381,18 +390,26 @@ static inline void dqput_all(struct dquot **dquot) dqput(dquot[cnt]); } -/* This function needs dq_list_lock */ static inline int clear_dquot_dirty(struct dquot *dquot) { - if (!test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags)) + if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY) + return test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags); + + spin_lock(&dq_list_lock); + if (!test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags)) { + spin_unlock(&dq_list_lock); return 0; + } list_del_init(&dquot->dq_dirty); + spin_unlock(&dq_list_lock); return 1; } void mark_info_dirty(struct super_block *sb, int type) { - set_bit(DQF_INFO_DIRTY_B, &sb_dqopt(sb)->info[type].dqi_flags); + spin_lock(&dq_data_lock); + sb_dqopt(sb)->info[type].dqi_flags |= DQF_INFO_DIRTY; + spin_unlock(&dq_data_lock); } EXPORT_SYMBOL(mark_info_dirty); @@ -406,7 +423,6 @@ int dquot_acquire(struct dquot *dquot) struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dquot->dq_lock); - mutex_lock(&dqopt->dqio_mutex); if (!test_bit(DQ_READ_B, &dquot->dq_flags)) ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot); if (ret < 0) @@ -436,7 +452,6 @@ int dquot_acquire(struct dquot *dquot) smp_mb__before_atomic(); set_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_iolock: - mutex_unlock(&dqopt->dqio_mutex); mutex_unlock(&dquot->dq_lock); return ret; } @@ -450,21 +465,17 @@ int dquot_commit(struct dquot *dquot) int ret = 0; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); - mutex_lock(&dqopt->dqio_mutex); - spin_lock(&dq_list_lock); - if (!clear_dquot_dirty(dquot)) { - spin_unlock(&dq_list_lock); - goto out_sem; - } - spin_unlock(&dq_list_lock); + mutex_lock(&dquot->dq_lock); + if (!clear_dquot_dirty(dquot)) + goto out_lock; /* Inactive dquot can be only if there was error during read/init * => we have better not writing it */ if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot); else ret = -EIO; -out_sem: - mutex_unlock(&dqopt->dqio_mutex); +out_lock: + mutex_unlock(&dquot->dq_lock); return ret; } EXPORT_SYMBOL(dquot_commit); @@ -481,7 +492,6 @@ int dquot_release(struct dquot *dquot) /* Check whether we are not racing with some other dqget() */ if (atomic_read(&dquot->dq_count) > 1) goto out_dqlock; - mutex_lock(&dqopt->dqio_mutex); if (dqopt->ops[dquot->dq_id.type]->release_dqblk) { ret = dqopt->ops[dquot->dq_id.type]->release_dqblk(dquot); /* Write the info */ @@ -493,7 +503,6 @@ int dquot_release(struct dquot *dquot) ret = ret2; } clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); - mutex_unlock(&dqopt->dqio_mutex); out_dqlock: mutex_unlock(&dquot->dq_lock); return ret; @@ -530,22 +539,18 @@ restart: continue; /* Wait for dquot users */ if (atomic_read(&dquot->dq_count)) { - DEFINE_WAIT(wait); - dqgrab(dquot); - prepare_to_wait(&dquot->dq_wait_unused, &wait, - TASK_UNINTERRUPTIBLE); spin_unlock(&dq_list_lock); - /* Once dqput() wakes us up, we know it's time to free + /* + * Once dqput() wakes us up, we know it's time to free * the dquot. * IMPORTANT: we rely on the fact that there is always * at most one process waiting for dquot to free. * Otherwise dq_count would be > 1 and we would never * wake up. */ - if (atomic_read(&dquot->dq_count) > 1) - schedule(); - finish_wait(&dquot->dq_wait_unused, &wait); + wait_event(dquot_ref_wq, + atomic_read(&dquot->dq_count) == 1); dqput(dquot); /* At this moment dquot() need not exist (it could be * reclaimed by prune_dqcache(). Hence we must @@ -629,11 +634,9 @@ int dquot_writeback_dquots(struct super_block *sb, int type) while (!list_empty(dirty)) { dquot = list_first_entry(dirty, struct dquot, dq_dirty); - /* Dirty and inactive can be only bad dquot... */ - if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { - clear_dquot_dirty(dquot); - continue; - } + + WARN_ON(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)); + /* Now we have active dquot from which someone is * holding reference so we can safely just increase * use count */ @@ -759,12 +762,12 @@ we_slept: /* Releasing dquot during quotaoff phase? */ if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_id.type) && atomic_read(&dquot->dq_count) == 1) - wake_up(&dquot->dq_wait_unused); + wake_up(&dquot_ref_wq); spin_unlock(&dq_list_lock); return; } /* Need to release dquot? */ - if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) { + if (dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); /* Commit dquot before releasing */ ret = dquot->dq_sb->dq_op->write_dquot(dquot); @@ -776,14 +779,10 @@ we_slept: * We clear dirty bit anyway, so that we avoid * infinite loop here */ - spin_lock(&dq_list_lock); clear_dquot_dirty(dquot); - spin_unlock(&dq_list_lock); } goto we_slept; } - /* Clear flag in case dquot was inactive (something bad happened) */ - clear_dquot_dirty(dquot); if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); dquot->dq_sb->dq_op->release_dquot(dquot); @@ -818,10 +817,10 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) INIT_LIST_HEAD(&dquot->dq_inuse); INIT_HLIST_NODE(&dquot->dq_hash); INIT_LIST_HEAD(&dquot->dq_dirty); - init_waitqueue_head(&dquot->dq_wait_unused); dquot->dq_sb = sb; dquot->dq_id = make_kqid_invalid(type); atomic_set(&dquot->dq_count, 1); + spin_lock_init(&dquot->dq_dqb_lock); return dquot; } @@ -1079,42 +1078,6 @@ static void drop_dquot_ref(struct super_block *sb, int type) } } -static inline void dquot_incr_inodes(struct dquot *dquot, qsize_t number) -{ - dquot->dq_dqb.dqb_curinodes += number; -} - -static inline void dquot_incr_space(struct dquot *dquot, qsize_t number) -{ - dquot->dq_dqb.dqb_curspace += number; -} - -static inline void dquot_resv_space(struct dquot *dquot, qsize_t number) -{ - dquot->dq_dqb.dqb_rsvspace += number; -} - -/* - * Claim reserved quota space - */ -static void dquot_claim_reserved_space(struct dquot *dquot, qsize_t number) -{ - if (dquot->dq_dqb.dqb_rsvspace < number) { - WARN_ON_ONCE(1); - number = dquot->dq_dqb.dqb_rsvspace; - } - dquot->dq_dqb.dqb_curspace += number; - dquot->dq_dqb.dqb_rsvspace -= number; -} - -static void dquot_reclaim_reserved_space(struct dquot *dquot, qsize_t number) -{ - if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number)) - number = dquot->dq_dqb.dqb_curspace; - dquot->dq_dqb.dqb_rsvspace += number; - dquot->dq_dqb.dqb_curspace -= number; -} - static inline void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) { @@ -1278,21 +1241,24 @@ static int ignore_hardlimit(struct dquot *dquot) !(info->dqi_flags & DQF_ROOT_SQUASH)); } -/* needs dq_data_lock */ -static int check_idq(struct dquot *dquot, qsize_t inodes, - struct dquot_warn *warn) +static int dquot_add_inodes(struct dquot *dquot, qsize_t inodes, + struct dquot_warn *warn) { - qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes; + qsize_t newinodes; + int ret = 0; + spin_lock(&dquot->dq_dqb_lock); + newinodes = dquot->dq_dqb.dqb_curinodes + inodes; if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) - return 0; + goto add; if (dquot->dq_dqb.dqb_ihardlimit && newinodes > dquot->dq_dqb.dqb_ihardlimit && !ignore_hardlimit(dquot)) { prepare_warning(warn, dquot, QUOTA_NL_IHARDWARN); - return -EDQUOT; + ret = -EDQUOT; + goto out; } if (dquot->dq_dqb.dqb_isoftlimit && @@ -1301,7 +1267,8 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, ktime_get_real_seconds() >= dquot->dq_dqb.dqb_itime && !ignore_hardlimit(dquot)) { prepare_warning(warn, dquot, QUOTA_NL_ISOFTLONGWARN); - return -EDQUOT; + ret = -EDQUOT; + goto out; } if (dquot->dq_dqb.dqb_isoftlimit && @@ -1311,30 +1278,40 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, dquot->dq_dqb.dqb_itime = ktime_get_real_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type].dqi_igrace; } +add: + dquot->dq_dqb.dqb_curinodes = newinodes; - return 0; +out: + spin_unlock(&dquot->dq_dqb_lock); + return ret; } -/* needs dq_data_lock */ -static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, - struct dquot_warn *warn) +static int dquot_add_space(struct dquot *dquot, qsize_t space, + qsize_t rsv_space, unsigned int flags, + struct dquot_warn *warn) { qsize_t tspace; struct super_block *sb = dquot->dq_sb; + int ret = 0; + spin_lock(&dquot->dq_dqb_lock); if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) - return 0; + goto add; tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace - + space; + + space + rsv_space; + + if (flags & DQUOT_SPACE_NOFAIL) + goto add; if (dquot->dq_dqb.dqb_bhardlimit && tspace > dquot->dq_dqb.dqb_bhardlimit && !ignore_hardlimit(dquot)) { - if (!prealloc) + if (flags & DQUOT_SPACE_WARN) prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN); - return -EDQUOT; + ret = -EDQUOT; + goto out; } if (dquot->dq_dqb.dqb_bsoftlimit && @@ -1342,28 +1319,34 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, dquot->dq_dqb.dqb_btime && ktime_get_real_seconds() >= dquot->dq_dqb.dqb_btime && !ignore_hardlimit(dquot)) { - if (!prealloc) + if (flags & DQUOT_SPACE_WARN) prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN); - return -EDQUOT; + ret = -EDQUOT; + goto out; } if (dquot->dq_dqb.dqb_bsoftlimit && tspace > dquot->dq_dqb.dqb_bsoftlimit && dquot->dq_dqb.dqb_btime == 0) { - if (!prealloc) { + if (flags & DQUOT_SPACE_WARN) { prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN); dquot->dq_dqb.dqb_btime = ktime_get_real_seconds() + sb_dqopt(sb)->info[dquot->dq_id.type].dqi_bgrace; - } - else + } else { /* * We don't allow preallocation to exceed softlimit so exceeding will * be always printed */ - return -EDQUOT; + ret = -EDQUOT; + goto out; + } } - - return 0; +add: + dquot->dq_dqb.dqb_rsvspace += rsv_space; + dquot->dq_dqb.dqb_curspace += space; +out: + spin_unlock(&dquot->dq_dqb_lock); + return ret; } static int info_idq_free(struct dquot *dquot, qsize_t inodes) @@ -1502,8 +1485,15 @@ static int __dquot_initialize(struct inode *inode, int type) * did a write before quota was turned on */ rsv = inode_get_rsv_space(inode); - if (unlikely(rsv)) - dquot_resv_space(dquots[cnt], rsv); + if (unlikely(rsv)) { + spin_lock(&inode->i_lock); + /* Get reservation again under proper lock */ + rsv = __inode_get_rsv_space(inode); + spin_lock(&dquots[cnt]->dq_dqb_lock); + dquots[cnt]->dq_dqb.dqb_rsvspace += rsv; + spin_unlock(&dquots[cnt]->dq_dqb_lock); + spin_unlock(&inode->i_lock); + } } } out_lock: @@ -1598,39 +1588,12 @@ static qsize_t *inode_reserved_space(struct inode * inode) return inode->i_sb->dq_op->get_reserved_space(inode); } -void inode_add_rsv_space(struct inode *inode, qsize_t number) -{ - spin_lock(&inode->i_lock); - *inode_reserved_space(inode) += number; - spin_unlock(&inode->i_lock); -} -EXPORT_SYMBOL(inode_add_rsv_space); - -void inode_claim_rsv_space(struct inode *inode, qsize_t number) +static qsize_t __inode_get_rsv_space(struct inode *inode) { - spin_lock(&inode->i_lock); - *inode_reserved_space(inode) -= number; - __inode_add_bytes(inode, number); - spin_unlock(&inode->i_lock); -} -EXPORT_SYMBOL(inode_claim_rsv_space); - -void inode_reclaim_rsv_space(struct inode *inode, qsize_t number) -{ - spin_lock(&inode->i_lock); - *inode_reserved_space(inode) += number; - __inode_sub_bytes(inode, number); - spin_unlock(&inode->i_lock); -} -EXPORT_SYMBOL(inode_reclaim_rsv_space); - -void inode_sub_rsv_space(struct inode *inode, qsize_t number) -{ - spin_lock(&inode->i_lock); - *inode_reserved_space(inode) -= number; - spin_unlock(&inode->i_lock); + if (!inode->i_sb->dq_op->get_reserved_space) + return 0; + return *inode_reserved_space(inode); } -EXPORT_SYMBOL(inode_sub_rsv_space); static qsize_t inode_get_rsv_space(struct inode *inode) { @@ -1639,28 +1602,11 @@ static qsize_t inode_get_rsv_space(struct inode *inode) if (!inode->i_sb->dq_op->get_reserved_space) return 0; spin_lock(&inode->i_lock); - ret = *inode_reserved_space(inode); + ret = __inode_get_rsv_space(inode); spin_unlock(&inode->i_lock); return ret; } -static void inode_incr_space(struct inode *inode, qsize_t number, - int reserve) -{ - if (reserve) - inode_add_rsv_space(inode, number); - else - inode_add_bytes(inode, number); -} - -static void inode_decr_space(struct inode *inode, qsize_t number, int reserve) -{ - if (reserve) - inode_sub_rsv_space(inode, number); - else - inode_sub_bytes(inode, number); -} - /* * This functions updates i_blocks+i_bytes fields and quota information * (together with appropriate checks). @@ -1682,7 +1628,13 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) struct dquot **dquots; if (!dquot_active(inode)) { - inode_incr_space(inode, number, reserve); + if (reserve) { + spin_lock(&inode->i_lock); + *inode_reserved_space(inode) += number; + spin_unlock(&inode->i_lock); + } else { + inode_add_bytes(inode, number); + } goto out; } @@ -1691,27 +1643,41 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); - spin_lock(&dq_data_lock); + spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!dquots[cnt]) continue; - ret = check_bdq(dquots[cnt], number, - !(flags & DQUOT_SPACE_WARN), &warn[cnt]); - if (ret && !(flags & DQUOT_SPACE_NOFAIL)) { - spin_unlock(&dq_data_lock); + if (flags & DQUOT_SPACE_RESERVE) { + ret = dquot_add_space(dquots[cnt], 0, number, flags, + &warn[cnt]); + } else { + ret = dquot_add_space(dquots[cnt], number, 0, flags, + &warn[cnt]); + } + if (ret) { + /* Back out changes we already did */ + for (cnt--; cnt >= 0; cnt--) { + if (!dquots[cnt]) + continue; + spin_lock(&dquots[cnt]->dq_dqb_lock); + if (flags & DQUOT_SPACE_RESERVE) { + dquots[cnt]->dq_dqb.dqb_rsvspace -= + number; + } else { + dquots[cnt]->dq_dqb.dqb_curspace -= + number; + } + spin_unlock(&dquots[cnt]->dq_dqb_lock); + } + spin_unlock(&inode->i_lock); goto out_flush_warn; } } - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!dquots[cnt]) - continue; - if (reserve) - dquot_resv_space(dquots[cnt], number); - else - dquot_incr_space(dquots[cnt], number); - } - inode_incr_space(inode, number, reserve); - spin_unlock(&dq_data_lock); + if (reserve) + *inode_reserved_space(inode) += number; + else + __inode_add_bytes(inode, number); + spin_unlock(&inode->i_lock); if (reserve) goto out_flush_warn; @@ -1740,23 +1706,26 @@ int dquot_alloc_inode(struct inode *inode) dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); - spin_lock(&dq_data_lock); + spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!dquots[cnt]) continue; - ret = check_idq(dquots[cnt], 1, &warn[cnt]); - if (ret) + ret = dquot_add_inodes(dquots[cnt], 1, &warn[cnt]); + if (ret) { + for (cnt--; cnt >= 0; cnt--) { + if (!dquots[cnt]) + continue; + /* Back out changes we already did */ + spin_lock(&dquots[cnt]->dq_dqb_lock); + dquots[cnt]->dq_dqb.dqb_curinodes--; + spin_unlock(&dquots[cnt]->dq_dqb_lock); + } goto warn_put_all; - } - - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!dquots[cnt]) - continue; - dquot_incr_inodes(dquots[cnt], 1); + } } warn_put_all: - spin_unlock(&dq_data_lock); + spin_unlock(&inode->i_lock); if (ret == 0) mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); @@ -1774,21 +1743,33 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) int cnt, index; if (!dquot_active(inode)) { - inode_claim_rsv_space(inode, number); + spin_lock(&inode->i_lock); + *inode_reserved_space(inode) -= number; + __inode_add_bytes(inode, number); + spin_unlock(&inode->i_lock); return 0; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); - spin_lock(&dq_data_lock); + spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquots[cnt]) - dquot_claim_reserved_space(dquots[cnt], number); + if (dquots[cnt]) { + struct dquot *dquot = dquots[cnt]; + + spin_lock(&dquot->dq_dqb_lock); + if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number)) + number = dquot->dq_dqb.dqb_rsvspace; + dquot->dq_dqb.dqb_curspace += number; + dquot->dq_dqb.dqb_rsvspace -= number; + spin_unlock(&dquot->dq_dqb_lock); + } } /* Update inode bytes */ - inode_claim_rsv_space(inode, number); - spin_unlock(&dq_data_lock); + *inode_reserved_space(inode) -= number; + __inode_add_bytes(inode, number); + spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); return 0; @@ -1804,21 +1785,33 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) int cnt, index; if (!dquot_active(inode)) { - inode_reclaim_rsv_space(inode, number); + spin_lock(&inode->i_lock); + *inode_reserved_space(inode) += number; + __inode_sub_bytes(inode, number); + spin_unlock(&inode->i_lock); return; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); - spin_lock(&dq_data_lock); + spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquots[cnt]) - dquot_reclaim_reserved_space(dquots[cnt], number); + if (dquots[cnt]) { + struct dquot *dquot = dquots[cnt]; + + spin_lock(&dquot->dq_dqb_lock); + if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number)) + number = dquot->dq_dqb.dqb_curspace; + dquot->dq_dqb.dqb_rsvspace += number; + dquot->dq_dqb.dqb_curspace -= number; + spin_unlock(&dquot->dq_dqb_lock); + } } /* Update inode bytes */ - inode_reclaim_rsv_space(inode, number); - spin_unlock(&dq_data_lock); + *inode_reserved_space(inode) += number; + __inode_sub_bytes(inode, number); + spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); return; @@ -1836,19 +1829,26 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) int reserve = flags & DQUOT_SPACE_RESERVE, index; if (!dquot_active(inode)) { - inode_decr_space(inode, number, reserve); + if (reserve) { + spin_lock(&inode->i_lock); + *inode_reserved_space(inode) -= number; + spin_unlock(&inode->i_lock); + } else { + inode_sub_bytes(inode, number); + } return; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); - spin_lock(&dq_data_lock); + spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; warn[cnt].w_type = QUOTA_NL_NOWARN; if (!dquots[cnt]) continue; + spin_lock(&dquots[cnt]->dq_dqb_lock); wtype = info_bdq_free(dquots[cnt], number); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn[cnt], dquots[cnt], wtype); @@ -1856,9 +1856,13 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) dquot_free_reserved_space(dquots[cnt], number); else dquot_decr_space(dquots[cnt], number); + spin_unlock(&dquots[cnt]->dq_dqb_lock); } - inode_decr_space(inode, number, reserve); - spin_unlock(&dq_data_lock); + if (reserve) + *inode_reserved_space(inode) -= number; + else + __inode_sub_bytes(inode, number); + spin_unlock(&inode->i_lock); if (reserve) goto out_unlock; @@ -1884,19 +1888,21 @@ void dquot_free_inode(struct inode *inode) dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); - spin_lock(&dq_data_lock); + spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; warn[cnt].w_type = QUOTA_NL_NOWARN; if (!dquots[cnt]) continue; + spin_lock(&dquots[cnt]->dq_dqb_lock); wtype = info_idq_free(dquots[cnt], 1); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn[cnt], dquots[cnt], wtype); dquot_decr_inodes(dquots[cnt], 1); + spin_unlock(&dquots[cnt]->dq_dqb_lock); } - spin_unlock(&dq_data_lock); + spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); @@ -1917,7 +1923,7 @@ EXPORT_SYMBOL(dquot_free_inode); */ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) { - qsize_t space, cur_space; + qsize_t cur_space; qsize_t rsv_space = 0; qsize_t inode_usage = 1; struct dquot *transfer_from[MAXQUOTAS] = {}; @@ -1944,14 +1950,18 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) } spin_lock(&dq_data_lock); + spin_lock(&inode->i_lock); if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ + spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); return 0; } - cur_space = inode_get_bytes(inode); - rsv_space = inode_get_rsv_space(inode); - space = cur_space + rsv_space; - /* Build the transfer_from list and check the limits */ + cur_space = __inode_get_bytes(inode); + rsv_space = __inode_get_rsv_space(inode); + /* + * Build the transfer_from list, check limits, and update usage in + * the target structures. + */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { /* * Skip changes for same uid or gid or for turned off quota-type. @@ -1963,28 +1973,33 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) continue; is_valid[cnt] = 1; transfer_from[cnt] = i_dquot(inode)[cnt]; - ret = check_idq(transfer_to[cnt], inode_usage, &warn_to[cnt]); + ret = dquot_add_inodes(transfer_to[cnt], inode_usage, + &warn_to[cnt]); if (ret) goto over_quota; - ret = check_bdq(transfer_to[cnt], space, 0, &warn_to[cnt]); - if (ret) + ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0, + &warn_to[cnt]); + if (ret) { + dquot_decr_inodes(transfer_to[cnt], inode_usage); goto over_quota; + } } - /* - * Finally perform the needed transfer from transfer_from to transfer_to - */ + /* Decrease usage for source structures and update quota pointers */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!is_valid[cnt]) continue; /* Due to IO error we might not have transfer_from[] structure */ if (transfer_from[cnt]) { int wtype; + + spin_lock(&transfer_from[cnt]->dq_dqb_lock); wtype = info_idq_free(transfer_from[cnt], inode_usage); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn_from_inodes[cnt], transfer_from[cnt], wtype); - wtype = info_bdq_free(transfer_from[cnt], space); + wtype = info_bdq_free(transfer_from[cnt], + cur_space + rsv_space); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn_from_space[cnt], transfer_from[cnt], wtype); @@ -1992,14 +2007,11 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) dquot_decr_space(transfer_from[cnt], cur_space); dquot_free_reserved_space(transfer_from[cnt], rsv_space); + spin_unlock(&transfer_from[cnt]->dq_dqb_lock); } - - dquot_incr_inodes(transfer_to[cnt], inode_usage); - dquot_incr_space(transfer_to[cnt], cur_space); - dquot_resv_space(transfer_to[cnt], rsv_space); - i_dquot(inode)[cnt] = transfer_to[cnt]; } + spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); mark_all_dquot_dirty(transfer_from); @@ -2013,6 +2025,17 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) transfer_to[cnt] = transfer_from[cnt]; return 0; over_quota: + /* Back out changes we already did */ + for (cnt--; cnt >= 0; cnt--) { + if (!is_valid[cnt]) + continue; + spin_lock(&transfer_to[cnt]->dq_dqb_lock); + dquot_decr_inodes(transfer_to[cnt], inode_usage); + dquot_decr_space(transfer_to[cnt], cur_space); + dquot_free_reserved_space(transfer_to[cnt], rsv_space); + spin_unlock(&transfer_to[cnt]->dq_dqb_lock); + } + spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); flush_warnings(warn_to); return ret; @@ -2066,29 +2089,21 @@ EXPORT_SYMBOL(dquot_transfer); */ int dquot_commit_info(struct super_block *sb, int type) { - int ret; struct quota_info *dqopt = sb_dqopt(sb); - mutex_lock(&dqopt->dqio_mutex); - ret = dqopt->ops[type]->write_file_info(sb, type); - mutex_unlock(&dqopt->dqio_mutex); - return ret; + return dqopt->ops[type]->write_file_info(sb, type); } EXPORT_SYMBOL(dquot_commit_info); int dquot_get_next_id(struct super_block *sb, struct kqid *qid) { struct quota_info *dqopt = sb_dqopt(sb); - int err; if (!sb_has_quota_active(sb, qid->type)) return -ESRCH; if (!dqopt->ops[qid->type]->get_next_id) return -ENOSYS; - mutex_lock(&dqopt->dqio_mutex); - err = dqopt->ops[qid->type]->get_next_id(sb, qid); - mutex_unlock(&dqopt->dqio_mutex); - return err; + return dqopt->ops[qid->type]->get_next_id(sb, qid); } EXPORT_SYMBOL(dquot_get_next_id); @@ -2337,15 +2352,14 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, dqopt->info[type].dqi_format = fmt; dqopt->info[type].dqi_fmt_id = format_id; INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list); - mutex_lock(&dqopt->dqio_mutex); error = dqopt->ops[type]->read_file_info(sb, type); - if (error < 0) { - mutex_unlock(&dqopt->dqio_mutex); + if (error < 0) goto out_file_init; - } - if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) + if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) { + spin_lock(&dq_data_lock); dqopt->info[type].dqi_flags |= DQF_SYS_FILE; - mutex_unlock(&dqopt->dqio_mutex); + spin_unlock(&dq_data_lock); + } spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(flags, type); spin_unlock(&dq_state_lock); @@ -2572,7 +2586,7 @@ static void do_get_dqblk(struct dquot *dquot, struct qc_dqblk *di) struct mem_dqblk *dm = &dquot->dq_dqb; memset(di, 0, sizeof(*di)); - spin_lock(&dq_data_lock); + spin_lock(&dquot->dq_dqb_lock); di->d_spc_hardlimit = dm->dqb_bhardlimit; di->d_spc_softlimit = dm->dqb_bsoftlimit; di->d_ino_hardlimit = dm->dqb_ihardlimit; @@ -2581,7 +2595,7 @@ static void do_get_dqblk(struct dquot *dquot, struct qc_dqblk *di) di->d_ino_count = dm->dqb_curinodes; di->d_spc_timer = dm->dqb_btime; di->d_ino_timer = dm->dqb_itime; - spin_unlock(&dq_data_lock); + spin_unlock(&dquot->dq_dqb_lock); } int dquot_get_dqblk(struct super_block *sb, struct kqid qid, @@ -2645,7 +2659,7 @@ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di) (di->d_ino_hardlimit > dqi->dqi_max_ino_limit))) return -ERANGE; - spin_lock(&dq_data_lock); + spin_lock(&dquot->dq_dqb_lock); if (di->d_fieldmask & QC_SPACE) { dm->dqb_curspace = di->d_space - dm->dqb_rsvspace; check_blim = 1; @@ -2711,7 +2725,7 @@ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di) clear_bit(DQ_FAKE_B, &dquot->dq_flags); else set_bit(DQ_FAKE_B, &dquot->dq_flags); - spin_unlock(&dq_data_lock); + spin_unlock(&dquot->dq_dqb_lock); mark_dquot_dirty(dquot); return 0; |