summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Bob Peterson <rpeterso@redhat.com> 2018-05-30 14:05:15 -0500
committer: Andreas Gruenbacher <agruenba@redhat.com> 2018-07-25 00:09:09 +0200
commit: f6753df35c32f17b7abf0de37aa52850ca9733c9 (patch)
tree: 775f7a52274a6e7ddb86f06b43ec6f276897c8f9
parent: d1b0cb933c8e638947ea72f3ab4e3dad4325bb96 (diff)
download: lwn-f6753df35c32f17b7abf0de37aa52850ca9733c9.tar.gz
          lwn-f6753df35c32f17b7abf0de37aa52850ca9733c9.zip
GFS2: rgrp free blocks used incorrectly
Before this patch, several functions in rgrp.c checked the value of rgd->rd_free_clone. That does not take into account blocks that were reserved by a multi-block reservation. This causes a problem when space gets tight in the file system. For example, when function gfs2_inplace_reserve checks to see if an rgrp has enough blocks to satisfy the request, it can accept an rgrp that it should reject because, although there are enough blocks to satisfy the request _now_, those blocks may be reserved for another running process.

A second problem with this occurs when we've reserved the remaining blocks in an rgrp: function rg_mblk_search() can reject an rgrp improperly because it calculates:

    u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;

But rd_reserved includes blocks that the current process just reserved in its own call to inplace_reserve. For example, it can reserve the last 128 blocks of an rgrp, then reject that same rgrp because the above calculates out to free_blocks = 0;

Consequences include, but are not limited to, (1) leaving holes, and thus increasing file system fragmentation, and (2) reporting that the file system is full long before it actually is.

This patch introduces a new function, rgd_free, which returns the number of clone-free blocks (blocks that are truly free as opposed to blocks that are still being used because an unlinked file is still open) minus the number of blocks reserved by processes, but not counting the blocks we ourselves reserved (because obviously we need to allocate them).

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
-rw-r--r-- fs/gfs2/rgrp.c 39
1 file changed, 34 insertions, 5 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 0a484a009ba2..68a81afd3b4a 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1490,6 +1490,34 @@ static void rs_insert(struct gfs2_inode *ip)
}
/**
+ * rgd_free - return the number of free blocks we can allocate.
+ * @rgd: the resource group
+ *
+ * This function returns the number of free blocks for an rgrp.
+ * That's the clone-free blocks (blocks that are free, not including those
+ * still being used for unlinked files that haven't been deleted.)
+ *
+ * It also subtracts any blocks reserved by someone else, but does not
+ * include free blocks that are still part of our current reservation,
+ * because obviously we can (and will) allocate them.
+ */
+static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs)
+{
+ u32 tot_reserved, tot_free;
+
+ if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free))
+ return 0;
+ tot_reserved = rgd->rd_reserved - rs->rs_free;
+
+ if (rgd->rd_free_clone < tot_reserved)
+ tot_reserved = 0;
+
+ tot_free = rgd->rd_free_clone - tot_reserved;
+
+ return tot_free;
+}
+
+/**
* rg_mblk_search - find a group of multiple free blocks to form a reservation
* @rgd: the resource group descriptor
* @ip: pointer to the inode for which we're reserving blocks
@@ -1504,7 +1532,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
u64 goal;
struct gfs2_blkreserv *rs = &ip->i_res;
u32 extlen;
- u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
+ u32 free_blocks = rgd_free(rgd, rs);
int ret;
struct inode *inode = &ip->i_inode;
@@ -1985,7 +2013,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
int error = 0, rg_locked, flags = 0;
u64 last_unlinked = NO_BLOCK;
int loops = 0;
- u32 skip = 0;
+ u32 free_blocks, skip = 0;
if (sdp->sd_args.ar_rgrplvb)
flags |= GL_SKIP;
@@ -2056,10 +2084,11 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
goto check_rgrp;
/* If rgrp has enough free space, use it */
- if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
+ free_blocks = rgd_free(rs->rs_rbm.rgd, rs);
+ if (free_blocks >= ap->target ||
(loops == 2 && ap->min_target &&
- rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
- ap->allowed = rs->rs_rbm.rgd->rd_free_clone;
+ free_blocks >= ap->min_target)) {
+ ap->allowed = free_blocks;
return 0;
}
check_rgrp: