fs/xfs/libxfs/xfs_metafile.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_metafile.h"
#include "xfs_trace.h"
#include "xfs_inode.h"
#include "xfs_quota.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_alloc.h"
#include "xfs_rtgroup.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_rtrefcount_btree.h"

/*
 * Lookup table pairing each metafile type with its printable name.  The
 * entries are supplied by the XFS_METAFILE_TYPE_STR initializer list, which
 * is defined outside this file.  Consumed by xfs_metafile_type_str().
 */
static const struct {
	enum xfs_metafile_type	mtype;	/* type value to match against */
	const char		*name;	/* string returned for that type */
} xfs_metafile_type_strs[] = { XFS_METAFILE_TYPE_STR };

/*
 * Translate a metafile type into its printable name.
 *
 * Walks xfs_metafile_type_strs front to back and hands back the name of the
 * first entry whose type matches @metatype.  Returns NULL when no entry in
 * the table matches.
 */
const char *
xfs_metafile_type_str(enum xfs_metafile_type metatype)
{
	unsigned int	idx = 0;

	while (idx < ARRAY_SIZE(xfs_metafile_type_strs)) {
		if (xfs_metafile_type_strs[idx].mtype == metatype)
			return xfs_metafile_type_strs[idx].name;
		idx++;
	}

	/* Unknown type: the caller has to cope with a NULL name. */
	return NULL;
}

/* Set up an inode to be recognized as a metadata directory inode. */
void
xfs_metafile_set_iflag(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	enum xfs_metafile_type	metafile_type)
{
	VFS_I(ip)->i_mode &= ~0777;
	VFS_I(ip)->i_uid = GLOBAL_ROOT_UID;
	VFS_I(ip)->i_gid = GLOBAL_ROOT_GID;
	if (S_ISDIR(VFS_I(ip)->i_mode))
		ip->i_diflags |= XFS_METADIR_DIFLAGS;
	else
		ip->i_diflags |= XFS_METAFILE_DIFLAGS;
	ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
	ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
	ip->i_metatype = metafile_type;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}

/*
 * Clear the metadata directory inode flag.
 *
 * Only the XFS_DIFLAG2_METADATA bit in i_diflags2 is cleared here; i_diflags
 * and i_metatype are left as they are.  The change is logged to @tp as an
 * inode core update.  The inode is expected to already be a metadir inode
 * with a zero link count (both checked by ASSERT).
 */
void
xfs_metafile_clear_iflag(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	ASSERT(xfs_is_metadir_inode(ip));
	ASSERT(VFS_I(ip)->i_nlink == 0);

	ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}

/*
 * Can the metafile reservation, topped up by the global free block counter,
 * supply at least @rhs blocks?
 *
 * The space available to metadata files is whatever remains of the
 * reservation plus the global free block count.  Returns true if that sum
 * covers @rhs, false otherwise.
 */
static inline bool
xfs_metafile_resv_can_cover(
	struct xfs_mount	*mp,
	int64_t			rhs)
{
	if (mp->m_metafile_resv_avail < rhs) {
		/*
		 * The reservation alone falls short.  That only matters if
		 * the free block counter cannot make up the difference, so
		 * compare it against the shortfall.
		 */
		return xfs_compare_freecounter(mp, XC_FREE_BLOCKS,
				rhs - mp->m_metafile_resv_avail, 2048) >= 0;
	}

	/*
	 * The remaining reservation is enough on its own; answer without
	 * touching the per-cpu free block counter.
	 */
	return true;
}

/*
 * Report whether the metafile reservation is critically low on blocks.
 *
 * "Critical" currently means that the blocks we can get our hands on
 * (reservation plus free space) cannot cover either of:
 *   - an arbitrary small number, the maximum rt btree height, or
 *   - 10% of the reservation target.
 *
 * If neither threshold trips, the XFS_ERRTAG_METAFILE_RESV_CRITICAL error
 * injection knob gets the final say.
 */
bool
xfs_metafile_resv_critical(
	struct xfs_mount	*mp)
{
	ASSERT(xfs_has_metadir(mp));

	trace_xfs_metafile_resv_critical(mp, 0);

	/* The second check only runs when the first threshold is covered. */
	if (!xfs_metafile_resv_can_cover(mp, mp->m_rtbtree_maxlevels) ||
	    !xfs_metafile_resv_can_cover(mp,
			div_u64(mp->m_metafile_resv_target, 10)))
		return true;

	return XFS_TEST_ERROR(false, mp, XFS_ERRTAG_METAFILE_RESV_CRITICAL);
}

/*
 * Account for args->len blocks that were just allocated to a metadata file.
 *
 * Blocks are drawn from the metafile reservation first; whatever the
 * reservation cannot supply is charged against the free block counters.
 * The reservation's "used" count and the inode's block count both grow by
 * args->len, and the inode core is logged in args->tp.
 */
void
xfs_metafile_resv_alloc_space(
	struct xfs_inode	*ip,
	struct xfs_alloc_arg	*args)
{
	struct xfs_mount	*mp = ip->i_mount;
	int64_t			overrun = args->len;

	ASSERT(xfs_is_metadir_inode(ip));
	ASSERT(args->resv == XFS_AG_RESV_METAFILE);

	trace_xfs_metafile_resv_alloc_space(mp, args->len);

	mutex_lock(&mp->m_metafile_resv_lock);

	/*
	 * Take as much as possible out of the metadata inode's block
	 * reservation and update the ondisk sb counter to match.
	 */
	if (mp->m_metafile_resv_avail > 0) {
		int64_t		taken;

		taken = min_t(int64_t, overrun, mp->m_metafile_resv_avail);
		mp->m_metafile_resv_avail -= taken;
		xfs_mod_delalloc(ip, 0, -taken);
		xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -taken);
		overrun -= taken;
	}

	/*
	 * Whatever exceeds the reservation needs in-core and on-disk
	 * fdblocks updates.  When the blocks can be taken from the in-core
	 * fdblocks, only the on-disk superblock needs updating; otherwise
	 * try to steal them from the transaction's block reservation.
	 * Overruns are only expected for rmap btrees.
	 */
	if (overrun) {
		unsigned int	field = XFS_TRANS_SB_RES_FDBLOCKS;

		if (xfs_dec_fdblocks(mp, overrun, true))
			field = XFS_TRANS_SB_FDBLOCKS;

		xfs_trans_mod_sb(args->tp, field, -overrun);
	}

	mp->m_metafile_resv_used += args->len;
	mutex_unlock(&mp->m_metafile_resv_lock);

	ip->i_nblocks += args->len;
	xfs_trans_log_inode(args->tp, ip, XFS_ILOG_CORE);
}

/*
 * Return @len freed blocks from a metadata file to the metafile reservation.
 *
 * The inode's block count and the reservation's "used" count both shrink by
 * @len.  Freed blocks first refill the reservation up to its target; any
 * surplus is handed back to the filesystem's free block count.  All counter
 * changes are made through @tp, and the inode core is logged there as well.
 */
void
xfs_metafile_resv_free_space(
	struct xfs_inode	*ip,
	struct xfs_trans	*tp,
	xfs_filblks_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	int64_t			to_resv;

	ASSERT(xfs_is_metadir_inode(ip));

	trace_xfs_metafile_resv_free_space(mp, len);

	ip->i_nblocks -= len;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	mutex_lock(&mp->m_metafile_resv_lock);
	mp->m_metafile_resv_used -= len;

	/*
	 * Add the freed blocks back into the inode's delalloc reservation
	 * until it reaches the maximum size.  Update the ondisk fdblocks only.
	 *
	 * to_resv is the gap between the target and what is currently
	 * accounted for (used + avail); it is only acted upon when positive.
	 */
	to_resv = mp->m_metafile_resv_target -
		(mp->m_metafile_resv_used + mp->m_metafile_resv_avail);
	if (to_resv > 0) {
		/* Never refill with more blocks than were actually freed. */
		to_resv = min_t(int64_t, to_resv, len);
		mp->m_metafile_resv_avail += to_resv;
		xfs_mod_delalloc(ip, 0, to_resv);
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, to_resv);
		len -= to_resv;
	}
	mutex_unlock(&mp->m_metafile_resv_lock);

	/*
	 * Everything else goes back to the filesystem, so update the in-core
	 * and on-disk counters.
	 */
	if (len)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len);
}

/*
 * Tear down the metafile reservation state in @mp.
 *
 * Any blocks still sitting in the reservation are removed from the
 * superblock delalloc accounting and added back to the free block counter,
 * then all three reservation counters are reset to zero.  Both callers in
 * this file invoke this with m_metafile_resv_lock held.
 */
static void
__xfs_metafile_resv_free(
	struct xfs_mount	*mp)
{
	if (mp->m_metafile_resv_avail != 0) {
		xfs_mod_sb_delalloc(mp, -(int64_t)mp->m_metafile_resv_avail);
		xfs_add_fdblocks(mp, mp->m_metafile_resv_avail);
	}

	mp->m_metafile_resv_target = 0;
	mp->m_metafile_resv_used = 0;
	mp->m_metafile_resv_avail = 0;
}

/*
 * Release the unused part of the metafile space reservation.
 *
 * Does nothing on filesystems without the metadir feature.  Otherwise the
 * reservation is torn down under m_metafile_resv_lock.
 */
void
xfs_metafile_resv_free(
	struct xfs_mount	*mp)
{
	if (xfs_has_metadir(mp)) {
		trace_xfs_metafile_resv_free(mp, 0);

		mutex_lock(&mp->m_metafile_resv_lock);
		__xfs_metafile_resv_free(mp);
		mutex_unlock(&mp->m_metafile_resv_lock);
	}
}

/*
 * Set up a metafile space reservation.
 *
 * Sums the blocks currently used by, and the blocks we would like to have
 * reserved for, the rtrmap and rtrefcount btrees of every rt group, then
 * hides the unused part of that reservation from the free block counter.
 * Any previous reservation is released first.  The whole operation runs
 * under m_metafile_resv_lock.
 *
 * Returns 0 on success (including when metadir is disabled or nothing needs
 * a reservation), or the error from xfs_dec_fdblocks() if the free block
 * counter could not be decremented; in that case the reservation counters
 * stay zeroed.
 */
int
xfs_metafile_resv_init(
	struct xfs_mount	*mp)
{
	struct xfs_rtgroup	*rtg = NULL;
	xfs_filblks_t		used = 0, target = 0;
	xfs_filblks_t		hidden_space;
	/* Upper bound for the target: a quarter of the data device. */
	xfs_rfsblock_t		dblocks_avail = mp->m_sb.sb_dblocks / 4;
	int			error = 0;

	if (!xfs_has_metadir(mp))
		return 0;

	/*
	 * Free any previous reservation to have a clean slate.
	 */
	mutex_lock(&mp->m_metafile_resv_lock);
	__xfs_metafile_resv_free(mp);

	/*
	 * Currently the only btree metafiles that require reservations are the
	 * rtrmap and the rtrefcount.  Anything new will have to be added here
	 * as well.
	 */
	while ((rtg = xfs_rtgroup_next(mp, rtg))) {
		if (xfs_has_rtrmapbt(mp)) {
			used += rtg_rmap(rtg)->i_nblocks;
			target += xfs_rtrmapbt_calc_reserves(mp);
		}
		if (xfs_has_rtreflink(mp)) {
			used += rtg_refcount(rtg)->i_nblocks;
			target += xfs_rtrefcountbt_calc_reserves(mp);
		}
	}

	if (!target)
		goto out_unlock;

	/*
	 * Space taken by the metadata btrees is accounted on-disk as used
	 * space.  We therefore only hide the space that is reserved but not
	 * used by the trees.
	 *
	 * Cap the target at dblocks_avail first, and only then raise it to
	 * @used if the trees have already outgrown it.  Doing it in this
	 * order guarantees target >= used, so the subtraction below cannot
	 * wrap around (xfs_filblks_t is presumed to be unsigned).  With the
	 * checks the other way round, a target clamped to dblocks_avail
	 * could end up below @used and produce a bogus, enormous
	 * hidden_space.
	 */
	if (target > dblocks_avail)
		target = dblocks_avail;
	if (used > target)
		target = used;
	hidden_space = target - used;

	error = xfs_dec_fdblocks(mp, hidden_space, true);
	if (error) {
		trace_xfs_metafile_resv_init_error(mp, 0);
		goto out_unlock;
	}

	xfs_mod_sb_delalloc(mp, hidden_space);

	mp->m_metafile_resv_target = target;
	mp->m_metafile_resv_used = used;
	mp->m_metafile_resv_avail = hidden_space;

	trace_xfs_metafile_resv_init(mp, target);

out_unlock:
	mutex_unlock(&mp->m_metafile_resv_lock);
	return error;
}