summaryrefslogtreecommitdiff
path: root/fs/gfs2/util.c
blob: f52141ce948533607c9a2af2914df2279331269b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/kthread.h>
#include <linux/crc32.h>
#include <linux/gfs2_ondisk.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "log.h"
#include "lops.h"
#include "recovery.h"
#include "rgrp.h"
#include "super.h"
#include "util.h"

/*
 * Slab cache and mempool handles for GFS2 objects. Only the pointer
 * definitions live here; none of the code visible in this file assigns them.
 */
struct kmem_cache *gfs2_glock_cachep __read_mostly;
struct kmem_cache *gfs2_glock_aspace_cachep __read_mostly;
struct kmem_cache *gfs2_inode_cachep __read_mostly;
struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
struct kmem_cache *gfs2_quotad_cachep __read_mostly;
struct kmem_cache *gfs2_qadata_cachep __read_mostly;
struct kmem_cache *gfs2_trans_cachep __read_mostly;
mempool_t *gfs2_page_pool __read_mostly;

/**
 * gfs2_assert_i - Report a fatal assertion failure
 * @sdp: the filesystem
 *
 * Only emits a message through fs_emerg(); nothing else is done here.
 */
void gfs2_assert_i(struct gfs2_sbd *sdp)
{
	fs_emerg(sdp, "fatal assertion failed\n");
}

/**
 * check_journal_clean - Make sure a journal is clean for a spectator mount
 * @sdp: The GFS2 superblock
 * @jd: The journal descriptor
 * @verbose: Show more prints in the log
 *
 * Takes the journal inode's glock in shared mode, runs gfs2_jdesc_check() on
 * the journal, locates its head with gfs2_find_jhead() and requires that head
 * to carry GFS2_LOG_HEAD_UNMOUNT. The glock is dropped again before returning.
 *
 * Returns: 0 if the journal is clean; -EPERM if the glock cannot be acquired
 *          or the journal is dirty; otherwise the error reported by
 *          gfs2_jdesc_check() or gfs2_find_jhead()
 */
int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
			bool verbose)
{
	struct gfs2_inode *jip = GFS2_I(jd->jd_inode);
	struct gfs2_log_header_host lh;
	struct gfs2_holder gh;
	int rc;

	rc = gfs2_glock_nq_init(jip->i_gl, LM_ST_SHARED,
				LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE, &gh);
	if (rc) {
		/* Callers only ever see -EPERM for a locking failure. */
		if (verbose)
			fs_err(sdp,
			       "Error %d locking journal for spectator mount.\n",
			       rc);
		return -EPERM;
	}

	rc = gfs2_jdesc_check(jd);
	if (rc) {
		if (verbose)
			fs_err(sdp,
			       "Error checking journal for spectator mount.\n");
		goto release;
	}

	rc = gfs2_find_jhead(jd, &lh, false);
	if (rc) {
		if (verbose)
			fs_err(sdp,
			       "Error parsing journal for spectator mount.\n");
		goto release;
	}

	/* A head without the UNMOUNT flag is treated as a dirty journal. */
	if (lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)
		goto release;

	rc = -EPERM;
	if (verbose)
		fs_err(sdp,
		       "jid=%u: Journal is dirty, so the first mounter must not be a spectator.\n",
		       jd->jd_jid);

release:
	gfs2_glock_dq_uninit(&gh);
	return rc;
}

/**
 * gfs2_freeze_lock_shared - hold the freeze glock
 * @sdp: the superblock
 *
 * Acquires sd_freeze_gl in shared mode through the sd_freeze_gh holder and
 * logs an error if that fails.
 *
 * Returns: 0 on success, or the error from gfs2_glock_nq_init()
 */
int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp)
{
	int rc = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
				    LM_FLAG_NOEXP | GL_EXACT,
				    &sdp->sd_freeze_gh);

	if (!rc)
		return 0;

	fs_err(sdp, "can't lock the freeze glock: %d\n", rc);
	return rc;
}

/**
 * gfs2_freeze_unlock - release a freeze glock holder
 * @freeze_gh: the holder to dequeue and uninitialize
 *
 * Does nothing when the holder is not initialized.
 */
void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh)
{
	if (!gfs2_holder_initialized(freeze_gh))
		return;

	gfs2_glock_dq_uninit(freeze_gh);
}

/**
 * signal_our_withdraw - quiesce our journal and get it recovered
 * @sdp: the filesystem being withdrawn
 *
 * Does nothing if SDF_NORECOVERY is set or there is no journal descriptor.
 * Otherwise the journal is marked not live (without writing an UNMOUNT log
 * header), the filesystem is switched to read-only, and, unless the lock
 * module has no lm_lock operation (lock_nolock), the journal glocks are
 * released and the "live" glock is re-requested in EX so that other nodes are
 * prompted to recover our journal. Finally the journal inode is looked up
 * again and the journal is polled until it is clean or the retries run out.
 *
 * SDF_WITHDRAW_RECOVERY is set for the duration of the recovery steps and
 * waiters on that bit are woken on exit.
 */
static void signal_our_withdraw(struct gfs2_sbd *sdp)
{
	struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl;
	struct inode *inode;
	struct gfs2_inode *ip;
	struct gfs2_glock *i_gl;
	u64 no_formal_ino;
	int ret = 0;
	int tries;

	if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc)
		return;

	gfs2_ail_drain(sdp); /* frees all transactions */
	/*
	 * Remember what we need from the journal inode now; the inode itself
	 * is released by the iput() further down.
	 */
	inode = sdp->sd_jdesc->jd_inode;
	ip = GFS2_I(inode);
	i_gl = ip->i_gl;
	no_formal_ino = ip->i_no_formal_ino;

	/* Prevent any glock dq until withdraw recovery is complete */
	set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
	/*
	 * Don't tell dlm we're bailing until we have no more buffers in the
	 * wind. If journal had an IO error, the log code should just purge
	 * the outstanding buffers rather than submitting new IO. Making the
	 * file system read-only will flush the journal, etc.
	 *
	 * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown
	 * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write
	 * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and
	 * therefore we need to clear SDF_JOURNAL_LIVE manually.
	 */
	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
	if (!sb_rdonly(sdp->sd_vfs)) {
		/* Best effort only: proceed even if the mutex is contended. */
		bool locked = mutex_trylock(&sdp->sd_freeze_mutex);

		wake_up(&sdp->sd_logd_waitq);
		wake_up(&sdp->sd_quota_wait);

		/* Give the log up to five seconds to drain. */
		wait_event_timeout(sdp->sd_log_waitq,
				   gfs2_log_is_empty(sdp),
				   HZ * 5);

		sdp->sd_vfs->s_flags |= SB_RDONLY;

		if (locked)
			mutex_unlock(&sdp->sd_freeze_mutex);

		/*
		 * Dequeue any pending non-system glock holders that can no
		 * longer be granted because the file system is withdrawn.
		 */
		gfs2_gl_dq_holders(sdp);
	}

	if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
		if (!ret)
			ret = -EIO;
		clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
		goto skip_recovery;
	}
	/*
	 * Drop the glock for our journal so another node can recover it.
	 */
	if (gfs2_holder_initialized(&sdp->sd_journal_gh)) {
		gfs2_glock_dq_wait(&sdp->sd_journal_gh);
		gfs2_holder_uninit(&sdp->sd_journal_gh);
	}
	sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE;
	gfs2_glock_dq(&sdp->sd_jinode_gh);
	gfs2_thaw_freeze_initiator(sdp->sd_vfs);
	wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);

	/*
	 * holder_uninit to force glock_put, to force dlm to let go
	 */
	gfs2_holder_uninit(&sdp->sd_jinode_gh);

	/*
	 * Note: We need to be careful here:
	 * Our iput of jd_inode will evict it. The evict will dequeue its
	 * glock, but the glock dq will wait for the withdraw unless we have
	 * exception code in glock_dq.
	 */
	iput(inode);
	sdp->sd_jdesc->jd_inode = NULL;
	/*
	 * Wait until the journal inode's glock is freed. This allows try locks
	 * on other nodes to be successful, otherwise we remain the owner of
	 * the glock as far as dlm is concerned.
	 */
	if (i_gl->gl_ops->go_free) {
		set_bit(GLF_FREEING, &i_gl->gl_flags);
		wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
	}

	/*
	 * Dequeue the "live" glock, but keep a reference so it's never freed.
	 */
	gfs2_glock_hold(live_gl);
	gfs2_glock_dq_wait(&sdp->sd_live_gh);
	/*
	 * We enqueue the "live" glock in EX so that all other nodes
	 * get a demote request and act on it. We don't really want the
	 * lock in EX, so we send a "try" lock with 1CB to produce a callback.
	 */
	fs_warn(sdp, "Requesting recovery of jid %d.\n",
		sdp->sd_lockstruct.ls_jid);
	gfs2_holder_reinit(LM_ST_EXCLUSIVE,
			   LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | GL_NOPID,
			   &sdp->sd_live_gh);
	/*
	 * NOTE(review): msleep() takes milliseconds; confirm that
	 * GL_GLOCK_MAX_HOLD (defined outside this file) is in that unit.
	 */
	msleep(GL_GLOCK_MAX_HOLD);
	/*
	 * This will likely fail in a cluster, but succeed standalone:
	 */
	ret = gfs2_glock_nq(&sdp->sd_live_gh);

	/*
	 * If we actually got the "live" lock in EX mode, there are no other
	 * nodes available to replay our journal. So we try to replay it
	 * ourselves. We hold the "live" glock to prevent other mounters
	 * during recovery, then just dequeue it and reacquire it in our
	 * normal SH mode. Just in case the problem that caused us to
	 * withdraw prevents us from recovering our journal (e.g. io errors
	 * and such) we still check if the journal is clean before proceeding
	 * but we may wait forever until another mounter does the recovery.
	 */
	if (ret == 0) {
		fs_warn(sdp, "No other mounters found. Trying to recover our "
			"own journal jid %d.\n", sdp->sd_lockstruct.ls_jid);
		if (gfs2_recover_journal(sdp->sd_jdesc, 1))
			fs_warn(sdp, "Unable to recover our journal jid %d.\n",
				sdp->sd_lockstruct.ls_jid);
		gfs2_glock_dq_wait(&sdp->sd_live_gh);
		gfs2_holder_reinit(LM_ST_SHARED,
				   LM_FLAG_NOEXP | GL_EXACT | GL_NOPID,
				   &sdp->sd_live_gh);
		gfs2_glock_nq(&sdp->sd_live_gh);
	}

	gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */
	clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);

	/*
	 * At this point our journal is evicted, so we need to get a new inode
	 * for it. Once done, we need to call gfs2_find_jhead which
	 * calls gfs2_map_journal_extents to map it for us again.
	 *
	 * Note that we don't really want it to look up a FREE block. The
	 * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup
	 * which would otherwise fail because it requires grabbing an rgrp
	 * glock, which would fail with -EIO because we're withdrawing.
	 */
	inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN,
				  sdp->sd_jdesc->jd_no_addr, no_formal_ino,
				  GFS2_BLKST_FREE);
	if (IS_ERR(inode)) {
		fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n",
			sdp->sd_lockstruct.ls_jid, PTR_ERR(inode));
		/*
		 * Record the failure so the summary below cannot claim that
		 * recovery completed when ret was still 0.
		 */
		ret = PTR_ERR(inode);
		goto skip_recovery;
	}
	sdp->sd_jdesc->jd_inode = inode;
	d_mark_dontcache(inode);

	/*
	 * Now wait until recovery is complete.
	 */
	for (tries = 0; tries < 10; tries++) {
		ret = check_journal_clean(sdp, sdp->sd_jdesc, false);
		if (!ret)
			break;
		/*
		 * Pause one second between polls. msleep() takes milliseconds,
		 * so passing HZ made the delay depend on the tick rate.
		 */
		ssleep(1);
		fs_warn(sdp, "Waiting for journal recovery jid %d.\n",
			sdp->sd_lockstruct.ls_jid);
	}
skip_recovery:
	if (!ret)
		fs_warn(sdp, "Journal recovery complete for jid %d.\n",
			sdp->sd_lockstruct.ls_jid);
	else
		fs_warn(sdp, "Journal recovery skipped for jid %d until next "
			"mount.\n", sdp->sd_lockstruct.ls_jid);
	fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held);
	sdp->sd_glock_dqs_held = 0;
	wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY);
}

void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
	    test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	fs_err(sdp, "%pV", &vaf);
	va_end(args);
}

/**
 * gfs2_withdraw - withdraw the filesystem, or panic, depending on ar_errors
 * @sdp: the filesystem
 *
 * With GFS2_ERRORS_WITHDRAW, the first caller to set SDF_WITHDRAWN carries out
 * the withdraw; any later caller waits for SDF_WITHDRAW_IN_PROG to clear and
 * returns without doing anything else. With GFS2_ERRORS_PANIC, panic() is
 * called instead.
 *
 * Returns: -1 on every path that returns
 */
int gfs2_withdraw(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	const struct lm_lockops *lm = ls->ls_ops;

	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
		unsigned long old = READ_ONCE(sdp->sd_flags), new;

		/*
		 * Claim the withdraw atomically: set both bits in a single
		 * cmpxchg unless SDF_WITHDRAWN was already set by someone
		 * else, in which case wait for that withdraw to finish.
		 */
		do {
			if (old & BIT(SDF_WITHDRAWN)) {
				wait_on_bit(&sdp->sd_flags,
					    SDF_WITHDRAW_IN_PROG,
					    TASK_UNINTERRUPTIBLE);
				return -1;
			}
			new = old | BIT(SDF_WITHDRAWN) | BIT(SDF_WITHDRAW_IN_PROG);
		} while (unlikely(!try_cmpxchg(&sdp->sd_flags, &old, new)));

		fs_err(sdp, "about to withdraw this file system\n");
		/* With ar_debug set, BUG here instead of withdrawing. */
		BUG_ON(sdp->sd_args.ar_debug);

		signal_our_withdraw(sdp);

		kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);

		/*
		 * Only for the lock_dlm protocol: block until sd_wdack is
		 * completed (the completing side is not in this file).
		 */
		if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
			wait_for_completion(&sdp->sd_wdack);

		if (lm->lm_unmount) {
			fs_err(sdp, "telling LM to unmount\n");
			lm->lm_unmount(sdp);
		}
		set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
		fs_err(sdp, "File system withdrawn\n");
		dump_stack();
		/*
		 * Clear the in-progress bit and wake the callers parked in the
		 * wait_on_bit() above; the barrier orders the clear before the
		 * wakeup.
		 */
		clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
		smp_mb__after_atomic();
		wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG);
	}

	if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
		panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname);

	return -1;
}

/**
 * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
 * @sdp: the filesystem
 * @assertion: text of the failed assertion
 * @function: name of the function containing the assertion
 * @file: source file containing the assertion
 * @line: line number of the assertion
 * @delayed: request a delayed withdraw instead of an immediate one
 *
 * Nothing is logged or done if a withdraw is already pending or complete.
 */
void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
			    const char *function, char *file, unsigned int line,
			    bool delayed)
{
	if (gfs2_withdrawing_or_withdrawn(sdp))
		return;

	fs_err(sdp,
	       "fatal: assertion \"%s\" failed\n"
	       "   function = %s, file = %s, line = %u\n",
	       assertion, function, file, line);

	/*
	 * Delaying the withdraw is pointless when errors=panic was given on
	 * mount, so that policy always takes the immediate path.
	 */
	if (delayed && sdp->sd_args.ar_errors != GFS2_ERRORS_PANIC)
		gfs2_withdraw_delayed(sdp);
	else
		gfs2_withdraw(sdp);

	dump_stack();
}

/**
 * gfs2_assert_warn_i - Print a message to the console if @assertion is false
 * @sdp: the filesystem
 * @assertion: text of the failed assertion
 * @function: name of the function containing the assertion
 * @file: source file containing the assertion
 * @line: line number of the assertion
 *
 * Rate limited: returns immediately while fewer than gt_complain_secs * HZ
 * jiffies have elapsed since sd_last_warning. Otherwise it warns (withdraw
 * policy only), then either BUGs (ar_debug) or dumps the stack, then panics
 * under the panic policy, and finally records the time of this warning.
 */
void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
			const char *function, char *file, unsigned int line)
{
	unsigned long quiet_until = sdp->sd_last_warning +
				    gfs2_tune_get(sdp, gt_complain_secs) * HZ;

	if (time_before(jiffies, quiet_until))
		return;

	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
		fs_warn(sdp,
			"warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n",
			assertion, function, file, line);

	if (!sdp->sd_args.ar_debug)
		dump_stack();
	else
		BUG();

	if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
		panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
		      "GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
		      sdp->sd_fsname, assertion,
		      sdp->sd_fsname, function, file, line);

	sdp->sd_last_warning = jiffies;
}

/**
 * gfs2_consist_i - Flag a filesystem consistency error and withdraw
 * @sdp: the filesystem
 * @function: name of the function reporting the error
 * @file: source file reporting the error
 * @line: line number reporting the error
 */
void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function,
		    char *file, unsigned int line)
{
	gfs2_lm(sdp,
		"fatal: filesystem consistency error - "
		"function = %s, file = %s, line = %u\n",
		function, file, line);

	gfs2_withdraw(sdp);
}

/**
 * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
 * @ip: the inode found to be inconsistent
 * @function: name of the function reporting the error
 * @file: source file reporting the error
 * @line: line number reporting the error
 *
 * Logs the inode's formal number and address, dumps its glock, then
 * withdraws.
 */
void gfs2_consist_inode_i(struct gfs2_inode *ip,
			  const char *function, char *file, unsigned int line)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	unsigned long long formal_ino = ip->i_no_formal_ino;
	unsigned long long addr = ip->i_no_addr;

	gfs2_lm(sdp,
		"fatal: filesystem consistency error\n"
		"  inode = %llu %llu\n"
		"  function = %s, file = %s, line = %u\n",
		formal_ino, addr, function, file, line);

	gfs2_dump_glock(NULL, ip->i_gl, 1);
	gfs2_withdraw(sdp);
}

/**
 * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
 * @rgd: the resource group found to be inconsistent
 * @function: name of the function reporting the error
 * @file: source file reporting the error
 * @line: line number reporting the error
 *
 * Dumps the resource group, logs its address, dumps its glock, then
 * withdraws.
 */
void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
			  const char *function, char *file, unsigned int line)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	/* The extra 7 bytes hold the "fsid=" prefix and the ": " suffix. */
	char fs_id_buf[sizeof(sdp->sd_fsname) + 7];

	/* Bounded write: never run past the stack buffer. */
	snprintf(fs_id_buf, sizeof(fs_id_buf), "fsid=%s: ", sdp->sd_fsname);
	gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
	gfs2_lm(sdp,
		"fatal: filesystem consistency error\n"
		"  RG = %llu\n"
		"  function = %s, file = %s, line = %u\n",
		(unsigned long long)rgd->rd_addr,
		function, file, line);
	gfs2_dump_glock(NULL, rgd->rd_gl, 1);
	gfs2_withdraw(sdp);
}

/**
 * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
 * @sdp: the filesystem
 * @bh: the buffer holding the invalid metadata block
 * @type: description of the problem, printed next to the block number
 * @function: name of the function reporting the error
 * @file: source file reporting the error
 * @line: line number reporting the error
 *
 * Returns: -1 if gfs2_withdraw() returned non-zero, -2 if it returned 0.
 *          gfs2_withdraw() as defined in this file always returns -1, so
 *          -2 is not produced in practice.
 */
int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       const char *type, const char *function, char *file,
		       unsigned int line)
{
	unsigned long long blkno = bh->b_blocknr;

	gfs2_lm(sdp,
		"fatal: invalid metadata block\n"
		"  bh = %llu (%s)\n"
		"  function = %s, file = %s, line = %u\n",
		blkno, type, function, file, line);

	if (gfs2_withdraw(sdp))
		return -1;
	return -2;
}

/**
 * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
 * @sdp: the filesystem
 * @bh: the buffer holding the invalid metadata block
 * @type: the metadata type that was expected
 * @t: the metadata type that was found
 * @function: name of the function reporting the error
 * @file: source file reporting the error
 * @line: line number reporting the error
 *
 * Returns: -1 if gfs2_withdraw() returned non-zero, -2 if it returned 0.
 *          gfs2_withdraw() as defined in this file always returns -1, so
 *          -2 is not produced in practice.
 */
int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
			   u16 type, u16 t, const char *function,
			   char *file, unsigned int line)
{
	unsigned long long blkno = bh->b_blocknr;

	gfs2_lm(sdp,
		"fatal: invalid metadata block\n"
		"  bh = %llu (type: exp=%u, found=%u)\n"
		"  function = %s, file = %s, line = %u\n",
		blkno, type, t, function, file, line);

	if (gfs2_withdraw(sdp))
		return -1;
	return -2;
}

/**
 * gfs2_io_error_i - Flag an I/O error and withdraw
 * @sdp: the filesystem
 * @function: name of the function reporting the error
 * @file: source file reporting the error
 * @line: line number reporting the error
 *
 * Returns: whatever gfs2_withdraw() returns; as defined in this file that is
 *          always -1
 */
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
		    unsigned int line)
{
	int rc;

	gfs2_lm(sdp,
		"fatal: I/O error\n"
		"  function = %s, file = %s, line = %u\n",
		function, file, line);

	rc = gfs2_withdraw(sdp);
	return rc;
}

/**
 * gfs2_io_error_bh_i - Flag a buffer I/O error
 * @sdp: the filesystem
 * @bh: the buffer the I/O error was seen on
 * @function: name of the function reporting the error
 * @file: source file reporting the error
 * @line: line number reporting the error
 * @withdraw: withdraw the filesystem
 *
 * Nothing is logged or done if a withdraw is already pending or complete.
 */
void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
			const char *function, char *file, unsigned int line,
			bool withdraw)
{
	unsigned long long blkno;

	if (gfs2_withdrawing_or_withdrawn(sdp))
		return;

	blkno = bh->b_blocknr;
	fs_err(sdp, "fatal: I/O error\n"
	       "  block = %llu\n"
	       "  function = %s, file = %s, line = %u\n",
	       blkno, function, file, line);

	if (!withdraw)
		return;

	gfs2_withdraw(sdp);
}