summaryrefslogtreecommitdiff
path: root/drivers/md/dm-zoned.h
blob: 265494d3f7114be37f96d17a68fffe0a31a35341 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2017 Western Digital Corporation or its affiliates.
 *
 * This file is released under the GPL.
 */

#ifndef DM_ZONED_H
#define DM_ZONED_H

#include <linux/types.h>
#include <linux/blkdev.h>
#include <linux/device-mapper.h>
#include <linux/dm-kcopyd.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/rwsem.h>
#include <linux/rbtree.h>
#include <linux/radix-tree.h>
#include <linux/shrinker.h>

/*
 * dm-zoned always exposes block devices using a fixed 4KB block size.
 * Shift, size and offset mask of one block, counted in bytes:
 */
#define DMZ_BLOCK_SHIFT		12
#define DMZ_BLOCK_SIZE		(1 << DMZ_BLOCK_SHIFT)
#define DMZ_BLOCK_MASK		(DMZ_BLOCK_SIZE - 1)

/* The same three quantities counted in bits (8 == 1 << 3 bits per byte). */
#define DMZ_BLOCK_SHIFT_BITS	(DMZ_BLOCK_SHIFT + 3)
#define DMZ_BLOCK_SIZE_BITS	(DMZ_BLOCK_SIZE << 3)
#define DMZ_BLOCK_MASK_BITS	(DMZ_BLOCK_SIZE_BITS - 1)

/* The same three quantities counted in sectors of (1 << SECTOR_SHIFT) bytes. */
#define DMZ_BLOCK_SECTORS_SHIFT	(DMZ_BLOCK_SHIFT - SECTOR_SHIFT)
#define DMZ_BLOCK_SECTORS	(1 << DMZ_BLOCK_SECTORS_SHIFT)
#define DMZ_BLOCK_SECTORS_MASK	(DMZ_BLOCK_SECTORS - 1)

/*
 * 4KB block <-> 512B sector conversion.
 *
 * Both helpers cast their argument to sector_t before shifting by
 * DMZ_BLOCK_SECTORS_SHIFT, so the result is a sector_t. dmz_sect2blk()
 * shifts right and therefore drops any partial block (rounds down).
 */
#define dmz_blk2sect(b)		((sector_t)(b) << DMZ_BLOCK_SECTORS_SHIFT)
#define dmz_sect2blk(s)		((sector_t)(s) >> DMZ_BLOCK_SECTORS_SHIFT)

/* Block number matching the BIO iterator's current sector (bi_iter.bi_sector). */
#define dmz_bio_block(bio)	dmz_sect2blk((bio)->bi_iter.bi_sector)
/* Value of bio_sectors(bio) converted to a number of blocks. */
#define dmz_bio_blocks(bio)	dmz_sect2blk(bio_sectors(bio))

struct dmz_metadata;
struct dmz_reclaim;

/*
 * Zoned block device information.
 *
 * NOTE(review): this header only shows how bdev and reclaim are used; the
 * remaining field notes are inferred from the field names and must be
 * confirmed against the dm-zoned .c files.
 */
struct dmz_dev {
	/* Block device; passed as the %pg argument by the dmz_dev_*() macros */
	struct block_device	*bdev;
	struct dmz_metadata	*metadata;
	/* Handed to dmz_reclaim_bio_acc() by dmz_deactivate_zone() */
	struct dmz_reclaim	*reclaim;

	uuid_t			uuid;

	/* NOTE(review): unit not visible here (sector_t suggests sectors) */
	sector_t		capacity;

	/* NOTE(review): presumably the index of this device in the target */
	unsigned int		dev_idx;

	unsigned int		nr_zones;
	unsigned int		zone_offset;

	/* NOTE(review): presumably a mask of the DMZ_BDEV_* / DMZ_CHECK_BDEV bits */
	unsigned int		flags;

	sector_t		zone_nr_sectors;

	/*
	 * NOTE(review): presumably bookkeeping for random zones (total,
	 * number unmapped, unmapped list, mapped list) -- confirm.
	 */
	unsigned int		nr_rnd;
	atomic_t		unmap_nr_rnd;
	struct list_head	unmap_rnd_list;
	struct list_head	map_rnd_list;

	/* NOTE(review): presumably the same bookkeeping for sequential zones */
	unsigned int		nr_seq;
	atomic_t		unmap_nr_seq;
	struct list_head	unmap_seq_list;
	struct list_head	map_seq_list;
};

/*
 * Chunk number of a BIO: the iterator's current sector shifted right by
 * dmz_zone_nr_sectors_shift(zmd).
 */
#define dmz_bio_chunk(zmd, bio)	((bio)->bi_iter.bi_sector >> \
				 dmz_zone_nr_sectors_shift(zmd))
/*
 * Block b masked with (dmz_zone_nr_blocks(zmd) - 1). The mask only behaves
 * as "b modulo blocks-per-zone" when dmz_zone_nr_blocks() is a power of
 * two -- NOTE(review): confirm the metadata code guarantees this.
 */
#define dmz_chunk_block(zmd, b)	((b) & (dmz_zone_nr_blocks(zmd) - 1))

/*
 * Device flags, one bit per flag (values 1, 2 and 4).
 * NOTE(review): presumably stored in the flags field of struct dmz_dev.
 */
#define DMZ_BDEV_DYING		(1 << 0)
#define DMZ_CHECK_BDEV		(1 << 1)
#define DMZ_BDEV_REGULAR	(1 << 2)

/*
 * Zone descriptor.
 */
struct dm_zone {
	/* For listing the zone depending on its state */
	struct list_head	link;

	/* Device containing this zone */
	struct dmz_dev		*dev;

	/*
	 * Zone type and state: a bitmap indexed by the DMZ_* zone flag
	 * enumerators and queried with test_bit() by the zone accessor
	 * macros of this header.
	 */
	unsigned long		flags;

	/*
	 * Zone activation reference count: incremented by
	 * dmz_activate_zone(), decremented by dmz_deactivate_zone() and
	 * read by dmz_is_active().
	 */
	atomic_t		refcount;

	/* Zone id */
	unsigned int		id;

	/*
	 * Zone write pointer block (relative to the zone start block).
	 * dmz_is_empty() reports the zone as empty when this is 0.
	 */
	unsigned int		wp_block;

	/* Zone weight (number of valid blocks in the zone), see dmz_weight() */
	unsigned int		weight;

	/* The chunk that the zone maps */
	unsigned int		chunk;

	/*
	 * For a sequential data zone, pointer to the random zone
	 * used as a buffer for processing unaligned writes.
	 * For a buffer zone, this points back to the data zone.
	 */
	struct dm_zone		*bzone;
};

/*
 * Zone flags.
 *
 * The enumerators are bit numbers (starting at 0 for DMZ_CACHE), not masks:
 * the zone accessor macros of this header pass them to test_bit() together
 * with the address of the flags field of struct dm_zone.
 */
enum {
	/* Zone write type */
	DMZ_CACHE,
	DMZ_RND,
	DMZ_SEQ,

	/* Zone critical condition */
	DMZ_OFFLINE,
	DMZ_READ_ONLY,

	/* How the zone is being used */
	DMZ_META,
	DMZ_DATA,
	DMZ_BUF,
	DMZ_RESERVED,

	/* Zone internal state */
	DMZ_RECLAIM,
	DMZ_SEQ_WRITE_ERR,
	DMZ_RECLAIM_TERMINATE,
};

/*
 * Zone data accessors.
 *
 * Each macro takes a pointer to a struct dm_zone. All of them except
 * dmz_is_empty() and dmz_weight() test a single DMZ_* bit of z->flags
 * with test_bit().
 */

/* Zone write type */
#define dmz_is_cache(z)		test_bit(DMZ_CACHE, &(z)->flags)
#define dmz_is_rnd(z)		test_bit(DMZ_RND, &(z)->flags)
#define dmz_is_seq(z)		test_bit(DMZ_SEQ, &(z)->flags)
/* A zone is empty when its write pointer block is still 0 */
#define dmz_is_empty(z)		((z)->wp_block == 0)
/* Zone critical condition */
#define dmz_is_offline(z)	test_bit(DMZ_OFFLINE, &(z)->flags)
#define dmz_is_readonly(z)	test_bit(DMZ_READ_ONLY, &(z)->flags)
/* Zone internal state and reservation */
#define dmz_in_reclaim(z)	test_bit(DMZ_RECLAIM, &(z)->flags)
#define dmz_is_reserved(z)	test_bit(DMZ_RESERVED, &(z)->flags)
#define dmz_seq_write_err(z)	test_bit(DMZ_SEQ_WRITE_ERR, &(z)->flags)
#define dmz_reclaim_should_terminate(z) \
				test_bit(DMZ_RECLAIM_TERMINATE, &(z)->flags)

/* How the zone is being used */
#define dmz_is_meta(z)		test_bit(DMZ_META, &(z)->flags)
#define dmz_is_buf(z)		test_bit(DMZ_BUF, &(z)->flags)
#define dmz_is_data(z)		test_bit(DMZ_DATA, &(z)->flags)

/* Plain read of the zone weight field */
#define dmz_weight(z)		((z)->weight)

/*
 * Message functions.
 *
 * Thin wrappers around DMINFO/DMERR/DMWARN/DMDEBUG that prefix every
 * message with the block device of @dev, formatted with %pg. @dev is a
 * pointer to a struct dmz_dev; the remaining arguments are the usual
 * printf-style format string and its optional arguments.
 */
#define dmz_dev_info(dev, format, ...)	\
	DMINFO("(%pg): " format, (dev)->bdev, ##__VA_ARGS__)

#define dmz_dev_err(dev, format, ...)	\
	DMERR("(%pg): " format, (dev)->bdev, ##__VA_ARGS__)

#define dmz_dev_warn(dev, format, ...)	\
	DMWARN("(%pg): " format, (dev)->bdev, ##__VA_ARGS__)

#define dmz_dev_debug(dev, format, ...)	\
	DMDEBUG("(%pg): " format, (dev)->bdev, ##__VA_ARGS__)

/*
 * Functions defined in dm-zoned-metadata.c
 *
 * NOTE(review): only the prototypes are visible in this header. The group
 * notes below are inferred from names and signatures; confirm them against
 * dm-zoned-metadata.c before relying on them.
 */

/*
 * Metadata lifetime. dmz_ctr_metadata() receives num_dev devices through
 * dev and presumably returns the new metadata object through *zmd.
 */
int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev,
		     struct dmz_metadata **zmd, const char *devname);
void dmz_dtr_metadata(struct dmz_metadata *zmd);
int dmz_resume_metadata(struct dmz_metadata *zmd);

/* Lock/unlock pairs for the map, the metadata and flushing. */
void dmz_lock_map(struct dmz_metadata *zmd);
void dmz_unlock_map(struct dmz_metadata *zmd);
void dmz_lock_metadata(struct dmz_metadata *zmd);
void dmz_unlock_metadata(struct dmz_metadata *zmd);
void dmz_lock_flush(struct dmz_metadata *zmd);
void dmz_unlock_flush(struct dmz_metadata *zmd);
int dmz_flush_metadata(struct dmz_metadata *zmd);
const char *dmz_metadata_label(struct dmz_metadata *zmd);

/* Zone start position, as a sector and as a block, and the chunk count. */
sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone);
sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone);
unsigned int dmz_nr_chunks(struct dmz_metadata *zmd);

/*
 * Metadata-wide counterparts of dmz_check_bdev() and dmz_bdev_is_dying(),
 * which take a single struct dmz_dev.
 */
bool dmz_check_dev(struct dmz_metadata *zmd);
bool dmz_dev_is_dying(struct dmz_metadata *zmd);

/*
 * NOTE(review): presumably the bits accepted in the flags argument of
 * dmz_alloc_zone() -- confirm in dm-zoned-metadata.c. Value 0x08 is not
 * assigned.
 */
#define DMZ_ALLOC_RND		0x01
#define DMZ_ALLOC_CACHE		0x02
#define DMZ_ALLOC_SEQ		0x04
#define DMZ_ALLOC_RECLAIM	0x10

/*
 * Zone allocation and release. dmz_alloc_zone() returns a zone pointer;
 * NOTE(review): its failure value (NULL or otherwise) is not visible here.
 */
struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd,
			       unsigned int dev_idx, unsigned long flags);
void dmz_free_zone(struct dmz_metadata *zmd, struct dm_zone *zone);

/* Associate a zone with a chunk number, or drop that association. */
void dmz_map_zone(struct dmz_metadata *zmd, struct dm_zone *zone,
		  unsigned int chunk);
void dmz_unmap_zone(struct dmz_metadata *zmd, struct dm_zone *zone);
/*
 * Zone counters. NOTE(review): the variants taking idx presumably report
 * the count for one device; note that idx is a plain int here while
 * dmz_alloc_zone() takes its dev_idx as unsigned int.
 */
unsigned int dmz_nr_zones(struct dmz_metadata *zmd);
unsigned int dmz_nr_cache_zones(struct dmz_metadata *zmd);
unsigned int dmz_nr_unmap_cache_zones(struct dmz_metadata *zmd);
unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd, int idx);
unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd, int idx);
unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd, int idx);
unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd, int idx);
/*
 * Zone geometry. dmz_zone_nr_blocks() and dmz_zone_nr_sectors_shift() are
 * the accessors expanded by dmz_chunk_block() and dmz_bio_chunk().
 */
unsigned int dmz_zone_nr_blocks(struct dmz_metadata *zmd);
unsigned int dmz_zone_nr_blocks_shift(struct dmz_metadata *zmd);
unsigned int dmz_zone_nr_sectors(struct dmz_metadata *zmd);
unsigned int dmz_zone_nr_sectors_shift(struct dmz_metadata *zmd);

/*
 * Activate a zone (increment its reference count).
 *
 * The matching decrement is done by dmz_deactivate_zone(), and
 * dmz_is_active() reports whether the count is currently non-zero.
 */
static inline void dmz_activate_zone(struct dm_zone *zone)
{
	atomic_inc(&zone->refcount);
}

/*
 * Reclaim-side zone selection and locking.
 * NOTE(review): the meaning of the int returned by dmz_lock_zone_reclaim()
 * is not visible in this header -- check dm-zoned-metadata.c.
 */
int dmz_lock_zone_reclaim(struct dm_zone *zone);
void dmz_unlock_zone_reclaim(struct dm_zone *zone);
struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd,
					 unsigned int dev_idx, bool idle);

/*
 * Chunk to zone lookup. dmz_get_chunk_mapping() takes the request
 * operation (enum req_op) in addition to the chunk number.
 */
struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd,
				      unsigned int chunk, enum req_op op);
void dmz_put_chunk_mapping(struct dmz_metadata *zmd, struct dm_zone *zone);
struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
				     struct dm_zone *dzone);

/*
 * Block validity tracking. Block positions are passed as chunk_block
 * (sector_t) and counts as nr_blocks.
 * NOTE(review): return value conventions are not visible here.
 */
int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone,
			sector_t chunk_block, unsigned int nr_blocks);
int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone,
			  sector_t chunk_block, unsigned int nr_blocks);
int dmz_block_valid(struct dmz_metadata *zmd, struct dm_zone *zone,
		    sector_t chunk_block);
/* chunk_block is a pointer here, so it is presumably updated by the callee. */
int dmz_first_valid_block(struct dmz_metadata *zmd, struct dm_zone *zone,
			  sector_t *chunk_block);
int dmz_copy_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone,
			  struct dm_zone *to_zone);
int dmz_merge_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone,
			   struct dm_zone *to_zone, sector_t chunk_block);

/*
 * Functions defined in dm-zoned-reclaim.c
 *
 * NOTE(review): semantics are not visible in this header. The only use
 * shown here is dmz_reclaim_bio_acc(), which dmz_deactivate_zone() calls
 * with the reclaim pointer of the zone's device.
 */
int dmz_ctr_reclaim(struct dmz_metadata *zmd, struct dmz_reclaim **zrc, int idx);
void dmz_dtr_reclaim(struct dmz_reclaim *zrc);
void dmz_suspend_reclaim(struct dmz_reclaim *zrc);
void dmz_resume_reclaim(struct dmz_reclaim *zrc);
void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc);
void dmz_schedule_reclaim(struct dmz_reclaim *zrc);

/*
 * Functions defined in dm-zoned-target.c
 *
 * Both take a single struct dmz_dev.
 * NOTE(review): presumably related to the DMZ_BDEV_DYING and
 * DMZ_CHECK_BDEV device flags -- confirm in dm-zoned-target.c.
 */
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev);
bool dmz_check_bdev(struct dmz_dev *dmz_dev);

/*
 * Deactivate a zone. This decrements the zone reference counter; a count
 * of 0 indicates that all BIOs to the zone have completed.
 *
 * dmz_reclaim_bio_acc() is called on the reclaim context of the zone's
 * device before the counter is decremented.
 */
static inline void dmz_deactivate_zone(struct dm_zone *zone)
{
	dmz_reclaim_bio_acc(zone->dev->reclaim);
	atomic_dec(&zone->refcount);
}

/*
 * Report whether a zone is active, that is, whether its reference
 * count is currently non-zero.
 */
static inline bool dmz_is_active(struct dm_zone *zone)
{
	return atomic_read(&zone->refcount) != 0;
}

#endif /* DM_ZONED_H */