summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-16 12:57:51 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-16 12:57:51 -0800
commitac7ac4618cf25e0d5cd8eba83d5f600084b65b9a (patch)
treee5d28907ff72690a0463a2238b96202d751a535c /fs
parent48aba79bcf6ea05148dc82ad9c40713960b00396 (diff)
parentfa94ba8a7b22890e6a17b39b9359e114fe18cd59 (diff)
downloadlwn-ac7ac4618cf25e0d5cd8eba83d5f600084b65b9a.tar.gz
lwn-ac7ac4618cf25e0d5cd8eba83d5f600084b65b9a.zip
Merge tag 'for-5.11/block-2020-12-14' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: "Another series of killing more code than what is being added, again thanks to Christoph's relentless cleanups and tech debt tackling. This contains: - blk-iocost improvements (Baolin Wang) - part0 iostat fix (Jeffle Xu) - Disable iopoll for split bios (Jeffle Xu) - block tracepoint cleanups (Christoph Hellwig) - Merging of struct block_device and hd_struct (Christoph Hellwig) - Rework/cleanup of how block device sizes are updated (Christoph Hellwig) - Simplification of gendisk lookup and removal of block device aliasing (Christoph Hellwig) - Block device ioctl cleanups (Christoph Hellwig) - Removal of bdget()/blkdev_get() as exported API (Christoph Hellwig) - Disk change rework, avoid ->revalidate_disk() (Christoph Hellwig) - sbitmap improvements (Pavel Begunkov) - Hybrid polling fix (Pavel Begunkov) - bvec iteration improvements (Pavel Begunkov) - Zone revalidation fixes (Damien Le Moal) - blk-throttle limit fix (Yu Kuai) - Various little fixes" * tag 'for-5.11/block-2020-12-14' of git://git.kernel.dk/linux-block: (126 commits) blk-mq: fix msec comment from micro to milli seconds blk-mq: update arg in comment of blk_mq_map_queue blk-mq: add helper allocating tagset->tags Revert "block: Fix a lockdep complaint triggered by request queue flushing" nvme-loop: use blk_mq_hctx_set_fq_lock_class to set loop's lock class blk-mq: add new API of blk_mq_hctx_set_fq_lock_class block: disable iopoll for split bio block: Improve blk_revalidate_disk_zones() checks sbitmap: simplify wrap check sbitmap: replace CAS with atomic and sbitmap: remove swap_lock sbitmap: optimise sbitmap_deferred_clear() blk-mq: skip hybrid polling if iopoll doesn't spin blk-iocost: Factor out the base vrate change into a separate function blk-iocost: Factor out the active iocgs' state check into a separate function blk-iocost: Move the usage ratio calculation to the correct place blk-iocost: Remove unnecessary advance declaration blk-iocost: Fix some typos in comments blktrace: fix up a kerneldoc comment block: remove the request_queue to argument request based tracepoints ...
Diffstat (limited to 'fs')
-rw-r--r--fs/block_dev.c755
-rw-r--r--fs/btrfs/sysfs.c15
-rw-r--r--fs/btrfs/volumes.c13
-rw-r--r--fs/btrfs/zoned.c6
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/ext4/ioctl.c2
-rw-r--r--fs/ext4/super.c18
-rw-r--r--fs/ext4/sysfs.c10
-rw-r--r--fs/f2fs/checkpoint.c5
-rw-r--r--fs/f2fs/f2fs.h2
-rw-r--r--fs/f2fs/file.c14
-rw-r--r--fs/f2fs/super.c8
-rw-r--r--fs/f2fs/sysfs.c9
-rw-r--r--fs/inode.c3
-rw-r--r--fs/internal.h7
-rw-r--r--fs/io_uring.c10
-rw-r--r--fs/pipe.c5
-rw-r--r--fs/pstore/blk.c2
-rw-r--r--fs/quota/quota.c40
-rw-r--r--fs/statfs.c2
-rw-r--r--fs/super.c93
-rw-r--r--fs/xfs/xfs_fsops.c7
22 files changed, 357 insertions, 671 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9e84b1928b94..9e56ee1f2652 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -32,6 +32,7 @@
#include <linux/cleancache.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h>
+#include <linux/part_stat.h>
#include <linux/uaccess.h>
#include <linux/suspend.h>
#include "internal.h"
@@ -110,24 +111,20 @@ EXPORT_SYMBOL(invalidate_bdev);
int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
loff_t lstart, loff_t lend)
{
- struct block_device *claimed_bdev = NULL;
- int err;
-
/*
* If we don't hold exclusive handle for the device, upgrade to it
* while we discard the buffer cache to avoid discarding buffers
* under live filesystem.
*/
if (!(mode & FMODE_EXCL)) {
- claimed_bdev = bdev->bd_contains;
- err = bd_prepare_to_claim(bdev, claimed_bdev,
- truncate_bdev_range);
+ int err = bd_prepare_to_claim(bdev, truncate_bdev_range);
if (err)
return err;
}
+
truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
- if (claimed_bdev)
- bd_abort_claiming(bdev, claimed_bdev, truncate_bdev_range);
+ if (!(mode & FMODE_EXCL))
+ bd_abort_claiming(bdev, truncate_bdev_range);
return 0;
}
EXPORT_SYMBOL(truncate_bdev_range);
@@ -548,55 +545,47 @@ EXPORT_SYMBOL(fsync_bdev);
* count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
* actually.
*/
-struct super_block *freeze_bdev(struct block_device *bdev)
+int freeze_bdev(struct block_device *bdev)
{
struct super_block *sb;
int error = 0;
mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (++bdev->bd_fsfreeze_count > 1) {
- /*
- * We don't even need to grab a reference - the first call
- * to freeze_bdev grab an active reference and only the last
- * thaw_bdev drops it.
- */
- sb = get_super(bdev);
- if (sb)
- drop_super(sb);
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return sb;
- }
+ if (++bdev->bd_fsfreeze_count > 1)
+ goto done;
sb = get_active_super(bdev);
if (!sb)
- goto out;
+ goto sync;
if (sb->s_op->freeze_super)
error = sb->s_op->freeze_super(sb);
else
error = freeze_super(sb);
+ deactivate_super(sb);
+
if (error) {
- deactivate_super(sb);
bdev->bd_fsfreeze_count--;
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return ERR_PTR(error);
+ goto done;
}
- deactivate_super(sb);
- out:
+ bdev->bd_fsfreeze_sb = sb;
+
+sync:
sync_blockdev(bdev);
+done:
mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return sb; /* thaw_bdev releases s->s_umount */
+ return error;
}
EXPORT_SYMBOL(freeze_bdev);
/**
* thaw_bdev -- unlock filesystem
* @bdev: blockdevice to unlock
- * @sb: associated superblock
*
* Unlocks the filesystem and marks it writeable again after freeze_bdev().
*/
-int thaw_bdev(struct block_device *bdev, struct super_block *sb)
+int thaw_bdev(struct block_device *bdev)
{
+ struct super_block *sb;
int error = -EINVAL;
mutex_lock(&bdev->bd_fsfreeze_mutex);
@@ -607,6 +596,7 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
if (--bdev->bd_fsfreeze_count > 0)
goto out;
+ sb = bdev->bd_fsfreeze_sb;
if (!sb)
goto out;
@@ -792,23 +782,19 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
static void bdev_free_inode(struct inode *inode)
{
+ struct block_device *bdev = I_BDEV(inode);
+
+ free_percpu(bdev->bd_stats);
+ kfree(bdev->bd_meta_info);
+
kmem_cache_free(bdev_cachep, BDEV_I(inode));
}
-static void init_once(void *foo)
+static void init_once(void *data)
{
- struct bdev_inode *ei = (struct bdev_inode *) foo;
- struct block_device *bdev = &ei->bdev;
+ struct bdev_inode *ei = data;
- memset(bdev, 0, sizeof(*bdev));
- mutex_init(&bdev->bd_mutex);
-#ifdef CONFIG_SYSFS
- INIT_LIST_HEAD(&bdev->bd_holder_disks);
-#endif
- bdev->bd_bdi = &noop_backing_dev_info;
inode_init_once(&ei->vfs_inode);
- /* Initialize mutex for freeze. */
- mutex_init(&bdev->bd_fsfreeze_mutex);
}
static void bdev_evict_inode(struct inode *inode)
@@ -870,72 +856,72 @@ void __init bdev_cache_init(void)
blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
}
-/*
- * Most likely _very_ bad one - but then it's hardly critical for small
- * /dev and can be fixed when somebody will need really large one.
- * Keep in mind that it will be fed through icache hash function too.
- */
-static inline unsigned long hash(dev_t dev)
+struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
{
- return MAJOR(dev)+MINOR(dev);
-}
+ struct block_device *bdev;
+ struct inode *inode;
-static int bdev_test(struct inode *inode, void *data)
-{
- return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
+ inode = new_inode(blockdev_superblock);
+ if (!inode)
+ return NULL;
+ inode->i_mode = S_IFBLK;
+ inode->i_rdev = 0;
+ inode->i_data.a_ops = &def_blk_aops;
+ mapping_set_gfp_mask(&inode->i_data, GFP_USER);
+
+ bdev = I_BDEV(inode);
+ memset(bdev, 0, sizeof(*bdev));
+ mutex_init(&bdev->bd_mutex);
+ mutex_init(&bdev->bd_fsfreeze_mutex);
+ spin_lock_init(&bdev->bd_size_lock);
+ bdev->bd_disk = disk;
+ bdev->bd_partno = partno;
+ bdev->bd_inode = inode;
+ bdev->bd_bdi = &noop_backing_dev_info;
+#ifdef CONFIG_SYSFS
+ INIT_LIST_HEAD(&bdev->bd_holder_disks);
+#endif
+ bdev->bd_stats = alloc_percpu(struct disk_stats);
+ if (!bdev->bd_stats) {
+ iput(inode);
+ return NULL;
+ }
+ return bdev;
}
-static int bdev_set(struct inode *inode, void *data)
+void bdev_add(struct block_device *bdev, dev_t dev)
{
- BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
- return 0;
+ bdev->bd_dev = dev;
+ bdev->bd_inode->i_rdev = dev;
+ bdev->bd_inode->i_ino = dev;
+ insert_inode_hash(bdev->bd_inode);
}
static struct block_device *bdget(dev_t dev)
{
- struct block_device *bdev;
struct inode *inode;
- inode = iget5_locked(blockdev_superblock, hash(dev),
- bdev_test, bdev_set, &dev);
-
+ inode = ilookup(blockdev_superblock, dev);
if (!inode)
return NULL;
-
- bdev = &BDEV_I(inode)->bdev;
-
- if (inode->i_state & I_NEW) {
- spin_lock_init(&bdev->bd_size_lock);
- bdev->bd_contains = NULL;
- bdev->bd_super = NULL;
- bdev->bd_inode = inode;
- bdev->bd_part_count = 0;
- inode->i_mode = S_IFBLK;
- inode->i_rdev = dev;
- inode->i_bdev = bdev;
- inode->i_data.a_ops = &def_blk_aops;
- mapping_set_gfp_mask(&inode->i_data, GFP_USER);
- unlock_new_inode(inode);
- }
- return bdev;
+ return &BDEV_I(inode)->bdev;
}
/**
* bdgrab -- Grab a reference to an already referenced block device
* @bdev: Block device to grab a reference to.
+ *
+ * Returns the block_device with an additional reference when successful,
+ * or NULL if the inode is already beeing freed.
*/
struct block_device *bdgrab(struct block_device *bdev)
{
- ihold(bdev->bd_inode);
+ if (!igrab(bdev->bd_inode))
+ return NULL;
return bdev;
}
EXPORT_SYMBOL(bdgrab);
-struct block_device *bdget_part(struct hd_struct *part)
-{
- return bdget(part_devt(part));
-}
-
long nr_blockdev_pages(void)
{
struct inode *inode;
@@ -953,67 +939,8 @@ void bdput(struct block_device *bdev)
{
iput(bdev->bd_inode);
}
-
EXPORT_SYMBOL(bdput);
-static struct block_device *bd_acquire(struct inode *inode)
-{
- struct block_device *bdev;
-
- spin_lock(&bdev_lock);
- bdev = inode->i_bdev;
- if (bdev && !inode_unhashed(bdev->bd_inode)) {
- bdgrab(bdev);
- spin_unlock(&bdev_lock);
- return bdev;
- }
- spin_unlock(&bdev_lock);
-
- /*
- * i_bdev references block device inode that was already shut down
- * (corresponding device got removed). Remove the reference and look
- * up block device inode again just in case new device got
- * reestablished under the same device number.
- */
- if (bdev)
- bd_forget(inode);
-
- bdev = bdget(inode->i_rdev);
- if (bdev) {
- spin_lock(&bdev_lock);
- if (!inode->i_bdev) {
- /*
- * We take an additional reference to bd_inode,
- * and it's released in clear_inode() of inode.
- * So, we can access it via ->i_mapping always
- * without igrab().
- */
- bdgrab(bdev);
- inode->i_bdev = bdev;
- inode->i_mapping = bdev->bd_inode->i_mapping;
- }
- spin_unlock(&bdev_lock);
- }
- return bdev;
-}
-
-/* Call when you free inode */
-
-void bd_forget(struct inode *inode)
-{
- struct block_device *bdev = NULL;
-
- spin_lock(&bdev_lock);
- if (!sb_is_blkdev_sb(inode->i_sb))
- bdev = inode->i_bdev;
- inode->i_bdev = NULL;
- inode->i_mapping = &inode->i_data;
- spin_unlock(&bdev_lock);
-
- if (bdev)
- bdput(bdev);
-}
-
/**
* bd_may_claim - test whether a block device can be claimed
* @bdev: block device of interest
@@ -1049,7 +976,6 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
/**
* bd_prepare_to_claim - claim a block device
* @bdev: block device of interest
- * @whole: the whole device containing @bdev, may equal @bdev
* @holder: holder trying to claim @bdev
*
* Claim @bdev. This function fails if @bdev is already claimed by another
@@ -1059,9 +985,12 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
* RETURNS:
* 0 if @bdev can be claimed, -EBUSY otherwise.
*/
-int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole,
- void *holder)
+int bd_prepare_to_claim(struct block_device *bdev, void *holder)
{
+ struct block_device *whole = bdev_whole(bdev);
+
+ if (WARN_ON_ONCE(!holder))
+ return -EINVAL;
retry:
spin_lock(&bdev_lock);
/* if someone else claimed, fail */
@@ -1089,27 +1018,6 @@ retry:
}
EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
-static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
-{
- struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
-
- if (!disk)
- return NULL;
- /*
- * Now that we hold gendisk reference we make sure bdev we looked up is
- * not stale. If it is, it means device got removed and created before
- * we looked up gendisk and we fail open in such case. Associating
- * unhashed bdev with newly created gendisk could lead to two bdevs
- * (and thus two independent caches) being associated with one device
- * which is bad.
- */
- if (inode_unhashed(bdev->bd_inode)) {
- put_disk_and_module(disk);
- return NULL;
- }
- return disk;
-}
-
static void bd_clear_claiming(struct block_device *whole, void *holder)
{
lockdep_assert_held(&bdev_lock);
@@ -1122,15 +1030,15 @@ static void bd_clear_claiming(struct block_device *whole, void *holder)
/**
* bd_finish_claiming - finish claiming of a block device
* @bdev: block device of interest
- * @whole: whole block device
* @holder: holder that has claimed @bdev
*
* Finish exclusive open of a block device. Mark the device as exlusively
* open by the holder and wake up all waiters for exclusive open to finish.
*/
-static void bd_finish_claiming(struct block_device *bdev,
- struct block_device *whole, void *holder)
+static void bd_finish_claiming(struct block_device *bdev, void *holder)
{
+ struct block_device *whole = bdev_whole(bdev);
+
spin_lock(&bdev_lock);
BUG_ON(!bd_may_claim(bdev, whole, holder));
/*
@@ -1155,11 +1063,10 @@ static void bd_finish_claiming(struct block_device *bdev,
* also used when exclusive open is not actually desired and we just needed
* to block other exclusive openers for a while.
*/
-void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
- void *holder)
+void bd_abort_claiming(struct block_device *bdev, void *holder)
{
spin_lock(&bdev_lock);
- bd_clear_claiming(whole, holder);
+ bd_clear_claiming(bdev_whole(bdev), holder);
spin_unlock(&bdev_lock);
}
EXPORT_SYMBOL(bd_abort_claiming);
@@ -1230,7 +1137,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
WARN_ON_ONCE(!bdev->bd_holder);
/* FIXME: remove the following once add_disk() handles errors */
- if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
+ if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir))
goto out_unlock;
holder = bd_find_holder_disk(bdev, disk);
@@ -1249,24 +1156,24 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
holder->disk = disk;
holder->refcnt = 1;
- ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
if (ret)
goto out_free;
- ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
+ ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
if (ret)
goto out_del;
/*
* bdev could be deleted beneath us which would implicitly destroy
* the holder directory. Hold on to it.
*/
- kobject_get(bdev->bd_part->holder_dir);
+ kobject_get(bdev->bd_holder_dir);
list_add(&holder->list, &bdev->bd_holder_disks);
goto out_unlock;
out_del:
- del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ del_symlink(disk->slave_dir, bdev_kobj(bdev));
out_free:
kfree(holder);
out_unlock:
@@ -1294,10 +1201,9 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
holder = bd_find_holder_disk(bdev, disk);
if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
- del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
- del_symlink(bdev->bd_part->holder_dir,
- &disk_to_dev(disk)->kobj);
- kobject_put(bdev->bd_part->holder_dir);
+ del_symlink(disk->slave_dir, bdev_kobj(bdev));
+ del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
+ kobject_put(bdev->bd_holder_dir);
list_del_init(&holder->list);
kfree(holder);
}
@@ -1307,77 +1213,6 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif
-/**
- * check_disk_size_change - checks for disk size change and adjusts bdev size.
- * @disk: struct gendisk to check
- * @bdev: struct bdev to adjust.
- * @verbose: if %true log a message about a size change if there is any
- *
- * This routine checks to see if the bdev size does not match the disk size
- * and adjusts it if it differs. When shrinking the bdev size, its all caches
- * are freed.
- */
-static void check_disk_size_change(struct gendisk *disk,
- struct block_device *bdev, bool verbose)
-{
- loff_t disk_size, bdev_size;
-
- spin_lock(&bdev->bd_size_lock);
- disk_size = (loff_t)get_capacity(disk) << 9;
- bdev_size = i_size_read(bdev->bd_inode);
- if (disk_size != bdev_size) {
- if (verbose) {
- printk(KERN_INFO
- "%s: detected capacity change from %lld to %lld\n",
- disk->disk_name, bdev_size, disk_size);
- }
- i_size_write(bdev->bd_inode, disk_size);
- }
- spin_unlock(&bdev->bd_size_lock);
-
- if (bdev_size > disk_size) {
- if (__invalidate_device(bdev, false))
- pr_warn("VFS: busy inodes on resized disk %s\n",
- disk->disk_name);
- }
-}
-
-/**
- * revalidate_disk_size - checks for disk size change and adjusts bdev size.
- * @disk: struct gendisk to check
- * @verbose: if %true log a message about a size change if there is any
- *
- * This routine checks to see if the bdev size does not match the disk size
- * and adjusts it if it differs. When shrinking the bdev size, its all caches
- * are freed.
- */
-void revalidate_disk_size(struct gendisk *disk, bool verbose)
-{
- struct block_device *bdev;
-
- /*
- * Hidden disks don't have associated bdev so there's no point in
- * revalidating them.
- */
- if (disk->flags & GENHD_FL_HIDDEN)
- return;
-
- bdev = bdget_disk(disk, 0);
- if (bdev) {
- check_disk_size_change(disk, bdev, verbose);
- bdput(bdev);
- }
-}
-EXPORT_SYMBOL(revalidate_disk_size);
-
-void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors)
-{
- spin_lock(&bdev->bd_size_lock);
- i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
- spin_unlock(&bdev->bd_size_lock);
-}
-EXPORT_SYMBOL(bd_set_nr_sectors);
-
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
int bdev_disk_changed(struct block_device *bdev, bool invalidate)
@@ -1411,8 +1246,6 @@ rescan:
disk->fops->revalidate_disk(disk);
}
- check_disk_size_change(disk, bdev, !invalidate);
-
if (get_capacity(disk)) {
ret = blk_add_partitions(disk, bdev);
if (ret == -EAGAIN)
@@ -1439,71 +1272,19 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed);
* mutex_lock(part->bd_mutex)
* mutex_lock_nested(whole->bd_mutex, 1)
*/
-
-static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
- int for_part)
+static int __blkdev_get(struct block_device *bdev, fmode_t mode)
{
- struct block_device *whole = NULL, *claiming = NULL;
- struct gendisk *disk;
- int ret;
- int partno;
- bool first_open = false, unblock_events = true, need_restart;
-
- restart:
- need_restart = false;
- ret = -ENXIO;
- disk = bdev_get_gendisk(bdev, &partno);
- if (!disk)
- goto out;
-
- if (partno) {
- whole = bdget_disk(disk, 0);
- if (!whole) {
- ret = -ENOMEM;
- goto out_put_disk;
- }
- }
-
- if (!for_part && (mode & FMODE_EXCL)) {
- WARN_ON_ONCE(!holder);
- if (whole)
- claiming = whole;
- else
- claiming = bdev;
- ret = bd_prepare_to_claim(bdev, claiming, holder);
- if (ret)
- goto out_put_whole;
- }
+ struct gendisk *disk = bdev->bd_disk;
+ int ret = 0;
- disk_block_events(disk);
- mutex_lock_nested(&bdev->bd_mutex, for_part);
if (!bdev->bd_openers) {
- first_open = true;
- bdev->bd_disk = disk;
- bdev->bd_contains = bdev;
- bdev->bd_partno = partno;
-
- if (!partno) {
- ret = -ENXIO;
- bdev->bd_part = disk_get_part(disk, partno);
- if (!bdev->bd_part)
- goto out_clear;
-
+ if (!bdev_is_partition(bdev)) {
ret = 0;
- if (disk->fops->open) {
+ if (disk->fops->open)
ret = disk->fops->open(bdev, mode);
- /*
- * If we lost a race with 'disk' being deleted,
- * try again. See md.c
- */
- if (ret == -ERESTARTSYS)
- need_restart = true;
- }
- if (!ret) {
- bd_set_nr_sectors(bdev, get_capacity(disk));
+ if (!ret)
set_init_blocksize(bdev);
- }
/*
* If the device is invalidated, rescan partition
@@ -1516,28 +1297,33 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
bdev_disk_changed(bdev, ret == -ENOMEDIUM);
if (ret)
- goto out_clear;
+ return ret;
} else {
- BUG_ON(for_part);
- ret = __blkdev_get(whole, mode, NULL, 1);
- if (ret)
- goto out_clear;
- bdev->bd_contains = bdgrab(whole);
- bdev->bd_part = disk_get_part(disk, partno);
+ struct block_device *whole = bdgrab(disk->part0);
+
+ mutex_lock_nested(&whole->bd_mutex, 1);
+ ret = __blkdev_get(whole, mode);
+ if (ret) {
+ mutex_unlock(&whole->bd_mutex);
+ bdput(whole);
+ return ret;
+ }
+ whole->bd_part_count++;
+ mutex_unlock(&whole->bd_mutex);
+
if (!(disk->flags & GENHD_FL_UP) ||
- !bdev->bd_part || !bdev->bd_part->nr_sects) {
- ret = -ENXIO;
- goto out_clear;
+ !bdev_nr_sectors(bdev)) {
+ __blkdev_put(whole, mode, 1);
+ bdput(whole);
+ return -ENXIO;
}
- bd_set_nr_sectors(bdev, bdev->bd_part->nr_sects);
set_init_blocksize(bdev);
}
if (bdev->bd_bdi == &noop_backing_dev_info)
bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
} else {
- if (bdev->bd_contains == bdev) {
- ret = 0;
+ if (!bdev_is_partition(bdev)) {
if (bdev->bd_disk->fops->open)
ret = bdev->bd_disk->fops->open(bdev, mode);
/* the same as first opener case, read comment there */
@@ -1545,101 +1331,145 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
(!ret || ret == -ENOMEDIUM))
bdev_disk_changed(bdev, ret == -ENOMEDIUM);
if (ret)
- goto out_unlock_bdev;
+ return ret;
}
}
bdev->bd_openers++;
- if (for_part)
- bdev->bd_part_count++;
- if (claiming)
- bd_finish_claiming(bdev, claiming, holder);
+ return 0;
+}
- /*
- * Block event polling for write claims if requested. Any write holder
- * makes the write_holder state stick until all are released. This is
- * good enough and tracking individual writeable reference is too
- * fragile given the way @mode is used in blkdev_get/put().
- */
- if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
- (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
- bdev->bd_write_holder = true;
- unblock_events = false;
- }
- mutex_unlock(&bdev->bd_mutex);
+struct block_device *blkdev_get_no_open(dev_t dev)
+{
+ struct block_device *bdev;
+ struct gendisk *disk;
- if (unblock_events)
- disk_unblock_events(disk);
+ down_read(&bdev_lookup_sem);
+ bdev = bdget(dev);
+ if (!bdev) {
+ up_read(&bdev_lookup_sem);
+ blk_request_module(dev);
+ down_read(&bdev_lookup_sem);
+
+ bdev = bdget(dev);
+ if (!bdev)
+ goto unlock;
+ }
- /* only one opener holds refs to the module and disk */
- if (!first_open)
- put_disk_and_module(disk);
- if (whole)
- bdput(whole);
- return 0;
+ disk = bdev->bd_disk;
+ if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj))
+ goto bdput;
+ if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
+ goto put_disk;
+ if (!try_module_get(bdev->bd_disk->fops->owner))
+ goto put_disk;
+ up_read(&bdev_lookup_sem);
+ return bdev;
+put_disk:
+ put_disk(disk);
+bdput:
+ bdput(bdev);
+unlock:
+ up_read(&bdev_lookup_sem);
+ return NULL;
+}
- out_clear:
- disk_put_part(bdev->bd_part);
- bdev->bd_disk = NULL;
- bdev->bd_part = NULL;
- if (bdev != bdev->bd_contains)
- __blkdev_put(bdev->bd_contains, mode, 1);
- bdev->bd_contains = NULL;
- out_unlock_bdev:
- if (claiming)
- bd_abort_claiming(bdev, claiming, holder);
- mutex_unlock(&bdev->bd_mutex);
- disk_unblock_events(disk);
- out_put_whole:
- if (whole)
- bdput(whole);
- out_put_disk:
- put_disk_and_module(disk);
- if (need_restart)
- goto restart;
- out:
- return ret;
+void blkdev_put_no_open(struct block_device *bdev)
+{
+ module_put(bdev->bd_disk->fops->owner);
+ put_disk(bdev->bd_disk);
+ bdput(bdev);
}
/**
- * blkdev_get - open a block device
- * @bdev: block_device to open
+ * blkdev_get_by_dev - open a block device by device number
+ * @dev: device number of block device to open
* @mode: FMODE_* mask
* @holder: exclusive holder identifier
*
- * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is
- * open with exclusive access. Specifying %FMODE_EXCL with %NULL
- * @holder is invalid. Exclusive opens may nest for the same @holder.
+ * Open the block device described by device number @dev. If @mode includes
+ * %FMODE_EXCL, the block device is opened with exclusive access. Specifying
+ * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for
+ * the same @holder.
*
- * On success, the reference count of @bdev is unchanged. On failure,
- * @bdev is put.
+ * Use this interface ONLY if you really do not have anything better - i.e. when
+ * you are behind a truly sucky interface and all you are given is a device
+ * number. Everything else should use blkdev_get_by_path().
*
* CONTEXT:
* Might sleep.
*
* RETURNS:
- * 0 on success, -errno on failure.
+ * Reference to the block_device on success, ERR_PTR(-errno) on failure.
*/
-static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
+struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
{
- int ret, perm = 0;
+ bool unblock_events = true;
+ struct block_device *bdev;
+ struct gendisk *disk;
+ int ret;
- if (mode & FMODE_READ)
- perm |= MAY_READ;
- if (mode & FMODE_WRITE)
- perm |= MAY_WRITE;
- ret = devcgroup_inode_permission(bdev->bd_inode, perm);
+ ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
+ MAJOR(dev), MINOR(dev),
+ ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) |
+ ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0));
if (ret)
- goto bdput;
+ return ERR_PTR(ret);
+
+ /*
+ * If we lost a race with 'disk' being deleted, try again. See md.c.
+ */
+retry:
+ bdev = blkdev_get_no_open(dev);
+ if (!bdev)
+ return ERR_PTR(-ENXIO);
+ disk = bdev->bd_disk;
+
+ if (mode & FMODE_EXCL) {
+ ret = bd_prepare_to_claim(bdev, holder);
+ if (ret)
+ goto put_blkdev;
+ }
+
+ disk_block_events(disk);
- ret =__blkdev_get(bdev, mode, holder, 0);
+ mutex_lock(&bdev->bd_mutex);
+ ret =__blkdev_get(bdev, mode);
if (ret)
- goto bdput;
- return 0;
+ goto abort_claiming;
+ if (mode & FMODE_EXCL) {
+ bd_finish_claiming(bdev, holder);
-bdput:
- bdput(bdev);
- return ret;
+ /*
+ * Block event polling for write claims if requested. Any write
+ * holder makes the write_holder state stick until all are
+ * released. This is good enough and tracking individual
+ * writeable reference is too fragile given the way @mode is
+ * used in blkdev_get/put().
+ */
+ if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
+ (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
+ bdev->bd_write_holder = true;
+ unblock_events = false;
+ }
+ }
+ mutex_unlock(&bdev->bd_mutex);
+
+ if (unblock_events)
+ disk_unblock_events(disk);
+ return bdev;
+
+abort_claiming:
+ if (mode & FMODE_EXCL)
+ bd_abort_claiming(bdev, holder);
+ mutex_unlock(&bdev->bd_mutex);
+ disk_unblock_events(disk);
+put_blkdev:
+ blkdev_put_no_open(bdev);
+ if (ret == -ERESTARTSYS)
+ goto retry;
+ return ERR_PTR(ret);
}
+EXPORT_SYMBOL(blkdev_get_by_dev);
/**
* blkdev_get_by_path - open a block device by name
@@ -1647,32 +1477,30 @@ bdput:
* @mode: FMODE_* mask
* @holder: exclusive holder identifier
*
- * Open the blockdevice described by the device file at @path. @mode
- * and @holder are identical to blkdev_get().
- *
- * On success, the returned block_device has reference count of one.
+ * Open the block device described by the device file at @path. If @mode
+ * includes %FMODE_EXCL, the block device is opened with exclusive access.
+ * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may
+ * nest for the same @holder.
*
* CONTEXT:
* Might sleep.
*
* RETURNS:
- * Pointer to block_device on success, ERR_PTR(-errno) on failure.
+ * Reference to the block_device on success, ERR_PTR(-errno) on failure.
*/
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
void *holder)
{
struct block_device *bdev;
- int err;
-
- bdev = lookup_bdev(path);
- if (IS_ERR(bdev))
- return bdev;
+ dev_t dev;
+ int error;
- err = blkdev_get(bdev, mode, holder);
- if (err)
- return ERR_PTR(err);
+ error = lookup_bdev(path, &dev);
+ if (error)
+ return ERR_PTR(error);
- if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+ bdev = blkdev_get_by_dev(dev, mode, holder);
+ if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
blkdev_put(bdev, mode);
return ERR_PTR(-EACCES);
}
@@ -1681,45 +1509,6 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
}
EXPORT_SYMBOL(blkdev_get_by_path);
-/**
- * blkdev_get_by_dev - open a block device by device number
- * @dev: device number of block device to open
- * @mode: FMODE_* mask
- * @holder: exclusive holder identifier
- *
- * Open the blockdevice described by device number @dev. @mode and
- * @holder are identical to blkdev_get().
- *
- * Use it ONLY if you really do not have anything better - i.e. when
- * you are behind a truly sucky interface and all you are given is a
- * device number. _Never_ to be used for internal purposes. If you
- * ever need it - reconsider your API.
- *
- * On success, the returned block_device has reference count of one.
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * Pointer to block_device on success, ERR_PTR(-errno) on failure.
- */
-struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
-{
- struct block_device *bdev;
- int err;
-
- bdev = bdget(dev);
- if (!bdev)
- return ERR_PTR(-ENOMEM);
-
- err = blkdev_get(bdev, mode, holder);
- if (err)
- return ERR_PTR(err);
-
- return bdev;
-}
-EXPORT_SYMBOL(blkdev_get_by_dev);
-
static int blkdev_open(struct inode * inode, struct file * filp)
{
struct block_device *bdev;
@@ -1741,14 +1530,12 @@ static int blkdev_open(struct inode * inode, struct file * filp)
if ((filp->f_flags & O_ACCMODE) == 3)
filp->f_mode |= FMODE_WRITE_IOCTL;
- bdev = bd_acquire(inode);
- if (bdev == NULL)
- return -ENOMEM;
-
+ bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
filp->f_mapping = bdev->bd_inode->i_mapping;
filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
-
- return blkdev_get(bdev, filp->f_mode, filp);
+ return 0;
}
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
@@ -1774,34 +1561,28 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
WARN_ON_ONCE(bdev->bd_holders);
sync_blockdev(bdev);
kill_bdev(bdev);
-
bdev_write_inode(bdev);
+ if (bdev_is_partition(bdev))
+ victim = bdev_whole(bdev);
}
- if (bdev->bd_contains == bdev) {
- if (disk->fops->release)
- disk->fops->release(disk, mode);
- }
- if (!bdev->bd_openers) {
- disk_put_part(bdev->bd_part);
- bdev->bd_part = NULL;
- bdev->bd_disk = NULL;
- if (bdev != bdev->bd_contains)
- victim = bdev->bd_contains;
- bdev->bd_contains = NULL;
-
- put_disk_and_module(disk);
- }
+
+ if (!bdev_is_partition(bdev) && disk->fops->release)
+ disk->fops->release(disk, mode);
mutex_unlock(&bdev->bd_mutex);
- bdput(bdev);
- if (victim)
+ if (victim) {
__blkdev_put(victim, mode, 1);
+ bdput(victim);
+ }
}
void blkdev_put(struct block_device *bdev, fmode_t mode)
{
+ struct gendisk *disk = bdev->bd_disk;
+
mutex_lock(&bdev->bd_mutex);
if (mode & FMODE_EXCL) {
+ struct block_device *whole = bdev_whole(bdev);
bool bdev_free;
/*
@@ -1812,13 +1593,12 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
spin_lock(&bdev_lock);
WARN_ON_ONCE(--bdev->bd_holders < 0);
- WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
+ WARN_ON_ONCE(--whole->bd_holders < 0);
- /* bd_contains might point to self, check in a separate step */
if ((bdev_free = !bdev->bd_holders))
bdev->bd_holder = NULL;
- if (!bdev->bd_contains->bd_holders)
- bdev->bd_contains->bd_holder = NULL;
+ if (!whole->bd_holders)
+ whole->bd_holder = NULL;
spin_unlock(&bdev_lock);
@@ -1827,7 +1607,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
* unblock evpoll if it was a write holder.
*/
if (bdev_free && bdev->bd_write_holder) {
- disk_unblock_events(bdev->bd_disk);
+ disk_unblock_events(disk);
bdev->bd_write_holder = false;
}
}
@@ -1837,11 +1617,11 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
* event. This is to ensure detection of media removal commanded
* from userland - e.g. eject(1).
*/
- disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
-
+ disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
mutex_unlock(&bdev->bd_mutex);
__blkdev_put(bdev, mode, 0);
+ blkdev_put_no_open(bdev);
}
EXPORT_SYMBOL(blkdev_put);
@@ -2054,37 +1834,32 @@ const struct file_operations def_blk_fops = {
* namespace if possible and return it. Return ERR_PTR(error)
* otherwise.
*/
-struct block_device *lookup_bdev(const char *pathname)
+int lookup_bdev(const char *pathname, dev_t *dev)
{
- struct block_device *bdev;
struct inode *inode;
struct path path;
int error;
if (!pathname || !*pathname)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
error = kern_path(pathname, LOOKUP_FOLLOW, &path);
if (error)
- return ERR_PTR(error);
+ return error;
inode = d_backing_inode(path.dentry);
error = -ENOTBLK;
if (!S_ISBLK(inode->i_mode))
- goto fail;
+ goto out_path_put;
error = -EACCES;
if (!may_open_dev(&path))
- goto fail;
- error = -ENOMEM;
- bdev = bd_acquire(inode);
- if (!bdev)
- goto fail;
-out:
+ goto out_path_put;
+
+ *dev = inode->i_rdev;
+ error = 0;
+out_path_put:
path_put(&path);
- return bdev;
-fail:
- bdev = ERR_PTR(error);
- goto out;
+ return error;
}
EXPORT_SYMBOL(lookup_bdev);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 4522a1c4cd08..19b9fffa2c9c 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1343,8 +1343,6 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
void btrfs_sysfs_remove_device(struct btrfs_device *device)
{
- struct hd_struct *disk;
- struct kobject *disk_kobj;
struct kobject *devices_kobj;
/*
@@ -1354,11 +1352,8 @@ void btrfs_sysfs_remove_device(struct btrfs_device *device)
devices_kobj = device->fs_info->fs_devices->devices_kobj;
ASSERT(devices_kobj);
- if (device->bdev) {
- disk = device->bdev->bd_part;
- disk_kobj = &part_to_dev(disk)->kobj;
- sysfs_remove_link(devices_kobj, disk_kobj->name);
- }
+ if (device->bdev)
+ sysfs_remove_link(devices_kobj, bdev_kobj(device->bdev)->name);
if (device->devid_kobj.state_initialized) {
kobject_del(&device->devid_kobj);
@@ -1464,11 +1459,7 @@ int btrfs_sysfs_add_device(struct btrfs_device *device)
nofs_flag = memalloc_nofs_save();
if (device->bdev) {
- struct hd_struct *disk;
- struct kobject *disk_kobj;
-
- disk = device->bdev->bd_part;
- disk_kobj = &part_to_dev(disk)->kobj;
+ struct kobject *disk_kobj = bdev_kobj(device->bdev);
ret = sysfs_create_link(devices_kobj, disk_kobj, disk_kobj->name);
if (ret) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7930e1c78c45..ee086fc56c30 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -935,16 +935,16 @@ static noinline struct btrfs_device *device_list_add(const char *path,
* make sure it's the same device if the device is mounted
*/
if (device->bdev) {
- struct block_device *path_bdev;
+ int error;
+ dev_t path_dev;
- path_bdev = lookup_bdev(path);
- if (IS_ERR(path_bdev)) {
+ error = lookup_bdev(path, &path_dev);
+ if (error) {
mutex_unlock(&fs_devices->device_list_mutex);
- return ERR_CAST(path_bdev);
+ return ERR_PTR(error);
}
- if (device->bdev != path_bdev) {
- bdput(path_bdev);
+ if (device->bdev->bd_dev != path_dev) {
mutex_unlock(&fs_devices->device_list_mutex);
/*
* device->fs_info may not be reliable here, so
@@ -959,7 +959,6 @@ static noinline struct btrfs_device *device_list_add(const char *path,
task_pid_nr(current));
return ERR_PTR(-EEXIST);
}
- bdput(path_bdev);
btrfs_info_in_rcu(device->fs_info,
"devid %llu device path %s changed to %s scanned by %s (%d)",
devid, rcu_str_deref(device->name),
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 155545180046..c38846659019 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -165,7 +165,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
if (!zone_info)
return -ENOMEM;
- nr_sectors = bdev->bd_part->nr_sects;
+ nr_sectors = bdev_nr_sectors(bdev);
zone_sectors = bdev_zone_sectors(bdev);
/* Check if it's power of 2 (see is_power_of_2) */
ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0);
@@ -505,7 +505,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
return -EINVAL;
zone_size = zone_sectors << SECTOR_SHIFT;
zone_sectors_shift = ilog2(zone_sectors);
- nr_sectors = bdev->bd_part->nr_sects;
+ nr_sectors = bdev_nr_sectors(bdev);
nr_zones = nr_sectors >> zone_sectors_shift;
sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror);
@@ -603,7 +603,7 @@ int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
zone_sectors = bdev_zone_sectors(bdev);
zone_sectors_shift = ilog2(zone_sectors);
- nr_sectors = bdev->bd_part->nr_sects;
+ nr_sectors = bdev_nr_sectors(bdev);
nr_zones = nr_sectors >> zone_sectors_shift;
sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror);
diff --git a/fs/buffer.c b/fs/buffer.c
index b56f99f82b5b..32647d2011df 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -523,7 +523,7 @@ repeat:
void emergency_thaw_bdev(struct super_block *sb)
{
- while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
+ while (sb->s_bdev && !thaw_bdev(sb->s_bdev))
printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index f0381876a7e5..524e13432447 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -624,7 +624,7 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg)
case EXT4_GOING_FLAGS_DEFAULT:
freeze_bdev(sb->s_bdev);
set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
- thaw_bdev(sb->s_bdev, sb);
+ thaw_bdev(sb->s_bdev);
break;
case EXT4_GOING_FLAGS_LOGFLUSH:
set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 94472044f4c1..a2ec60fa8811 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4044,9 +4044,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_sb = sb;
sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
sbi->s_sb_block = sb_block;
- if (sb->s_bdev->bd_part)
- sbi->s_sectors_written_start =
- part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]);
+ sbi->s_sectors_written_start =
+ part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
/* Cleanup superblock name */
strreplace(sb->s_id, '/', '!');
@@ -5505,15 +5504,10 @@ static int ext4_commit_super(struct super_block *sb, int sync)
*/
if (!(sb->s_flags & SB_RDONLY))
ext4_update_tstamp(es, s_wtime);
- if (sb->s_bdev->bd_part)
- es->s_kbytes_written =
- cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
- ((part_stat_read(sb->s_bdev->bd_part,
- sectors[STAT_WRITE]) -
- EXT4_SB(sb)->s_sectors_written_start) >> 1));
- else
- es->s_kbytes_written =
- cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
+ es->s_kbytes_written =
+ cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
+ ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
+ EXT4_SB(sb)->s_sectors_written_start) >> 1));
if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
ext4_free_blocks_count_set(es,
EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 4e27fe6ed3ae..075aa3a19ff5 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -62,11 +62,8 @@ static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf)
{
struct super_block *sb = sbi->s_buddy_cache->i_sb;
- if (!sb->s_bdev->bd_part)
- return snprintf(buf, PAGE_SIZE, "0\n");
return snprintf(buf, PAGE_SIZE, "%lu\n",
- (part_stat_read(sb->s_bdev->bd_part,
- sectors[STAT_WRITE]) -
+ (part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
sbi->s_sectors_written_start) >> 1);
}
@@ -74,12 +71,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf)
{
struct super_block *sb = sbi->s_buddy_cache->i_sb;
- if (!sb->s_bdev->bd_part)
- return snprintf(buf, PAGE_SIZE, "0\n");
return snprintf(buf, PAGE_SIZE, "%llu\n",
(unsigned long long)(sbi->s_kbytes_written +
- ((part_stat_read(sb->s_bdev->bd_part,
- sectors[STAT_WRITE]) -
+ ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
EXT4_SB(sb)->s_sectors_written_start) >> 1)));
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 023462e80e58..54a1905af052 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1395,7 +1395,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
__u32 crc32 = 0;
int i;
int cp_payload_blks = __cp_payload(sbi);
- struct super_block *sb = sbi->sb;
struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
u64 kbytes_written;
int err;
@@ -1489,9 +1488,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk += data_sum_blocks;
/* Record write statistics in the hot node summary */
- kbytes_written = sbi->kbytes_written;
- if (sb->s_bdev->bd_part)
- kbytes_written += BD_PART_WRITTEN(sbi);
+ kbytes_written = sbi->kbytes_written + BD_PART_WRITTEN(sbi);
seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9a321c52face..9f793923dabe 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1675,7 +1675,7 @@ static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi)
* and the return value is in kbytes. s is of struct f2fs_sb_info.
*/
#define BD_PART_WRITTEN(s) \
-(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[STAT_WRITE]) - \
+ (((u64)part_stat_read((s)->sb->s_bdev, sectors[STAT_WRITE]) - \
(s)->sectors_written_start) >> 1)
static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index ee861c6d9ff0..a9fc482a0e60 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2230,16 +2230,12 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
switch (in) {
case F2FS_GOING_DOWN_FULLSYNC:
- sb = freeze_bdev(sb->s_bdev);
- if (IS_ERR(sb)) {
- ret = PTR_ERR(sb);
+ ret = freeze_bdev(sb->s_bdev);
+ if (ret)
goto out;
- }
- if (sb) {
- f2fs_stop_checkpoint(sbi, false);
- set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
- thaw_bdev(sb->s_bdev, sb);
- }
+ f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
+ thaw_bdev(sb->s_bdev);
break;
case F2FS_GOING_DOWN_METASYNC:
/* do checkpoint only */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 00eff2f51807..af9f449da64b 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3151,7 +3151,7 @@ static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx,
static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
{
struct block_device *bdev = FDEV(devi).bdev;
- sector_t nr_sectors = bdev->bd_part->nr_sects;
+ sector_t nr_sectors = bdev_nr_sectors(bdev);
struct f2fs_report_zones_args rep_zone_arg;
int ret;
@@ -3700,10 +3700,8 @@ try_onemore:
}
/* For write statistics */
- if (sb->s_bdev->bd_part)
- sbi->sectors_written_start =
- (u64)part_stat_read(sb->s_bdev->bd_part,
- sectors[STAT_WRITE]);
+ sbi->sectors_written_start =
+ (u64)part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
/* Read accumulated write IO statistics if exists */
seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index ec77ccfea923..24e876e849c5 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -90,11 +90,6 @@ static ssize_t free_segments_show(struct f2fs_attr *a,
static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
- struct super_block *sb = sbi->sb;
-
- if (!sb->s_bdev->bd_part)
- return sprintf(buf, "0\n");
-
return sprintf(buf, "%llu\n",
(unsigned long long)(sbi->kbytes_written +
BD_PART_WRITTEN(sbi)));
@@ -103,12 +98,8 @@ static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
static ssize_t features_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
- struct super_block *sb = sbi->sb;
int len = 0;
- if (!sb->s_bdev->bd_part)
- return sprintf(buf, "0\n");
-
if (f2fs_sb_has_encrypt(sbi))
len += scnprintf(buf, PAGE_SIZE - len, "%s",
"encryption");
diff --git a/fs/inode.c b/fs/inode.c
index 9d78c37b00b8..cb008acf0efd 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -155,7 +155,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_bytes = 0;
inode->i_generation = 0;
inode->i_pipe = NULL;
- inode->i_bdev = NULL;
inode->i_cdev = NULL;
inode->i_link = NULL;
inode->i_dir_seq = 0;
@@ -580,8 +579,6 @@ static void evict(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
}
- if (S_ISBLK(inode->i_mode) && inode->i_bdev)
- bd_forget(inode);
if (S_ISCHR(inode->i_mode) && inode->i_cdev)
cd_forget(inode);
diff --git a/fs/internal.h b/fs/internal.h
index 6fd14ea213c3..77c50befbfbe 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -25,7 +25,6 @@ extern void __init bdev_cache_init(void);
extern int __sync_blockdev(struct block_device *bdev, int wait);
void iterate_bdevs(void (*)(struct block_device *, void *), void *);
void emergency_thaw_bdev(struct super_block *sb);
-void bd_forget(struct inode *inode);
#else
static inline void bdev_cache_init(void)
{
@@ -43,9 +42,6 @@ static inline int emergency_thaw_bdev(struct super_block *sb)
{
return 0;
}
-static inline void bd_forget(struct inode *inode)
-{
-}
#endif /* CONFIG_BLOCK */
/*
@@ -116,7 +112,8 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
*/
extern int reconfigure_super(struct fs_context *);
extern bool trylock_super(struct super_block *sb);
-extern struct super_block *user_get_super(dev_t);
+struct super_block *user_get_super(dev_t, bool excl);
+void put_super(struct super_block *sb);
extern bool mount_capable(struct fs_context *);
/*
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 22e31050f33e..6f9392c35eef 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2802,11 +2802,7 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd)
static bool io_bdev_nowait(struct block_device *bdev)
{
-#ifdef CONFIG_BLOCK
return !bdev || blk_queue_nowait(bdev_get_queue(bdev));
-#else
- return true;
-#endif
}
/*
@@ -2819,14 +2815,16 @@ static bool io_file_supports_async(struct file *file, int rw)
umode_t mode = file_inode(file)->i_mode;
if (S_ISBLK(mode)) {
- if (io_bdev_nowait(file->f_inode->i_bdev))
+ if (IS_ENABLED(CONFIG_BLOCK) &&
+ io_bdev_nowait(I_BDEV(file->f_mapping->host)))
return true;
return false;
}
if (S_ISCHR(mode) || S_ISSOCK(mode))
return true;
if (S_ISREG(mode)) {
- if (io_bdev_nowait(file->f_inode->i_sb->s_bdev) &&
+ if (IS_ENABLED(CONFIG_BLOCK) &&
+ io_bdev_nowait(file->f_inode->i_sb->s_bdev) &&
file->f_op != &io_uring_fops)
return true;
return false;
diff --git a/fs/pipe.c b/fs/pipe.c
index 0ac197658a2d..c5989cfd564d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1342,9 +1342,8 @@ out_revert_acct:
}
/*
- * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
- * location, so checking ->i_pipe is not enough to verify that this is a
- * pipe.
+ * Note that i_pipe and i_cdev share the same location, so checking ->i_pipe is
+ * not enough to verify that this is a pipe.
*/
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
{
diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
index 882d0bc0cfd7..4bb8a344957a 100644
--- a/fs/pstore/blk.c
+++ b/fs/pstore/blk.c
@@ -244,7 +244,7 @@ static struct block_device *psblk_get_bdev(void *holder,
return bdev;
}
- nr_sects = part_nr_sects_read(bdev->bd_part);
+ nr_sects = bdev_nr_sectors(bdev);
if (!nr_sects) {
pr_err("not enough space for '%s'\n", blkdev);
blkdev_put(bdev, mode);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 9af95c7a0bbe..6d16b2be5ac4 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -20,6 +20,7 @@
#include <linux/writeback.h>
#include <linux/nospec.h>
#include "compat.h"
+#include "../internal.h"
static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
qid_t id)
@@ -865,27 +866,42 @@ static bool quotactl_cmd_onoff(int cmd)
static struct super_block *quotactl_block(const char __user *special, int cmd)
{
#ifdef CONFIG_BLOCK
- struct block_device *bdev;
struct super_block *sb;
struct filename *tmp = getname(special);
+ bool excl = false, thawed = false;
+ int error;
+ dev_t dev;
if (IS_ERR(tmp))
return ERR_CAST(tmp);
- bdev = lookup_bdev(tmp->name);
+ error = lookup_bdev(tmp->name, &dev);
putname(tmp);
- if (IS_ERR(bdev))
- return ERR_CAST(bdev);
- if (quotactl_cmd_onoff(cmd))
- sb = get_super_exclusive_thawed(bdev);
- else if (quotactl_cmd_write(cmd))
- sb = get_super_thawed(bdev);
- else
- sb = get_super(bdev);
- bdput(bdev);
+ if (error)
+ return ERR_PTR(error);
+
+ if (quotactl_cmd_onoff(cmd)) {
+ excl = true;
+ thawed = true;
+ } else if (quotactl_cmd_write(cmd)) {
+ thawed = true;
+ }
+
+retry:
+ sb = user_get_super(dev, excl);
if (!sb)
return ERR_PTR(-ENODEV);
-
+ if (thawed && sb->s_writers.frozen != SB_UNFROZEN) {
+ if (excl)
+ up_write(&sb->s_umount);
+ else
+ up_read(&sb->s_umount);
+ wait_event(sb->s_writers.wait_unfrozen,
+ sb->s_writers.frozen == SB_UNFROZEN);
+ put_super(sb);
+ goto retry;
+ }
return sb;
+
#else
return ERR_PTR(-ENODEV);
#endif
diff --git a/fs/statfs.c b/fs/statfs.c
index 59f33752c131..68cb07788750 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -235,7 +235,7 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user
static int vfs_ustat(dev_t dev, struct kstatfs *sbuf)
{
- struct super_block *s = user_get_super(dev);
+ struct super_block *s = user_get_super(dev, false);
int err;
if (!s)
return -EINVAL;
diff --git a/fs/super.c b/fs/super.c
index 98bb0629ee10..2c6cdea2ab2d 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -307,7 +307,7 @@ static void __put_super(struct super_block *s)
* Drops a temporary reference, frees superblock if there's no
* references left.
*/
-static void put_super(struct super_block *sb)
+void put_super(struct super_block *sb)
{
spin_lock(&sb_lock);
__put_super(sb);
@@ -740,7 +740,14 @@ void iterate_supers_type(struct file_system_type *type,
EXPORT_SYMBOL(iterate_supers_type);
-static struct super_block *__get_super(struct block_device *bdev, bool excl)
+/**
+ * get_super - get the superblock of a device
+ * @bdev: device to get the superblock for
+ *
+ * Scans the superblock list and finds the superblock of the file system
+ * mounted on the device given. %NULL is returned if no match is found.
+ */
+struct super_block *get_super(struct block_device *bdev)
{
struct super_block *sb;
@@ -755,17 +762,11 @@ rescan:
if (sb->s_bdev == bdev) {
sb->s_count++;
spin_unlock(&sb_lock);
- if (!excl)
- down_read(&sb->s_umount);
- else
- down_write(&sb->s_umount);
+ down_read(&sb->s_umount);
/* still alive? */
if (sb->s_root && (sb->s_flags & SB_BORN))
return sb;
- if (!excl)
- up_read(&sb->s_umount);
- else
- up_write(&sb->s_umount);
+ up_read(&sb->s_umount);
/* nope, got unmounted */
spin_lock(&sb_lock);
__put_super(sb);
@@ -777,66 +778,6 @@ rescan:
}
/**
- * get_super - get the superblock of a device
- * @bdev: device to get the superblock for
- *
- * Scans the superblock list and finds the superblock of the file system
- * mounted on the device given. %NULL is returned if no match is found.
- */
-struct super_block *get_super(struct block_device *bdev)
-{
- return __get_super(bdev, false);
-}
-EXPORT_SYMBOL(get_super);
-
-static struct super_block *__get_super_thawed(struct block_device *bdev,
- bool excl)
-{
- while (1) {
- struct super_block *s = __get_super(bdev, excl);
- if (!s || s->s_writers.frozen == SB_UNFROZEN)
- return s;
- if (!excl)
- up_read(&s->s_umount);
- else
- up_write(&s->s_umount);
- wait_event(s->s_writers.wait_unfrozen,
- s->s_writers.frozen == SB_UNFROZEN);
- put_super(s);
- }
-}
-
-/**
- * get_super_thawed - get thawed superblock of a device
- * @bdev: device to get the superblock for
- *
- * Scans the superblock list and finds the superblock of the file system
- * mounted on the device. The superblock is returned once it is thawed
- * (or immediately if it was not frozen). %NULL is returned if no match
- * is found.
- */
-struct super_block *get_super_thawed(struct block_device *bdev)
-{
- return __get_super_thawed(bdev, false);
-}
-EXPORT_SYMBOL(get_super_thawed);
-
-/**
- * get_super_exclusive_thawed - get thawed superblock of a device
- * @bdev: device to get the superblock for
- *
- * Scans the superblock list and finds the superblock of the file system
- * mounted on the device. The superblock is returned once it is thawed
- * (or immediately if it was not frozen) and s_umount semaphore is held
- * in exclusive mode. %NULL is returned if no match is found.
- */
-struct super_block *get_super_exclusive_thawed(struct block_device *bdev)
-{
- return __get_super_thawed(bdev, true);
-}
-EXPORT_SYMBOL(get_super_exclusive_thawed);
-
-/**
* get_active_super - get an active reference to the superblock of a device
* @bdev: device to get the superblock for
*
@@ -867,7 +808,7 @@ restart:
return NULL;
}
-struct super_block *user_get_super(dev_t dev)
+struct super_block *user_get_super(dev_t dev, bool excl)
{
struct super_block *sb;
@@ -879,11 +820,17 @@ rescan:
if (sb->s_dev == dev) {
sb->s_count++;
spin_unlock(&sb_lock);
- down_read(&sb->s_umount);
+ if (excl)
+ down_write(&sb->s_umount);
+ else
+ down_read(&sb->s_umount);
/* still alive? */
if (sb->s_root && (sb->s_flags & SB_BORN))
return sb;
- up_read(&sb->s_umount);
+ if (excl)
+ up_write(&sb->s_umount);
+ else
+ up_read(&sb->s_umount);
/* nope, got unmounted */
spin_lock(&sb_lock);
__put_super(sb);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index ef1d5bb88b93..b7c5783a031c 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -433,13 +433,10 @@ xfs_fs_goingdown(
{
switch (inflags) {
case XFS_FSOP_GOING_FLAGS_DEFAULT: {
- struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
-
- if (sb && !IS_ERR(sb)) {
+ if (!freeze_bdev(mp->m_super->s_bdev)) {
xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
- thaw_bdev(sb->s_bdev, sb);
+ thaw_bdev(mp->m_super->s_bdev);
}
-
break;
}
case XFS_FSOP_GOING_FLAGS_LOGFLUSH: