summaryrefslogtreecommitdiff
path: root/fs/buffer.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 15:32:19 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 15:32:19 -0800
commite2c5923c349c1738fe8fda980874d93f6fb2e5b6 (patch)
treeb97a90170c45211bcc437761653aa8016c34afcd /fs/buffer.c
parentabc36be236358162202e86ad88616ff95a755101 (diff)
parenta04b5de5050ab8b891128eb2c47a0916fe8622e1 (diff)
downloadlwn-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.tar.gz
lwn-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.zip
Merge branch 'for-4.15/block' of git://git.kernel.dk/linux-block
Pull core block layer updates from Jens Axboe: "This is the main pull request for block storage for 4.15-rc1. Nothing out of the ordinary in here, and no API changes or anything like that. Just various new features for drivers, core changes, etc. In particular, this pull request contains: - A patch series from Bart, closing the whole on blk/scsi-mq queue quescing. - A series from Christoph, building towards hidden gendisks (for multipath) and ability to move bio chains around. - NVMe - Support for native multipath for NVMe (Christoph). - Userspace notifications for AENs (Keith). - Command side-effects support (Keith). - SGL support (Chaitanya Kulkarni) - FC fixes and improvements (James Smart) - Lots of fixes and tweaks (Various) - bcache - New maintainer (Michael Lyle) - Writeback control improvements (Michael) - Various fixes (Coly, Elena, Eric, Liang, et al) - lightnvm updates, mostly centered around the pblk interface (Javier, Hans, and Rakesh). - Removal of unused bio/bvec kmap atomic interfaces (me, Christoph) - Writeback series that fix the much discussed hundreds of millions of sync-all units. This goes all the way, as discussed previously (me). - Fix for missing wakeup on writeback timer adjustments (Yafang Shao). - Fix laptop mode on blk-mq (me). - {mq,name} tupple lookup for IO schedulers, allowing us to have alias names. This means you can use 'deadline' on both !mq and on mq (where it's called mq-deadline). (me). - blktrace race fix, oopsing on sg load (me). - blk-mq optimizations (me). - Obscure waitqueue race fix for kyber (Omar). - NBD fixes (Josef). - Disable writeback throttling by default on bfq, like we do on cfq (Luca Miccio). - Series from Ming that enable us to treat flush requests on blk-mq like any other request. This is a really nice cleanup. - Series from Ming that improves merging on blk-mq with schedulers, getting us closer to flipping the switch on scsi-mq again. - BFQ updates (Paolo). - blk-mq atomic flags memory ordering fixes (Peter Z). - Loop cgroup support (Shaohua). - Lots of minor fixes from lots of different folks, both for core and driver code" * 'for-4.15/block' of git://git.kernel.dk/linux-block: (294 commits) nvme: fix visibility of "uuid" ns attribute blk-mq: fixup some comment typos and lengths ide: ide-atapi: fix compile error with defining macro DEBUG blk-mq: improve tag waiting setup for non-shared tags brd: remove unused brd_mutex blk-mq: only run the hardware queue if IO is pending block: avoid null pointer dereference on null disk fs: guard_bio_eod() needs to consider partitions xtensa/simdisk: fix compile error nvme: expose subsys attribute to sysfs nvme: create 'slaves' and 'holders' entries for hidden controllers block: create 'slaves' and 'holders' entries for hidden gendisks nvme: also expose the namespace identification sysfs files for mpath nodes nvme: implement multipath access to nvme subsystems nvme: track shared namespaces nvme: introduce a nvme_ns_ids structure nvme: track subsystems block, nvme: Introduce blk_mq_req_flags_t block, scsi: Make SCSI quiesce and resume work reliably block: Add the QUEUE_FLAG_PREEMPT_ONLY request queue flag ...
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--fs/buffer.c70
1 files changed, 19 insertions, 51 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 49b7e9bdcd1d..1c18a22a6013 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -253,27 +253,6 @@ out:
}
/*
- * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
- */
-static void free_more_memory(void)
-{
- struct zoneref *z;
- int nid;
-
- wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
- yield();
-
- for_each_online_node(nid) {
-
- z = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
- gfp_zone(GFP_NOFS), NULL);
- if (z->zone)
- try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
- GFP_NOFS, NULL);
- }
-}
-
-/*
* I/O completion handler for block_read_full_page() - pages
* which come unlocked at the end of I/O.
*/
@@ -861,16 +840,19 @@ int remove_inode_buffers(struct inode *inode)
* which may not fail from ordinary buffer allocations.
*/
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
- int retry)
+ bool retry)
{
struct buffer_head *bh, *head;
+ gfp_t gfp = GFP_NOFS;
long offset;
-try_again:
+ if (retry)
+ gfp |= __GFP_NOFAIL;
+
head = NULL;
offset = PAGE_SIZE;
while ((offset -= size) >= 0) {
- bh = alloc_buffer_head(GFP_NOFS);
+ bh = alloc_buffer_head(gfp);
if (!bh)
goto no_grow;
@@ -896,23 +878,7 @@ no_grow:
} while (head);
}
- /*
- * Return failure for non-async IO requests. Async IO requests
- * are not allowed to fail, so we have to wait until buffer heads
- * become available. But we don't want tasks sleeping with
- * partially complete buffers, so all were released above.
- */
- if (!retry)
- return NULL;
-
- /* We're _really_ low on memory. Now we just
- * wait for old buffer heads to become free due to
- * finishing IO. Since this is an async request and
- * the reserve list is empty, we're sure there are
- * async buffer heads in use.
- */
- free_more_memory();
- goto try_again;
+ return NULL;
}
EXPORT_SYMBOL_GPL(alloc_page_buffers);
@@ -1001,8 +967,6 @@ grow_dev_page(struct block_device *bdev, sector_t block,
gfp_mask |= __GFP_NOFAIL;
page = find_or_create_page(inode->i_mapping, index, gfp_mask);
- if (!page)
- return ret;
BUG_ON(!PageLocked(page));
@@ -1021,9 +985,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
/*
* Allocate some buffers for this page
*/
- bh = alloc_page_buffers(page, size, 0);
- if (!bh)
- goto failed;
+ bh = alloc_page_buffers(page, size, true);
/*
* Link the page to the buffers and initialise them. Take the
@@ -1103,8 +1065,6 @@ __getblk_slow(struct block_device *bdev, sector_t block,
ret = grow_buffers(bdev, block, size, gfp);
if (ret < 0)
return NULL;
- if (ret == 0)
- free_more_memory();
}
}
@@ -1575,7 +1535,7 @@ void create_empty_buffers(struct page *page,
{
struct buffer_head *bh, *head, *tail;
- head = alloc_page_buffers(page, blocksize, 1);
+ head = alloc_page_buffers(page, blocksize, true);
bh = head;
do {
bh->b_state |= b_state;
@@ -2639,7 +2599,7 @@ int nobh_write_begin(struct address_space *mapping,
* Be careful: the buffer linked list is a NULL terminated one, rather
* than the circular one we're used to.
*/
- head = alloc_page_buffers(page, blocksize, 0);
+ head = alloc_page_buffers(page, blocksize, false);
if (!head) {
ret = -ENOMEM;
goto out_release;
@@ -3056,8 +3016,16 @@ void guard_bio_eod(int op, struct bio *bio)
sector_t maxsector;
struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
unsigned truncated_bytes;
+ struct hd_struct *part;
+
+ rcu_read_lock();
+ part = __disk_get_part(bio->bi_disk, bio->bi_partno);
+ if (part)
+ maxsector = part_nr_sects_read(part);
+ else
+ maxsector = get_capacity(bio->bi_disk);
+ rcu_read_unlock();
- maxsector = get_capacity(bio->bi_disk);
if (!maxsector)
return;