summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorShaohua Li <shli@kernel.org>2013-09-11 14:20:30 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-11 15:57:15 -0700
commit815c2c543d3aeb914a361f981440ece552778724 (patch)
tree7d6f0de8493abbb08f0a42cb565087868b9eaeb4 /include
parent2a8f9449343260373398d59228a62a4332ea513a (diff)
downloadlwn-815c2c543d3aeb914a361f981440ece552778724.tar.gz
lwn-815c2c543d3aeb914a361f981440ece552778724.zip
swap: make swap discard async
swap can do cluster discard for SSD, which is good, but there are some problems here: 1. swap do the discard just before page reclaim gets a swap entry and writes the disk sectors. This is useless for high end SSD, because an overwrite to a sector implies a discard to original sector too. A discard + overwrite == overwrite. 2. the purpose of doing discard is to improve SSD firmware garbage collection. Idealy we should send discard as early as possible, so firmware can do something smart. Sending discard just after swap entry is freed is considered early compared to sending discard before write. Of course, if workload is already bound to gc speed, sending discard earlier or later doesn't make 3. block discard is a sync API, which will delay scan_swap_map() significantly. 4. Write and discard command can be executed parallel in PCIe SSD. Making swap discard async can make execution more efficiently. This patch makes swap discard async and moves discard to where swap entry is freed. Discard and write have no dependence now, so above issues can be avoided. Idealy we should do discard for any freed sectors, but some SSD discard is very slow. This patch still does discard for a whole cluster. My test does a several round of 'mmap, write, unmap', which will trigger a lot of swap discard. In a fusionio card, with this patch, the test runtime is reduced to 18% of the time without it, so around 5.5x faster. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Shaohua Li <shli@fusionio.com> Cc: Rik van Riel <riel@redhat.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Kyungmin Park <kmpark@infradead.org> Cc: Hugh Dickins <hughd@google.com> Cc: Rafael Aquini <aquini@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/swap.h20
1 files changed, 11 insertions, 9 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h
index cb5baebf31d6..8a3c4a1caa14 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -217,8 +217,6 @@ struct swap_info_struct {
unsigned int inuse_pages; /* number of those currently in use */
unsigned int cluster_next; /* likely index for next allocation */
unsigned int cluster_nr; /* countdown to next cluster search */
- unsigned int lowest_alloc; /* while preparing discard cluster */
- unsigned int highest_alloc; /* while preparing discard cluster */
struct swap_extent *curr_swap_extent;
struct swap_extent first_swap_extent;
struct block_device *bdev; /* swap device or bdev of swap file */
@@ -232,14 +230,18 @@ struct swap_info_struct {
* protect map scan related fields like
* swap_map, lowest_bit, highest_bit,
* inuse_pages, cluster_next,
- * cluster_nr, lowest_alloc and
- * highest_alloc. other fields are only
- * changed at swapon/swapoff, so are
- * protected by swap_lock. changing
- * flags need hold this lock and
- * swap_lock. If both locks need hold,
- * hold swap_lock first.
+ * cluster_nr, lowest_alloc,
+ * highest_alloc, free/discard cluster
+ * list. other fields are only changed
+ * at swapon/swapoff, so are protected
+ * by swap_lock. changing flags need
+ * hold this lock and swap_lock. If
+ * both locks need hold, hold swap_lock
+ * first.
*/
+ struct work_struct discard_work; /* discard worker */
+ struct swap_cluster_info discard_cluster_head; /* list head of discard clusters */
+ struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */
};
struct swap_list_t {