diff options
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- | mm/swapfile.c | 411 |
1 files changed, 207 insertions, 204 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c index 0341c5700e34..8c6b3ce38f09 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -95,39 +95,6 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset) } /* - * We need this because the bdev->unplug_fn can sleep and we cannot - * hold swap_lock while calling the unplug_fn. And swap_lock - * cannot be turned into a mutex. - */ -static DECLARE_RWSEM(swap_unplug_sem); - -void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page) -{ - swp_entry_t entry; - - down_read(&swap_unplug_sem); - entry.val = page_private(page); - if (PageSwapCache(page)) { - struct block_device *bdev = swap_info[swp_type(entry)]->bdev; - struct backing_dev_info *bdi; - - /* - * If the page is removed from swapcache from under us (with a - * racy try_to_unuse/swapoff) we need an additional reference - * count to avoid reading garbage from page_private(page) above. - * If the WARN_ON triggers during a swapoff it maybe the race - * condition and it's harmless. However if it triggers without - * swapoff it signals a problem. - */ - WARN_ON(page_count(page) <= 1); - - bdi = bdev->bd_inode->i_mapping->backing_dev_info; - blk_run_backing_dev(bdi, page); - } - up_read(&swap_unplug_sem); -} - -/* * swapon tell device that all the old swap contents can be discarded, * to allow the swap device to optimize its wear-levelling. */ @@ -212,8 +179,8 @@ static int wait_for_discard(void *word) #define SWAPFILE_CLUSTER 256 #define LATENCY_LIMIT 256 -static inline unsigned long scan_swap_map(struct swap_info_struct *si, - unsigned char usage) +static unsigned long scan_swap_map(struct swap_info_struct *si, + unsigned char usage) { unsigned long offset; unsigned long scan_base; @@ -880,7 +847,7 @@ unsigned int count_swap_pages(int type, int free) static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, swp_entry_t entry, struct page *page) { - struct mem_cgroup *ptr = NULL; + struct mem_cgroup *ptr; spinlock_t *ptl; pte_t *pte; int ret = 1; @@ -1550,6 +1517,36 @@ bad_bmap: goto out; } +static void enable_swap_info(struct swap_info_struct *p, int prio, + unsigned char *swap_map) +{ + int i, prev; + + spin_lock(&swap_lock); + if (prio >= 0) + p->prio = prio; + else + p->prio = --least_priority; + p->swap_map = swap_map; + p->flags |= SWP_WRITEOK; + nr_swap_pages += p->pages; + total_swap_pages += p->pages; + + /* insert swap space into swap_list: */ + prev = -1; + for (i = swap_list.head; i >= 0; i = swap_info[i]->next) { + if (p->prio >= swap_info[i]->prio) + break; + prev = i; + } + p->next = i; + if (prev < 0) + swap_list.head = swap_list.next = p->type; + else + swap_info[prev]->next = p->type; + spin_unlock(&swap_lock); +} + SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) { struct swap_info_struct *p = NULL; @@ -1621,32 +1618,17 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) current->flags &= ~PF_OOM_ORIGIN; if (err) { + /* + * reading p->prio and p->swap_map outside the lock is + * safe here because only sys_swapon and sys_swapoff + * change them, and there can be no other sys_swapon or + * sys_swapoff for this swap_info_struct at this point. + */ /* re-insert swap space back into swap_list */ - spin_lock(&swap_lock); - if (p->prio < 0) - p->prio = --least_priority; - prev = -1; - for (i = swap_list.head; i >= 0; i = swap_info[i]->next) { - if (p->prio >= swap_info[i]->prio) - break; - prev = i; - } - p->next = i; - if (prev < 0) - swap_list.head = swap_list.next = type; - else - swap_info[prev]->next = type; - nr_swap_pages += p->pages; - total_swap_pages += p->pages; - p->flags |= SWP_WRITEOK; - spin_unlock(&swap_lock); + enable_swap_info(p, p->prio, p->swap_map); goto out_dput; } - /* wait for any unplug function to finish */ - down_write(&swap_unplug_sem); - up_write(&swap_unplug_sem); - destroy_swap_extents(p); if (p->flags & SWP_CONTINUED) free_swap_count_continuations(p); @@ -1844,49 +1826,24 @@ static int __init max_swapfiles_check(void) late_initcall(max_swapfiles_check); #endif -/* - * Written 01/25/92 by Simmule Turner, heavily changed by Linus. - * - * The swapon system call - */ -SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) +static struct swap_info_struct *alloc_swap_info(void) { struct swap_info_struct *p; - char *name = NULL; - struct block_device *bdev = NULL; - struct file *swap_file = NULL; - struct address_space *mapping; unsigned int type; - int i, prev; - int error; - union swap_header *swap_header; - unsigned int nr_good_pages; - int nr_extents = 0; - sector_t span; - unsigned long maxpages; - unsigned long swapfilepages; - unsigned char *swap_map = NULL; - struct page *page = NULL; - struct inode *inode = NULL; - int did_down = 0; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) - return -ENOMEM; + return ERR_PTR(-ENOMEM); spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { if (!(swap_info[type]->flags & SWP_USED)) break; } - error = -EPERM; if (type >= MAX_SWAPFILES) { spin_unlock(&swap_lock); kfree(p); - goto out; + return ERR_PTR(-EPERM); } if (type >= nr_swapfiles) { p->type = type; @@ -1911,81 +1868,49 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->next = -1; spin_unlock(&swap_lock); - name = getname(specialfile); - error = PTR_ERR(name); - if (IS_ERR(name)) { - name = NULL; - goto bad_swap_2; - } - swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0); - error = PTR_ERR(swap_file); - if (IS_ERR(swap_file)) { - swap_file = NULL; - goto bad_swap_2; - } - - p->swap_file = swap_file; - mapping = swap_file->f_mapping; - inode = mapping->host; - - error = -EBUSY; - for (i = 0; i < nr_swapfiles; i++) { - struct swap_info_struct *q = swap_info[i]; + return p; +} - if (i == type || !q->swap_file) - continue; - if (mapping == q->swap_file->f_mapping) - goto bad_swap; - } +static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) +{ + int error; - error = -EINVAL; if (S_ISBLK(inode->i_mode)) { - bdev = bdgrab(I_BDEV(inode)); - error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, + p->bdev = bdgrab(I_BDEV(inode)); + error = blkdev_get(p->bdev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL, sys_swapon); if (error < 0) { - bdev = NULL; - error = -EINVAL; - goto bad_swap; + p->bdev = NULL; + return -EINVAL; } - p->old_block_size = block_size(bdev); - error = set_blocksize(bdev, PAGE_SIZE); + p->old_block_size = block_size(p->bdev); + error = set_blocksize(p->bdev, PAGE_SIZE); if (error < 0) - goto bad_swap; - p->bdev = bdev; + return error; p->flags |= SWP_BLKDEV; } else if (S_ISREG(inode->i_mode)) { p->bdev = inode->i_sb->s_bdev; mutex_lock(&inode->i_mutex); - did_down = 1; - if (IS_SWAPFILE(inode)) { - error = -EBUSY; - goto bad_swap; - } - } else { - goto bad_swap; - } + if (IS_SWAPFILE(inode)) + return -EBUSY; + } else + return -EINVAL; - swapfilepages = i_size_read(inode) >> PAGE_SHIFT; + return 0; +} - /* - * Read the swap header. - */ - if (!mapping->a_ops->readpage) { - error = -EINVAL; - goto bad_swap; - } - page = read_mapping_page(mapping, 0, swap_file); - if (IS_ERR(page)) { - error = PTR_ERR(page); - goto bad_swap; - } - swap_header = kmap(page); +static unsigned long read_swap_header(struct swap_info_struct *p, + union swap_header *swap_header, + struct inode *inode) +{ + int i; + unsigned long maxpages; + unsigned long swapfilepages; if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { printk(KERN_ERR "Unable to find swap-space signature\n"); - error = -EINVAL; - goto bad_swap; + return 0; } /* swap partition endianess hack... */ @@ -2001,8 +1926,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) printk(KERN_WARNING "Unable to handle swap header version %d\n", swap_header->info.version); - error = -EINVAL; - goto bad_swap; + return 0; } p->lowest_bit = 1; @@ -2033,61 +1957,155 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) } p->highest_bit = maxpages - 1; - error = -EINVAL; if (!maxpages) - goto bad_swap; + return 0; + swapfilepages = i_size_read(inode) >> PAGE_SHIFT; if (swapfilepages && maxpages > swapfilepages) { printk(KERN_WARNING "Swap area shorter than signature indicates\n"); - goto bad_swap; + return 0; } if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) - goto bad_swap; + return 0; if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) - goto bad_swap; + return 0; - /* OK, set up the swap map and apply the bad block list */ - swap_map = vmalloc(maxpages); - if (!swap_map) { - error = -ENOMEM; - goto bad_swap; - } + return maxpages; +} + +static int setup_swap_map_and_extents(struct swap_info_struct *p, + union swap_header *swap_header, + unsigned char *swap_map, + unsigned long maxpages, + sector_t *span) +{ + int i; + unsigned int nr_good_pages; + int nr_extents; - memset(swap_map, 0, maxpages); nr_good_pages = maxpages - 1; /* omit header page */ for (i = 0; i < swap_header->info.nr_badpages; i++) { unsigned int page_nr = swap_header->info.badpages[i]; - if (page_nr == 0 || page_nr > swap_header->info.last_page) { - error = -EINVAL; - goto bad_swap; - } + if (page_nr == 0 || page_nr > swap_header->info.last_page) + return -EINVAL; if (page_nr < maxpages) { swap_map[page_nr] = SWAP_MAP_BAD; nr_good_pages--; } } - error = swap_cgroup_swapon(type, maxpages); - if (error) - goto bad_swap; - if (nr_good_pages) { swap_map[0] = SWAP_MAP_BAD; p->max = maxpages; p->pages = nr_good_pages; - nr_extents = setup_swap_extents(p, &span); - if (nr_extents < 0) { - error = nr_extents; - goto bad_swap; - } + nr_extents = setup_swap_extents(p, span); + if (nr_extents < 0) + return nr_extents; nr_good_pages = p->pages; } if (!nr_good_pages) { printk(KERN_WARNING "Empty swap-file\n"); + return -EINVAL; + } + + return nr_extents; +} + +SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) +{ + struct swap_info_struct *p; + char *name; + struct file *swap_file = NULL; + struct address_space *mapping; + int i; + int prio; + int error; + union swap_header *swap_header; + int nr_extents; + sector_t span; + unsigned long maxpages; + unsigned char *swap_map = NULL; + struct page *page = NULL; + struct inode *inode = NULL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + p = alloc_swap_info(); + if (IS_ERR(p)) + return PTR_ERR(p); + + name = getname(specialfile); + if (IS_ERR(name)) { + error = PTR_ERR(name); + name = NULL; + goto bad_swap; + } + swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0); + if (IS_ERR(swap_file)) { + error = PTR_ERR(swap_file); + swap_file = NULL; + goto bad_swap; + } + + p->swap_file = swap_file; + mapping = swap_file->f_mapping; + + for (i = 0; i < nr_swapfiles; i++) { + struct swap_info_struct *q = swap_info[i]; + + if (q == p || !q->swap_file) + continue; + if (mapping == q->swap_file->f_mapping) { + error = -EBUSY; + goto bad_swap; + } + } + + inode = mapping->host; + /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */ + error = claim_swapfile(p, inode); + if (unlikely(error)) + goto bad_swap; + + /* + * Read the swap header. + */ + if (!mapping->a_ops->readpage) { error = -EINVAL; goto bad_swap; } + page = read_mapping_page(mapping, 0, swap_file); + if (IS_ERR(page)) { + error = PTR_ERR(page); + goto bad_swap; + } + swap_header = kmap(page); + + maxpages = read_swap_header(p, swap_header, inode); + if (unlikely(!maxpages)) { + error = -EINVAL; + goto bad_swap; + } + + /* OK, set up the swap map and apply the bad block list */ + swap_map = vzalloc(maxpages); + if (!swap_map) { + error = -ENOMEM; + goto bad_swap; + } + + error = swap_cgroup_swapon(p->type, maxpages); + if (error) + goto bad_swap; + + nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map, + maxpages, &span); + if (unlikely(nr_extents < 0)) { + error = nr_extents; + goto bad_swap; + } if (p->bdev) { if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { @@ -2099,58 +2117,46 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) } mutex_lock(&swapon_mutex); - spin_lock(&swap_lock); + prio = -1; if (swap_flags & SWAP_FLAG_PREFER) - p->prio = + prio = (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; - else - p->prio = --least_priority; - p->swap_map = swap_map; - p->flags |= SWP_WRITEOK; - nr_swap_pages += nr_good_pages; - total_swap_pages += nr_good_pages; + enable_swap_info(p, prio, swap_map); printk(KERN_INFO "Adding %uk swap on %s. " "Priority:%d extents:%d across:%lluk %s%s\n", - nr_good_pages<<(PAGE_SHIFT-10), name, p->prio, + p->pages<<(PAGE_SHIFT-10), name, p->prio, nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), (p->flags & SWP_SOLIDSTATE) ? "SS" : "", (p->flags & SWP_DISCARDABLE) ? "D" : ""); - /* insert swap space into swap_list: */ - prev = -1; - for (i = swap_list.head; i >= 0; i = swap_info[i]->next) { - if (p->prio >= swap_info[i]->prio) - break; - prev = i; - } - p->next = i; - if (prev < 0) - swap_list.head = swap_list.next = type; - else - swap_info[prev]->next = type; - spin_unlock(&swap_lock); mutex_unlock(&swapon_mutex); atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait); + if (S_ISREG(inode->i_mode)) + inode->i_flags |= S_SWAPFILE; error = 0; goto out; bad_swap: - if (bdev) { - set_blocksize(bdev, p->old_block_size); - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + if (inode && S_ISBLK(inode->i_mode) && p->bdev) { + set_blocksize(p->bdev, p->old_block_size); + blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); } destroy_swap_extents(p); - swap_cgroup_swapoff(type); -bad_swap_2: + swap_cgroup_swapoff(p->type); spin_lock(&swap_lock); p->swap_file = NULL; p->flags = 0; spin_unlock(&swap_lock); vfree(swap_map); - if (swap_file) + if (swap_file) { + if (inode && S_ISREG(inode->i_mode)) { + mutex_unlock(&inode->i_mutex); + inode = NULL; + } filp_close(swap_file, NULL); + } out: if (page && !IS_ERR(page)) { kunmap(page); @@ -2158,11 +2164,8 @@ out: } if (name) putname(name); - if (did_down) { - if (!error) - inode->i_flags |= S_SWAPFILE; + if (inode && S_ISREG(inode->i_mode)) mutex_unlock(&inode->i_mutex); - } return error; } |