diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-23 17:50:35 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-23 17:50:35 -0800 |
commit | 5ce1a70e2f00f0bce0cab57f798ca354b9496169 (patch) | |
tree | 6e80200536b7a3576fd71ff2c7135ffe87dc858e /drivers | |
parent | 9d3cae26acb471d5954cfdc25d1438b32060babe (diff) | |
parent | ef53d16cded7f89b3843b7a96970dab897843ea5 (diff) | |
download | lwn-5ce1a70e2f00f0bce0cab57f798ca354b9496169.tar.gz lwn-5ce1a70e2f00f0bce0cab57f798ca354b9496169.zip |
Merge branch 'akpm' (more incoming from Andrew)
Merge second patch-bomb from Andrew Morton:
- A little DM fix
- the MM queue
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (154 commits)
ksm: allocate roots when needed
mm: cleanup "swapcache" in do_swap_page
mm,ksm: swapoff might need to copy
mm,ksm: FOLL_MIGRATION do migration_entry_wait
ksm: shrink 32-bit rmap_item back to 32 bytes
ksm: treat unstable nid like in stable tree
ksm: add some comments
tmpfs: fix mempolicy object leaks
tmpfs: fix use-after-free of mempolicy object
mm/fadvise.c: drain all pagevecs if POSIX_FADV_DONTNEED fails to discard all pages
mm: export mmu notifier invalidates
mm: accelerate mm_populate() treatment of THP pages
mm: use long type for page counts in mm_populate() and get_user_pages()
mm: accurately document nr_free_*_pages functions with code comments
HWPOISON: change order of error_states[]'s elements
HWPOISON: fix misjudgement of page_action() for errors on mlocked pages
memcg: stop warning on memcg_propagate_kmem
net: change type of virtio_chan->p9_max_pages
vmscan: change type of vm_total_pages to unsigned long
fs/nfsd: change type of max_delegations, nfsd_drc_max_mem and nfsd_drc_mem_used
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/acpi/acpi_memhotplug.c | 8 | ||||
-rw-r--r-- | drivers/acpi/numa.c | 23 | ||||
-rw-r--r-- | drivers/acpi/processor_driver.c | 2 | ||||
-rw-r--r-- | drivers/base/memory.c | 6 | ||||
-rw-r--r-- | drivers/base/power/runtime.c | 89 | ||||
-rw-r--r-- | drivers/firmware/memmap.c | 196 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-transaction-manager.c | 14 | ||||
-rw-r--r-- | drivers/staging/zcache/zbud.c | 2 | ||||
-rw-r--r-- | drivers/staging/zsmalloc/zsmalloc-main.c | 2 | ||||
-rw-r--r-- | drivers/usb/core/hub.c | 13 |
10 files changed, 320 insertions, 35 deletions
diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 034d3e72aa92..da1f82b445e0 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -280,9 +280,11 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) static int acpi_memory_remove_memory(struct acpi_memory_device *mem_device) { - int result = 0; + int result = 0, nid; struct acpi_memory_info *info, *n; + nid = acpi_get_node(mem_device->device->handle); + list_for_each_entry_safe(info, n, &mem_device->res_list, list) { if (info->failed) /* The kernel does not use this memory block */ @@ -295,7 +297,9 @@ static int acpi_memory_remove_memory(struct acpi_memory_device *mem_device) */ return -EBUSY; - result = remove_memory(info->start_addr, info->length); + if (nid < 0) + nid = memory_add_physaddr_to_nid(info->start_addr); + result = remove_memory(nid, info->start_addr, info->length); if (result) return result; diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 33e609f63585..59844ee149be 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -282,10 +282,10 @@ acpi_table_parse_srat(enum acpi_srat_type id, handler, max_entries); } -int __init acpi_numa_init(void) -{ - int cnt = 0; +static int srat_mem_cnt; +void __init early_parse_srat(void) +{ /* * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= * SRAT cpu entries could have different order with that in MADT. @@ -295,21 +295,24 @@ int __init acpi_numa_init(void) /* SRAT: Static Resource Affinity Table */ if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, - acpi_parse_x2apic_affinity, 0); + acpi_parse_x2apic_affinity, 0); acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, - acpi_parse_processor_affinity, 0); - cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, - acpi_parse_memory_affinity, - NR_NODE_MEMBLKS); + acpi_parse_processor_affinity, 0); + srat_mem_cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, + acpi_parse_memory_affinity, + NR_NODE_MEMBLKS); } +} +int __init acpi_numa_init(void) +{ /* SLIT: System Locality Information Table */ acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); acpi_numa_arch_fixup(); - if (cnt < 0) - return cnt; + if (srat_mem_cnt < 0) + return srat_mem_cnt; else if (!parsed_numa_memblks) return -ENOENT; return 0; diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index cbf1f122666b..df34bd04ae62 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -45,6 +45,7 @@ #include <linux/cpuidle.h> #include <linux/slab.h> #include <linux/acpi.h> +#include <linux/memory_hotplug.h> #include <asm/io.h> #include <asm/cpu.h> @@ -641,6 +642,7 @@ static int acpi_processor_remove(struct acpi_device *device) per_cpu(processors, pr->id) = NULL; per_cpu(processor_device_array, pr->id) = NULL; + try_offline_node(cpu_to_node(pr->id)); free: free_cpumask_var(pr->throttling.shared_cpu_map); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 83d0b17ba1c2..a51007b79032 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -693,6 +693,12 @@ int offline_memory_block(struct memory_block *mem) return ret; } +/* return true if the memory block is offlined, otherwise, return false */ +bool is_memblock_offlined(struct memory_block *mem) +{ + return mem->state == MEM_OFFLINE; +} + /* * Initialize the sysfs support for memory devices... */ diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 3148b10dc2e5..1244930e3d7a 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -124,6 +124,76 @@ unsigned long pm_runtime_autosuspend_expiration(struct device *dev) } EXPORT_SYMBOL_GPL(pm_runtime_autosuspend_expiration); +static int dev_memalloc_noio(struct device *dev, void *data) +{ + return dev->power.memalloc_noio; +} + +/* + * pm_runtime_set_memalloc_noio - Set a device's memalloc_noio flag. + * @dev: Device to handle. + * @enable: True for setting the flag and False for clearing the flag. + * + * Set the flag for all devices in the path from the device to the + * root device in the device tree if @enable is true, otherwise clear + * the flag for devices in the path whose siblings don't set the flag. + * + * The function should only be called by block device, or network + * device driver for solving the deadlock problem during runtime + * resume/suspend: + * + * If memory allocation with GFP_KERNEL is called inside runtime + * resume/suspend callback of any one of its ancestors(or the + * block device itself), the deadlock may be triggered inside the + * memory allocation since it might not complete until the block + * device becomes active and the involed page I/O finishes. The + * situation is pointed out first by Alan Stern. Network device + * are involved in iSCSI kind of situation. + * + * The lock of dev_hotplug_mutex is held in the function for handling + * hotplug race because pm_runtime_set_memalloc_noio() may be called + * in async probe(). + * + * The function should be called between device_add() and device_del() + * on the affected device(block/network device). + */ +void pm_runtime_set_memalloc_noio(struct device *dev, bool enable) +{ + static DEFINE_MUTEX(dev_hotplug_mutex); + + mutex_lock(&dev_hotplug_mutex); + for (;;) { + bool enabled; + + /* hold power lock since bitfield is not SMP-safe. */ + spin_lock_irq(&dev->power.lock); + enabled = dev->power.memalloc_noio; + dev->power.memalloc_noio = enable; + spin_unlock_irq(&dev->power.lock); + + /* + * not need to enable ancestors any more if the device + * has been enabled. + */ + if (enabled && enable) + break; + + dev = dev->parent; + + /* + * clear flag of the parent device only if all the + * children don't set the flag because ancestor's + * flag was set by any one of the descendants. + */ + if (!dev || (!enable && + device_for_each_child(dev, NULL, + dev_memalloc_noio))) + break; + } + mutex_unlock(&dev_hotplug_mutex); +} +EXPORT_SYMBOL_GPL(pm_runtime_set_memalloc_noio); + /** * rpm_check_suspend_allowed - Test whether a device may be suspended. * @dev: Device to test. @@ -278,7 +348,24 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev) if (!cb) return -ENOSYS; - retval = __rpm_callback(cb, dev); + if (dev->power.memalloc_noio) { + unsigned int noio_flag; + + /* + * Deadlock might be caused if memory allocation with + * GFP_KERNEL happens inside runtime_suspend and + * runtime_resume callbacks of one block device's + * ancestor or the block device itself. Network + * device might be thought as part of iSCSI block + * device, so network device and its ancestor should + * be marked as memalloc_noio too. + */ + noio_flag = memalloc_noio_save(); + retval = __rpm_callback(cb, dev); + memalloc_noio_restore(noio_flag); + } else { + retval = __rpm_callback(cb, dev); + } dev->power.runtime_error = retval; return retval != -EACCES ? retval : -EIO; diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 90723e65b081..0b5b5f619c75 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -21,6 +21,7 @@ #include <linux/types.h> #include <linux/bootmem.h> #include <linux/slab.h> +#include <linux/mm.h> /* * Data types ------------------------------------------------------------------ @@ -52,6 +53,9 @@ static ssize_t start_show(struct firmware_map_entry *entry, char *buf); static ssize_t end_show(struct firmware_map_entry *entry, char *buf); static ssize_t type_show(struct firmware_map_entry *entry, char *buf); +static struct firmware_map_entry * __meminit +firmware_map_find_entry(u64 start, u64 end, const char *type); + /* * Static data ----------------------------------------------------------------- */ @@ -79,7 +83,52 @@ static const struct sysfs_ops memmap_attr_ops = { .show = memmap_attr_show, }; -static struct kobj_type memmap_ktype = { +/* Firmware memory map entries. */ +static LIST_HEAD(map_entries); +static DEFINE_SPINLOCK(map_entries_lock); + +/* + * For memory hotplug, there is no way to free memory map entries allocated + * by boot mem after the system is up. So when we hot-remove memory whose + * map entry is allocated by bootmem, we need to remember the storage and + * reuse it when the memory is hot-added again. + */ +static LIST_HEAD(map_entries_bootmem); +static DEFINE_SPINLOCK(map_entries_bootmem_lock); + + +static inline struct firmware_map_entry * +to_memmap_entry(struct kobject *kobj) +{ + return container_of(kobj, struct firmware_map_entry, kobj); +} + +static void __meminit release_firmware_map_entry(struct kobject *kobj) +{ + struct firmware_map_entry *entry = to_memmap_entry(kobj); + + if (PageReserved(virt_to_page(entry))) { + /* + * Remember the storage allocated by bootmem, and reuse it when + * the memory is hot-added again. The entry will be added to + * map_entries_bootmem here, and deleted from &map_entries in + * firmware_map_remove_entry(). + */ + if (firmware_map_find_entry(entry->start, entry->end, + entry->type)) { + spin_lock(&map_entries_bootmem_lock); + list_add(&entry->list, &map_entries_bootmem); + spin_unlock(&map_entries_bootmem_lock); + } + + return; + } + + kfree(entry); +} + +static struct kobj_type __refdata memmap_ktype = { + .release = release_firmware_map_entry, .sysfs_ops = &memmap_attr_ops, .default_attrs = def_attrs, }; @@ -88,13 +137,6 @@ static struct kobj_type memmap_ktype = { * Registration functions ------------------------------------------------------ */ -/* - * Firmware memory map entries. No locking is needed because the - * firmware_map_add() and firmware_map_add_early() functions are called - * in firmware initialisation code in one single thread of execution. - */ -static LIST_HEAD(map_entries); - /** * firmware_map_add_entry() - Does the real work to add a firmware memmap entry. * @start: Start of the memory range. @@ -118,11 +160,25 @@ static int firmware_map_add_entry(u64 start, u64 end, INIT_LIST_HEAD(&entry->list); kobject_init(&entry->kobj, &memmap_ktype); + spin_lock(&map_entries_lock); list_add_tail(&entry->list, &map_entries); + spin_unlock(&map_entries_lock); return 0; } +/** + * firmware_map_remove_entry() - Does the real work to remove a firmware + * memmap entry. + * @entry: removed entry. + * + * The caller must hold map_entries_lock, and release it properly. + **/ +static inline void firmware_map_remove_entry(struct firmware_map_entry *entry) +{ + list_del(&entry->list); +} + /* * Add memmap entry on sysfs */ @@ -144,6 +200,78 @@ static int add_sysfs_fw_map_entry(struct firmware_map_entry *entry) return 0; } +/* + * Remove memmap entry on sysfs + */ +static inline void remove_sysfs_fw_map_entry(struct firmware_map_entry *entry) +{ + kobject_put(&entry->kobj); +} + +/* + * firmware_map_find_entry_in_list() - Search memmap entry in a given list. + * @start: Start of the memory range. + * @end: End of the memory range (exclusive). + * @type: Type of the memory range. + * @list: In which to find the entry. + * + * This function is to find the memmap entey of a given memory range in a + * given list. The caller must hold map_entries_lock, and must not release + * the lock until the processing of the returned entry has completed. + * + * Return: Pointer to the entry to be found on success, or NULL on failure. + */ +static struct firmware_map_entry * __meminit +firmware_map_find_entry_in_list(u64 start, u64 end, const char *type, + struct list_head *list) +{ + struct firmware_map_entry *entry; + + list_for_each_entry(entry, list, list) + if ((entry->start == start) && (entry->end == end) && + (!strcmp(entry->type, type))) { + return entry; + } + + return NULL; +} + +/* + * firmware_map_find_entry() - Search memmap entry in map_entries. + * @start: Start of the memory range. + * @end: End of the memory range (exclusive). + * @type: Type of the memory range. + * + * This function is to find the memmap entey of a given memory range. + * The caller must hold map_entries_lock, and must not release the lock + * until the processing of the returned entry has completed. + * + * Return: Pointer to the entry to be found on success, or NULL on failure. + */ +static struct firmware_map_entry * __meminit +firmware_map_find_entry(u64 start, u64 end, const char *type) +{ + return firmware_map_find_entry_in_list(start, end, type, &map_entries); +} + +/* + * firmware_map_find_entry_bootmem() - Search memmap entry in map_entries_bootmem. + * @start: Start of the memory range. + * @end: End of the memory range (exclusive). + * @type: Type of the memory range. + * + * This function is similar to firmware_map_find_entry except that it find the + * given entry in map_entries_bootmem. + * + * Return: Pointer to the entry to be found on success, or NULL on failure. + */ +static struct firmware_map_entry * __meminit +firmware_map_find_entry_bootmem(u64 start, u64 end, const char *type) +{ + return firmware_map_find_entry_in_list(start, end, type, + &map_entries_bootmem); +} + /** * firmware_map_add_hotplug() - Adds a firmware mapping entry when we do * memory hotplug. @@ -161,9 +289,19 @@ int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type) { struct firmware_map_entry *entry; - entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC); - if (!entry) - return -ENOMEM; + entry = firmware_map_find_entry_bootmem(start, end, type); + if (!entry) { + entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC); + if (!entry) + return -ENOMEM; + } else { + /* Reuse storage allocated by bootmem. */ + spin_lock(&map_entries_bootmem_lock); + list_del(&entry->list); + spin_unlock(&map_entries_bootmem_lock); + + memset(entry, 0, sizeof(*entry)); + } firmware_map_add_entry(start, end, type, entry); /* create the memmap entry */ @@ -196,6 +334,36 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type) return firmware_map_add_entry(start, end, type, entry); } +/** + * firmware_map_remove() - remove a firmware mapping entry + * @start: Start of the memory range. + * @end: End of the memory range. + * @type: Type of the memory range. + * + * removes a firmware mapping entry. + * + * Returns 0 on success, or -EINVAL if no entry. + **/ +int __meminit firmware_map_remove(u64 start, u64 end, const char *type) +{ + struct firmware_map_entry *entry; + + spin_lock(&map_entries_lock); + entry = firmware_map_find_entry(start, end - 1, type); + if (!entry) { + spin_unlock(&map_entries_lock); + return -EINVAL; + } + + firmware_map_remove_entry(entry); + spin_unlock(&map_entries_lock); + + /* remove the memmap entry */ + remove_sysfs_fw_map_entry(entry); + + return 0; +} + /* * Sysfs functions ------------------------------------------------------------- */ @@ -217,8 +385,10 @@ static ssize_t type_show(struct firmware_map_entry *entry, char *buf) return snprintf(buf, PAGE_SIZE, "%s\n", entry->type); } -#define to_memmap_attr(_attr) container_of(_attr, struct memmap_attribute, attr) -#define to_memmap_entry(obj) container_of(obj, struct firmware_map_entry, kobj) +static inline struct memmap_attribute *to_memmap_attr(struct attribute *attr) +{ + return container_of(attr, struct memmap_attribute, attr); +} static ssize_t memmap_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index d247a35da3c6..7b17a1fdeaf9 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -25,8 +25,8 @@ struct shadow_info { /* * It would be nice if we scaled with the size of transaction. */ -#define HASH_SIZE 256 -#define HASH_MASK (HASH_SIZE - 1) +#define DM_HASH_SIZE 256 +#define DM_HASH_MASK (DM_HASH_SIZE - 1) struct dm_transaction_manager { int is_clone; @@ -36,7 +36,7 @@ struct dm_transaction_manager { struct dm_space_map *sm; spinlock_t lock; - struct hlist_head buckets[HASH_SIZE]; + struct hlist_head buckets[DM_HASH_SIZE]; }; /*----------------------------------------------------------------*/ @@ -44,7 +44,7 @@ struct dm_transaction_manager { static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b) { int r = 0; - unsigned bucket = dm_hash_block(b, HASH_MASK); + unsigned bucket = dm_hash_block(b, DM_HASH_MASK); struct shadow_info *si; struct hlist_node *n; @@ -71,7 +71,7 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b) si = kmalloc(sizeof(*si), GFP_NOIO); if (si) { si->where = b; - bucket = dm_hash_block(b, HASH_MASK); + bucket = dm_hash_block(b, DM_HASH_MASK); spin_lock(&tm->lock); hlist_add_head(&si->hlist, tm->buckets + bucket); spin_unlock(&tm->lock); @@ -86,7 +86,7 @@ static void wipe_shadow_table(struct dm_transaction_manager *tm) int i; spin_lock(&tm->lock); - for (i = 0; i < HASH_SIZE; i++) { + for (i = 0; i < DM_HASH_SIZE; i++) { bucket = tm->buckets + i; hlist_for_each_entry_safe(si, n, tmp, bucket, hlist) kfree(si); @@ -115,7 +115,7 @@ static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm, tm->sm = sm; spin_lock_init(&tm->lock); - for (i = 0; i < HASH_SIZE; i++) + for (i = 0; i < DM_HASH_SIZE; i++) INIT_HLIST_HEAD(tm->buckets + i); return tm; diff --git a/drivers/staging/zcache/zbud.c b/drivers/staging/zcache/zbud.c index 328c397ea5dc..fdff5c6a0239 100644 --- a/drivers/staging/zcache/zbud.c +++ b/drivers/staging/zcache/zbud.c @@ -404,7 +404,7 @@ static inline struct page *zbud_unuse_zbudpage(struct zbudpage *zbudpage, else zbud_pers_pageframes--; zbudpage_spin_unlock(zbudpage); - reset_page_mapcount(page); + page_mapcount_reset(page); init_page_count(page); page->index = 0; return page; diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c index 06f73a93a44d..e78d262c5249 100644 --- a/drivers/staging/zsmalloc/zsmalloc-main.c +++ b/drivers/staging/zsmalloc/zsmalloc-main.c @@ -472,7 +472,7 @@ static void reset_page(struct page *page) set_page_private(page, 0); page->mapping = NULL; page->freelist = NULL; - reset_page_mapcount(page); + page_mapcount_reset(page); } static void free_zspage(struct page *first_page) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 1775ad471edd..5480352f984d 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -5177,6 +5177,7 @@ int usb_reset_device(struct usb_device *udev) { int ret; int i; + unsigned int noio_flag; struct usb_host_config *config = udev->actconfig; if (udev->state == USB_STATE_NOTATTACHED || @@ -5186,6 +5187,17 @@ int usb_reset_device(struct usb_device *udev) return -EINVAL; } + /* + * Don't allocate memory with GFP_KERNEL in current + * context to avoid possible deadlock if usb mass + * storage interface or usbnet interface(iSCSI case) + * is included in current configuration. The easist + * approach is to do it for every device reset, + * because the device 'memalloc_noio' flag may have + * not been set before reseting the usb device. + */ + noio_flag = memalloc_noio_save(); + /* Prevent autosuspend during the reset */ usb_autoresume_device(udev); @@ -5230,6 +5242,7 @@ int usb_reset_device(struct usb_device *udev) } usb_autosuspend_device(udev); + memalloc_noio_restore(noio_flag); return ret; } EXPORT_SYMBOL_GPL(usb_reset_device); |