diff options
37 files changed, 418 insertions, 161 deletions
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c index f0c37511d8da..f01bfcd4bdee 100644 --- a/arch/i386/xen/enlighten.c +++ b/arch/i386/xen/enlighten.c @@ -623,8 +623,8 @@ static unsigned long xen_read_cr2_direct(void) static void xen_write_cr4(unsigned long cr4) { - /* never allow TSC to be disabled */ - native_write_cr4(cr4 & ~X86_CR4_TSD); + /* Just ignore cr4 changes; Xen doesn't allow us to do + anything anyway. */ } static unsigned long xen_read_cr3(void) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 727a6699f2f4..c627cf86d1e3 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -239,7 +239,7 @@ static void snapshot_tb_and_purr(void *data) struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data); local_irq_save(flags); - p->tb = mftb(); + p->tb = get_tb_or_rtc(); p->purr = mfspr(SPRN_PURR); wmb(); p->initialized = 1; @@ -317,7 +317,7 @@ static void snapshot_purr(void) */ void snapshot_timebase(void) { - __get_cpu_var(last_jiffy) = get_tb(); + __get_cpu_var(last_jiffy) = get_tb_or_rtc(); snapshot_purr(); } @@ -684,6 +684,8 @@ void timer_interrupt(struct pt_regs * regs) write_seqlock(&xtime_lock); tb_next_jiffy = tb_last_jiffy + tb_ticks_per_jiffy; + if (__USE_RTC() && tb_next_jiffy >= 1000000000) + tb_next_jiffy -= 1000000000; if (per_cpu(last_jiffy, cpu) >= tb_next_jiffy) { tb_last_jiffy = tb_next_jiffy; do_timer(1); @@ -977,7 +979,7 @@ void __init time_init(void) tb_to_ns_scale = scale; tb_to_ns_shift = shift; /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ - boot_tb = get_tb(); + boot_tb = get_tb_or_rtc(); tm = get_boot_time(); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index cef01e4e8989..213fa31ac537 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -98,6 +98,18 @@ static struct vdso_patch_def vdso_patches[] = { CPU_FTR_USE_TB, 0, "__kernel_gettimeofday", NULL }, + { + CPU_FTR_USE_TB, 0, + "__kernel_clock_gettime", NULL + }, + { + CPU_FTR_USE_TB, 0, + "__kernel_clock_getres", NULL + }, + { + CPU_FTR_USE_TB, 0, + "__kernel_get_tbfreq", NULL + }, }; /* diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index c784edd40ea7..5bebe7fbe056 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -579,7 +579,7 @@ static struct spu *find_victim(struct spu_context *ctx) list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { struct spu_context *tmp = spu->ctx; - if (tmp->prio > ctx->prio && + if (tmp && tmp->prio > ctx->prio && (!victim || tmp->prio > victim->prio)) victim = spu->ctx; } @@ -611,9 +611,9 @@ static struct spu *find_victim(struct spu_context *ctx) mutex_lock(&cbe_spu_info[node].list_mutex); cbe_spu_info[node].nr_active--; + spu_unbind_context(spu, victim); mutex_unlock(&cbe_spu_info[node].list_mutex); - spu_unbind_context(spu, victim); victim->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; mutex_unlock(&victim->state_mutex); diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h index 8d7f7c1cb9c6..6c2be26f1d7d 100644 --- a/arch/um/include/kern_util.h +++ b/arch/um/include/kern_util.h @@ -117,7 +117,7 @@ extern void sigio_handler(int sig, union uml_pt_regs *regs); extern void copy_sc(union uml_pt_regs *regs, void *from); -unsigned long to_irq_stack(int sig, unsigned long *mask_out); +extern unsigned long to_irq_stack(unsigned long *mask_out); unsigned long from_irq_stack(int nested); #endif diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 9870febdbead..cf0dd9cf8c43 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -518,13 +518,13 @@ int init_aio_irq(int irq, char *name, irq_handler_t handler) static unsigned long pending_mask; -unsigned long to_irq_stack(int sig, unsigned long *mask_out) +unsigned long to_irq_stack(unsigned long *mask_out) { struct thread_info *ti; unsigned long mask, old; int nested; - mask = xchg(&pending_mask, 1 << sig); + mask = xchg(&pending_mask, *mask_out); if(mask != 0){ /* If any interrupts come in at this point, we want to * make sure that their bits aren't lost by our @@ -534,7 +534,7 @@ unsigned long to_irq_stack(int sig, unsigned long *mask_out) * and pending_mask contains a bit for each interrupt * that came in. */ - old = 1 << sig; + old = *mask_out; do { old |= mask; mask = xchg(&pending_mask, old); @@ -550,6 +550,7 @@ unsigned long to_irq_stack(int sig, unsigned long *mask_out) task = cpu_tasks[ti->cpu].task; tti = task_thread_info(task); + *ti = *tti; ti->real_thread = tti; task->stack = ti; diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 6f92f732d253..c3ecc2a84e0c 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -320,7 +320,8 @@ int os_file_size(char *file, unsigned long long *size_out) } if(S_ISBLK(buf.ust_mode)){ - int fd, blocks; + int fd; + long blocks; fd = os_open_file(file, of_read(OPENFLAGS()), 0); if(fd < 0){ diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 18e5c8b67eb8..b98f7ea2d2f6 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -119,7 +119,7 @@ void (*handlers[_NSIG])(int sig, struct sigcontext *sc); void handle_signal(int sig, struct sigcontext *sc) { - unsigned long pending = 0; + unsigned long pending = 1UL << sig; do { int nested, bail; @@ -134,7 +134,7 @@ void handle_signal(int sig, struct sigcontext *sc) * have to return, and the upper handler will deal * with this interrupt. */ - bail = to_irq_stack(sig, &pending); + bail = to_irq_stack(&pending); if(bail) return; diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 327c9f2fa626..54816adb8e93 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -374,6 +374,13 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if (unlikely(in_atomic() || !mm)) goto bad_area_nosemaphore; + /* + * User-mode registers count as a user access even for any + * potential system fault or CPU buglet. + */ + if (user_mode_vm(regs)) + error_code |= PF_USER; + again: /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the diff --git a/drivers/base/core.c b/drivers/base/core.c index e6738bcbe5a9..6de33d7a29ba 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -679,14 +679,26 @@ static int device_add_class_symlinks(struct device *dev) goto out_subsys; } if (dev->parent) { - error = sysfs_create_link(&dev->kobj, &dev->parent->kobj, - "device"); - if (error) - goto out_busid; #ifdef CONFIG_SYSFS_DEPRECATED { - char * class_name = make_class_name(dev->class->name, - &dev->kobj); + struct device *parent = dev->parent; + char *class_name; + + /* + * In old sysfs stacked class devices had 'device' + * link pointing to real device instead of parent + */ + while (parent->class && !parent->bus && parent->parent) + parent = parent->parent; + + error = sysfs_create_link(&dev->kobj, + &parent->kobj, + "device"); + if (error) + goto out_busid; + + class_name = make_class_name(dev->class->name, + &dev->kobj); if (class_name) error = sysfs_create_link(&dev->parent->kobj, &dev->kobj, class_name); @@ -694,6 +706,11 @@ static int device_add_class_symlinks(struct device *dev) if (error) goto out_device; } +#else + error = sysfs_create_link(&dev->kobj, &dev->parent->kobj, + "device"); + if (error) + goto out_busid; #endif } return 0; diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 504a95d888b2..84d6aa500e26 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -31,6 +31,7 @@ #include <linux/genhd.h> #include <linux/hdreg.h> #include <linux/blkpg.h> +#include <linux/dma-mapping.h> #include <linux/interrupt.h> #include <linux/ioport.h> #include <linux/mm.h> diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h index 35ab1a9f8e8b..8955e7ff759a 100644 --- a/drivers/char/agp/agp.h +++ b/drivers/char/agp/agp.h @@ -176,7 +176,7 @@ struct agp_bridge_data { #define I830_GMCH_MEM_MASK 0x1 #define I830_GMCH_MEM_64M 0x1 #define I830_GMCH_MEM_128M 0 -#define I830_GMCH_GMS_MASK 0xF0 +#define I830_GMCH_GMS_MASK 0x70 #define I830_GMCH_GMS_DISABLED 0x00 #define I830_GMCH_GMS_LOCAL 0x10 #define I830_GMCH_GMS_STOLEN_512 0x20 @@ -190,6 +190,7 @@ struct agp_bridge_data { #define INTEL_I830_ERRSTS 0x92 /* Intel 855GM/852GM registers */ +#define I855_GMCH_GMS_MASK 0xF0 #define I855_GMCH_GMS_STOLEN_0M 0x0 #define I855_GMCH_GMS_STOLEN_1M (0x1 << 4) #define I855_GMCH_GMS_STOLEN_4M (0x2 << 4) diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 7c69bf259caa..a5d0e95a227a 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -511,7 +511,7 @@ static void intel_i830_init_gtt_entries(void) */ if (IS_G33) size = 0; - switch (gmch_ctrl & I830_GMCH_GMS_MASK) { + switch (gmch_ctrl & I855_GMCH_GMS_MASK) { case I855_GMCH_GMS_STOLEN_1M: gtt_entries = MB(1) - KB(size); break; diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 9b07f7851061..dd441ff4af56 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2215,7 +2215,8 @@ static int ipmi_pci_resume(struct pci_dev *pdev) static struct pci_device_id ipmi_pci_devices[] = { { PCI_DEVICE(PCI_HP_VENDOR_ID, PCI_MMC_DEVICE_ID) }, - { PCI_DEVICE_CLASS(PCI_ERMC_CLASSCODE, PCI_ERMC_CLASSCODE_MASK) } + { PCI_DEVICE_CLASS(PCI_ERMC_CLASSCODE, PCI_ERMC_CLASSCODE_MASK) }, + { 0, } }; MODULE_DEVICE_TABLE(pci, ipmi_pci_devices); diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c index c08a4152ee8f..049a46cc9f87 100644 --- a/drivers/char/mspec.c +++ b/drivers/char/mspec.c @@ -67,7 +67,7 @@ /* * Page types allocated by the device. */ -enum { +enum mspec_page_type { MSPEC_FETCHOP = 1, MSPEC_CACHED, MSPEC_UNCACHED @@ -83,15 +83,25 @@ static int is_sn2; * One of these structures is allocated when an mspec region is mmaped. The * structure is pointed to by the vma->vm_private_data field in the vma struct. * This structure is used to record the addresses of the mspec pages. + * This structure is shared by all vma's that are split off from the + * original vma when split_vma()'s are done. + * + * The refcnt is incremented atomically because mm->mmap_sem does not + * protect in fork case where multiple tasks share the vma_data. */ struct vma_data { atomic_t refcnt; /* Number of vmas sharing the data. */ - spinlock_t lock; /* Serialize access to the vma. */ + spinlock_t lock; /* Serialize access to this structure. */ int count; /* Number of pages allocated. */ - int type; /* Type of pages allocated. */ + enum mspec_page_type type; /* Type of pages allocated. */ + int flags; /* See VMD_xxx below. */ + unsigned long vm_start; /* Original (unsplit) base. */ + unsigned long vm_end; /* Original (unsplit) end. */ unsigned long maddr[0]; /* Array of MSPEC addresses. */ }; +#define VMD_VMALLOCED 0x1 /* vmalloc'd rather than kmalloc'd */ + /* used on shub2 to clear FOP cache in the HUB */ static unsigned long scratch_page[MAX_NUMNODES]; #define SH2_AMO_CACHE_ENTRIES 4 @@ -129,8 +139,8 @@ mspec_zero_block(unsigned long addr, int len) * mspec_open * * Called when a device mapping is created by a means other than mmap - * (via fork, etc.). Increments the reference count on the underlying - * mspec data so it is not freed prematurely. + * (via fork, munmap, etc.). Increments the reference count on the + * underlying mspec data so it is not freed prematurely. */ static void mspec_open(struct vm_area_struct *vma) @@ -151,34 +161,44 @@ static void mspec_close(struct vm_area_struct *vma) { struct vma_data *vdata; - int i, pages, result, vdata_size; + int index, last_index, result; + unsigned long my_page; vdata = vma->vm_private_data; - if (!atomic_dec_and_test(&vdata->refcnt)) - return; - pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; - vdata_size = sizeof(struct vma_data) + pages * sizeof(long); - for (i = 0; i < pages; i++) { - if (vdata->maddr[i] == 0) + BUG_ON(vma->vm_start < vdata->vm_start || vma->vm_end > vdata->vm_end); + + spin_lock(&vdata->lock); + index = (vma->vm_start - vdata->vm_start) >> PAGE_SHIFT; + last_index = (vma->vm_end - vdata->vm_start) >> PAGE_SHIFT; + for (; index < last_index; index++) { + if (vdata->maddr[index] == 0) continue; /* * Clear the page before sticking it back * into the pool. */ - result = mspec_zero_block(vdata->maddr[i], PAGE_SIZE); + my_page = vdata->maddr[index]; + vdata->maddr[index] = 0; + spin_unlock(&vdata->lock); + result = mspec_zero_block(my_page, PAGE_SIZE); if (!result) - uncached_free_page(vdata->maddr[i]); + uncached_free_page(my_page); else printk(KERN_WARNING "mspec_close(): " "failed to zero page %i\n", result); + spin_lock(&vdata->lock); } + spin_unlock(&vdata->lock); - if (vdata_size <= PAGE_SIZE) - kfree(vdata); - else + if (!atomic_dec_and_test(&vdata->refcnt)) + return; + + if (vdata->flags & VMD_VMALLOCED) vfree(vdata); + else + kfree(vdata); } @@ -195,7 +215,8 @@ mspec_nopfn(struct vm_area_struct *vma, unsigned long address) int index; struct vma_data *vdata = vma->vm_private_data; - index = (address - vma->vm_start) >> PAGE_SHIFT; + BUG_ON(address < vdata->vm_start || address >= vdata->vm_end); + index = (address - vdata->vm_start) >> PAGE_SHIFT; maddr = (volatile unsigned long) vdata->maddr[index]; if (maddr == 0) { maddr = uncached_alloc_page(numa_node_id()); @@ -237,10 +258,11 @@ static struct vm_operations_struct mspec_vm_ops = { * underlying pages. */ static int -mspec_mmap(struct file *file, struct vm_area_struct *vma, int type) +mspec_mmap(struct file *file, struct vm_area_struct *vma, + enum mspec_page_type type) { struct vma_data *vdata; - int pages, vdata_size; + int pages, vdata_size, flags = 0; if (vma->vm_pgoff != 0) return -EINVAL; @@ -255,12 +277,17 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma, int type) vdata_size = sizeof(struct vma_data) + pages * sizeof(long); if (vdata_size <= PAGE_SIZE) vdata = kmalloc(vdata_size, GFP_KERNEL); - else + else { vdata = vmalloc(vdata_size); + flags = VMD_VMALLOCED; + } if (!vdata) return -ENOMEM; memset(vdata, 0, vdata_size); + vdata->vm_start = vma->vm_start; + vdata->vm_end = vma->vm_end; + vdata->flags = flags; vdata->type = type; spin_lock_init(&vdata->lock); vdata->refcnt = ATOMIC_INIT(1); diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index eba1adbc1b6a..4754769eda97 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -487,6 +487,7 @@ static inline int idedisk_supports_lba48(const struct hd_driveid *id) */ static const struct drive_list_entry hpa_list[] = { { "ST340823A", NULL }, + { "ST320413A", NULL }, { NULL, NULL } }; diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index 4b13cd9a027d..f19eb6daeefd 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1802,9 +1802,7 @@ pmac_ide_dma_check(ide_drive_t *drive) { struct hd_driveid *id = drive->id; ide_hwif_t *hwif = HWIF(drive); - pmac_ide_hwif_t* pmif = (pmac_ide_hwif_t *)hwif->hwif_data; int enable = 1; - int map; drive->using_dma = 0; if (drive->media == ide_floppy) diff --git a/drivers/media/video/usbvision/usbvision-cards.c b/drivers/media/video/usbvision/usbvision-cards.c index 380564cd3317..f09eb102731b 100644 --- a/drivers/media/video/usbvision/usbvision-cards.c +++ b/drivers/media/video/usbvision/usbvision-cards.c @@ -1081,6 +1081,7 @@ struct usb_device_id usbvision_table [] = { { USB_DEVICE(0x2304, 0x0301), .driver_info=PINNA_LINX_VD_IN_CAB_PAL }, { USB_DEVICE(0x2304, 0x0419), .driver_info=PINNA_PCTV_BUNGEE_PAL_FM }, { USB_DEVICE(0x2400, 0x4200), .driver_info=HPG_WINTV }, + { }, /* terminate list */ }; MODULE_DEVICE_TABLE (usb, usbvision_table); diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c index cff969d05d4a..6f32a35eb106 100644 --- a/drivers/mtd/nand/cafe_nand.c +++ b/drivers/mtd/nand/cafe_nand.c @@ -816,7 +816,8 @@ static void __devexit cafe_nand_remove(struct pci_dev *pdev) } static struct pci_device_id cafe_nand_tbl[] = { - { 0x11ab, 0x4100, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_MEMORY_FLASH << 8, 0xFFFF0 } + { 0x11ab, 0x4100, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_MEMORY_FLASH << 8, 0xFFFF0 }, + { 0, } }; MODULE_DEVICE_TABLE(pci, cafe_nand_tbl); diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index 46da5714932c..5ab3492817d1 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c @@ -61,7 +61,7 @@ struct rtc_plat_data { struct rtc_device *rtc; void __iomem *ioaddr; - unsigned long baseaddr; + resource_size_t baseaddr; unsigned long last_jiffies; int irq; unsigned int irqen; diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index b2e5481ba3b6..67291b0f8283 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -55,7 +55,7 @@ struct rtc_plat_data { void __iomem *ioaddr_rtc; size_t size_nvram; size_t size; - unsigned long baseaddr; + resource_size_t baseaddr; unsigned long last_jiffies; }; diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c index bca57bb94939..e348ba684050 100644 --- a/drivers/serial/sunsab.c +++ b/drivers/serial/sunsab.c @@ -58,6 +58,7 @@ struct uart_sunsab_port { unsigned char interrupt_mask1;/* ISR1 masking */ unsigned char pvr_dtr_bit; /* Which PVR bit is DTR */ unsigned char pvr_dsr_bit; /* Which PVR bit is DSR */ + unsigned int gis_shift; int type; /* SAB82532 version */ /* Setting configuration bits while the transmitter is active @@ -305,13 +306,15 @@ static irqreturn_t sunsab_interrupt(int irq, void *dev_id) struct tty_struct *tty; union sab82532_irq_status status; unsigned long flags; + unsigned char gis; spin_lock_irqsave(&up->port.lock, flags); status.stat = 0; - if (readb(&up->regs->r.gis) & SAB82532_GIS_ISA0) + gis = readb(&up->regs->r.gis) >> up->gis_shift; + if (gis & 1) status.sreg.isr0 = readb(&up->regs->r.isr0); - if (readb(&up->regs->r.gis) & SAB82532_GIS_ISA1) + if (gis & 2) status.sreg.isr1 = readb(&up->regs->r.isr1); tty = NULL; @@ -327,35 +330,6 @@ static irqreturn_t sunsab_interrupt(int irq, void *dev_id) transmit_chars(up, &status); } - spin_unlock(&up->port.lock); - - if (tty) - tty_flip_buffer_push(tty); - - up++; - - spin_lock(&up->port.lock); - - status.stat = 0; - if (readb(&up->regs->r.gis) & SAB82532_GIS_ISB0) - status.sreg.isr0 = readb(&up->regs->r.isr0); - if (readb(&up->regs->r.gis) & SAB82532_GIS_ISB1) - status.sreg.isr1 = readb(&up->regs->r.isr1); - - tty = NULL; - if (status.stat) { - if ((status.sreg.isr0 & (SAB82532_ISR0_TCD | SAB82532_ISR0_TIME | - SAB82532_ISR0_RFO | SAB82532_ISR0_RPF)) || - (status.sreg.isr1 & SAB82532_ISR1_BRK)) - - tty = receive_chars(up, &status); - if ((status.sreg.isr0 & SAB82532_ISR0_CDSC) || - (status.sreg.isr1 & (SAB82532_ISR1_BRK | SAB82532_ISR1_CSC))) - check_status(up, &status); - if (status.sreg.isr1 & (SAB82532_ISR1_ALLS | SAB82532_ISR1_XPR)) - transmit_chars(up, &status); - } - spin_unlock_irqrestore(&up->port.lock, flags); if (tty) @@ -539,6 +513,10 @@ static int sunsab_startup(struct uart_port *port) struct uart_sunsab_port *up = (struct uart_sunsab_port *) port; unsigned long flags; unsigned char tmp; + int err = request_irq(up->port.irq, sunsab_interrupt, + IRQF_SHARED, "sab", up); + if (err) + return err; spin_lock_irqsave(&up->port.lock, flags); @@ -641,6 +619,7 @@ static void sunsab_shutdown(struct uart_port *port) #endif spin_unlock_irqrestore(&up->port.lock, flags); + free_irq(up->port.irq, up); } /* @@ -1008,9 +987,11 @@ static int __devinit sunsab_init_one(struct uart_sunsab_port *up, if ((up->port.line & 0x1) == 0) { up->pvr_dsr_bit = (1 << 0); up->pvr_dtr_bit = (1 << 1); + up->gis_shift = 2; } else { up->pvr_dsr_bit = (1 << 3); up->pvr_dtr_bit = (1 << 2); + up->gis_shift = 0; } up->cached_pvr = (1 << 1) | (1 << 2) | (1 << 4); writeb(up->cached_pvr, &up->regs->w.pvr); @@ -1023,19 +1004,6 @@ static int __devinit sunsab_init_one(struct uart_sunsab_port *up, up->tec_timeout = SAB82532_MAX_TEC_TIMEOUT; up->cec_timeout = SAB82532_MAX_CEC_TIMEOUT; - if (!(up->port.line & 0x01)) { - int err; - - err = request_irq(up->port.irq, sunsab_interrupt, - IRQF_SHARED, "sab", up); - if (err) { - of_iounmap(&op->resource[0], - up->port.membase, - sizeof(union sab82532_async_regs)); - return err; - } - } - return 0; } @@ -1051,52 +1019,60 @@ static int __devinit sab_probe(struct of_device *op, const struct of_device_id * 0, (inst * 2) + 0); if (err) - return err; + goto out; err = sunsab_init_one(&up[1], op, sizeof(union sab82532_async_regs), (inst * 2) + 1); - if (err) { - of_iounmap(&op->resource[0], - up[0].port.membase, - sizeof(union sab82532_async_regs)); - free_irq(up[0].port.irq, &up[0]); - return err; - } + if (err) + goto out1; sunserial_console_match(SUNSAB_CONSOLE(), op->node, &sunsab_reg, up[0].port.line); - uart_add_one_port(&sunsab_reg, &up[0].port); sunserial_console_match(SUNSAB_CONSOLE(), op->node, &sunsab_reg, up[1].port.line); - uart_add_one_port(&sunsab_reg, &up[1].port); + + err = uart_add_one_port(&sunsab_reg, &up[0].port); + if (err) + goto out2; + + err = uart_add_one_port(&sunsab_reg, &up[1].port); + if (err) + goto out3; dev_set_drvdata(&op->dev, &up[0]); inst++; return 0; -} - -static void __devexit sab_remove_one(struct uart_sunsab_port *up) -{ - struct of_device *op = to_of_device(up->port.dev); - uart_remove_one_port(&sunsab_reg, &up->port); - if (!(up->port.line & 1)) - free_irq(up->port.irq, up); +out3: + uart_remove_one_port(&sunsab_reg, &up[0].port); +out2: of_iounmap(&op->resource[0], - up->port.membase, + up[1].port.membase, sizeof(union sab82532_async_regs)); +out1: + of_iounmap(&op->resource[0], + up[0].port.membase, + sizeof(union sab82532_async_regs)); +out: + return err; } static int __devexit sab_remove(struct of_device *op) { struct uart_sunsab_port *up = dev_get_drvdata(&op->dev); - sab_remove_one(&up[0]); - sab_remove_one(&up[1]); + uart_remove_one_port(&sunsab_reg, &up[1].port); + uart_remove_one_port(&sunsab_reg, &up[0].port); + of_iounmap(&op->resource[0], + up[1].port.membase, + sizeof(union sab82532_async_regs)); + of_iounmap(&op->resource[0], + up[0].port.membase, + sizeof(union sab82532_async_regs)); dev_set_drvdata(&op->dev, NULL); @@ -1143,6 +1119,7 @@ static int __init sunsab_init(void) sunsab_reg.minor = sunserial_current_minor; sunsab_reg.nr = num_channels; + sunsab_reg.cons = SUNSAB_CONSOLE(); err = uart_register_driver(&sunsab_reg); if (err) { diff --git a/drivers/video/intelfb/intelfbhw.c b/drivers/video/intelfb/intelfbhw.c index b21d0dec9283..6a47682d8614 100644 --- a/drivers/video/intelfb/intelfbhw.c +++ b/drivers/video/intelfb/intelfbhw.c @@ -1352,7 +1352,7 @@ intelfbhw_program_mode(struct intelfb_info *dinfo, /* turn off PLL */ tmp = INREG(dpll_reg); - dpll_reg &= ~DPLL_VCO_ENABLE; + tmp &= ~DPLL_VCO_ENABLE; OUTREG(dpll_reg, tmp); /* Set PLL parameters */ diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 1586807b8177..c1fa1908dba0 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -140,7 +140,8 @@ struct dx_frame struct dx_map_entry { u32 hash; - u32 offs; + u16 offs; + u16 size; }; #ifdef CONFIG_EXT3_INDEX @@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir, entries = (struct dx_entry *) (((char *)&root->info) + root->info.info_length); - assert(dx_get_limit(entries) == dx_root_limit(dir, - root->info.info_length)); + + if (dx_get_limit(entries) != dx_root_limit(dir, + root->info.info_length)) { + ext3_warning(dir->i_sb, __FUNCTION__, + "dx entry: limit != root limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail; + } + dxtrace (printk("Look up %x", hash)); while (1) { count = dx_get_count(entries); - assert (count && count <= dx_get_limit(entries)); + if (!count || count > dx_get_limit(entries)) { + ext3_warning(dir->i_sb, __FUNCTION__, + "dx entry: no count or count > limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail2; + } + p = entries + 1; q = entries + count - 1; while (p <= q) @@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir, if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) goto fail2; at = entries = ((struct dx_node *) bh->b_data)->entries; - assert (dx_get_limit(entries) == dx_node_limit (dir)); + if (dx_get_limit(entries) != dx_node_limit (dir)) { + ext3_warning(dir->i_sb, __FUNCTION__, + "dx entry: limit != node limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail2; + } frame++; + frame->bh = NULL; } fail2: while (frame >= frame_in) { @@ -432,6 +455,10 @@ fail2: frame--; } fail: + if (*err == ERR_BAD_DX_DIR) + ext3_warning(dir->i_sb, __FUNCTION__, + "Corrupt dir inode %ld, running e2fsck is " + "recommended.", dir->i_ino); return NULL; } @@ -671,6 +698,10 @@ errout: * Directory block splitting, compacting */ +/* + * Create map of hash values, offsets, and sizes, stored at end of block. + * Returns number of entries mapped. + */ static int dx_make_map (struct ext3_dir_entry_2 *de, int size, struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) { @@ -684,7 +715,8 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size, ext3fs_dirhash(de->name, de->name_len, &h); map_tail--; map_tail->hash = h.hash; - map_tail->offs = (u32) ((char *) de - base); + map_tail->offs = (u16) ((char *) de - base); + map_tail->size = le16_to_cpu(de->rec_len); count++; cond_resched(); } @@ -694,6 +726,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size, return count; } +/* Sort map by hash value */ static void dx_sort_map (struct dx_map_entry *map, unsigned count) { struct dx_map_entry *p, *q, *top = map + count - 1; @@ -1091,6 +1124,10 @@ static inline void ext3_set_de_type(struct super_block *sb, } #ifdef CONFIG_EXT3_INDEX +/* + * Move count entries from end of map between two memory locations. + * Returns pointer to last entry moved. + */ static struct ext3_dir_entry_2 * dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) { @@ -1109,6 +1146,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) return (struct ext3_dir_entry_2 *) (to - rec_len); } +/* + * Compact each dir entry in the range to the minimal rec_len. + * Returns pointer to last entry in range. + */ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) { struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; @@ -1131,6 +1172,11 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) return prev; } +/* + * Split a full leaf block to make room for a new dir entry. + * Allocate a new block, and move entries so that they are approx. equally full. + * Returns pointer to de in block into which the new entry will be inserted. + */ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, struct buffer_head **bh,struct dx_frame *frame, struct dx_hash_info *hinfo, int *error) @@ -1142,7 +1188,7 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, u32 hash2; struct dx_map_entry *map; char *data1 = (*bh)->b_data, *data2; - unsigned split; + unsigned split, move, size, i; struct ext3_dir_entry_2 *de = NULL, *de2; int err = 0; @@ -1170,8 +1216,19 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, count = dx_make_map ((struct ext3_dir_entry_2 *) data1, blocksize, hinfo, map); map -= count; - split = count/2; // need to adjust to actual middle dx_sort_map (map, count); + /* Split the existing block in the middle, size-wise */ + size = 0; + move = 0; + for (i = count-1; i >= 0; i--) { + /* is more than half of this entry in 2nd half of the block? */ + if (size + map[i].size/2 > blocksize/2) + break; + size += map[i].size; + move++; + } + /* map index at which we will split */ + split = count - move; hash2 = map[split].hash; continued = hash2 == map[split - 1].hash; dxtrace(printk("Split block %i at %x, %i/%i\n", diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index da224974af78..5fdb862e71c4 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -140,7 +140,8 @@ struct dx_frame struct dx_map_entry { u32 hash; - u32 offs; + u16 offs; + u16 size; }; #ifdef CONFIG_EXT4_INDEX @@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir, entries = (struct dx_entry *) (((char *)&root->info) + root->info.info_length); - assert(dx_get_limit(entries) == dx_root_limit(dir, - root->info.info_length)); + + if (dx_get_limit(entries) != dx_root_limit(dir, + root->info.info_length)) { + ext4_warning(dir->i_sb, __FUNCTION__, + "dx entry: limit != root limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail; + } + dxtrace (printk("Look up %x", hash)); while (1) { count = dx_get_count(entries); - assert (count && count <= dx_get_limit(entries)); + if (!count || count > dx_get_limit(entries)) { + ext4_warning(dir->i_sb, __FUNCTION__, + "dx entry: no count or count > limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail2; + } + p = entries + 1; q = entries + count - 1; while (p <= q) @@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir, if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) goto fail2; at = entries = ((struct dx_node *) bh->b_data)->entries; - assert (dx_get_limit(entries) == dx_node_limit (dir)); + if (dx_get_limit(entries) != dx_node_limit (dir)) { + ext4_warning(dir->i_sb, __FUNCTION__, + "dx entry: limit != node limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail2; + } frame++; + frame->bh = NULL; } fail2: while (frame >= frame_in) { @@ -432,6 +455,10 @@ fail2: frame--; } fail: + if (*err == ERR_BAD_DX_DIR) + ext4_warning(dir->i_sb, __FUNCTION__, + "Corrupt dir inode %ld, running e2fsck is " + "recommended.", dir->i_ino); return NULL; } @@ -671,6 +698,10 @@ errout: * Directory block splitting, compacting */ +/* + * Create map of hash values, offsets, and sizes, stored at end of block. + * Returns number of entries mapped. + */ static int dx_make_map (struct ext4_dir_entry_2 *de, int size, struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) { @@ -684,7 +715,8 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size, ext4fs_dirhash(de->name, de->name_len, &h); map_tail--; map_tail->hash = h.hash; - map_tail->offs = (u32) ((char *) de - base); + map_tail->offs = (u16) ((char *) de - base); + map_tail->size = le16_to_cpu(de->rec_len); count++; cond_resched(); } @@ -694,6 +726,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size, return count; } +/* Sort map by hash value */ static void dx_sort_map (struct dx_map_entry *map, unsigned count) { struct dx_map_entry *p, *q, *top = map + count - 1; @@ -1089,6 +1122,10 @@ static inline void ext4_set_de_type(struct super_block *sb, } #ifdef CONFIG_EXT4_INDEX +/* + * Move count entries from end of map between two memory locations. + * Returns pointer to last entry moved. + */ static struct ext4_dir_entry_2 * dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) { @@ -1107,6 +1144,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) return (struct ext4_dir_entry_2 *) (to - rec_len); } +/* + * Compact each dir entry in the range to the minimal rec_len. + * Returns pointer to last entry in range. + */ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) { struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base; @@ -1129,6 +1170,11 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) return prev; } +/* + * Split a full leaf block to make room for a new dir entry. + * Allocate a new block, and move entries so that they are approx. equally full. + * Returns pointer to de in block into which the new entry will be inserted. + */ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, struct buffer_head **bh,struct dx_frame *frame, struct dx_hash_info *hinfo, int *error) @@ -1140,7 +1186,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, u32 hash2; struct dx_map_entry *map; char *data1 = (*bh)->b_data, *data2; - unsigned split; + unsigned split, move, size, i; struct ext4_dir_entry_2 *de = NULL, *de2; int err = 0; @@ -1168,8 +1214,19 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, count = dx_make_map ((struct ext4_dir_entry_2 *) data1, blocksize, hinfo, map); map -= count; - split = count/2; // need to adjust to actual middle dx_sort_map (map, count); + /* Split the existing block in the middle, size-wise */ + size = 0; + move = 0; + for (i = count-1; i >= 0; i--) { + /* is more than half of this entry in 2nd half of the block? */ + if (size + map[i].size/2 > blocksize/2) + break; + size += map[i].size; + move++; + } + /* map index at which we will split */ + split = count - move; hash2 = map[split].hash; continued = hash2 == map[split - 1].hash; dxtrace(printk("Split block %i at %x, %i/%i\n", diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 8ed593766f16..b878528b64c1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -345,8 +345,8 @@ void __exit unregister_nfs_fs(void) unregister_shrinker(&acl_shrinker); #ifdef CONFIG_NFS_V4 unregister_filesystem(&nfs4_fs_type); - nfs_unregister_sysctl(); #endif + nfs_unregister_sysctl(); unregister_filesystem(&nfs_fs_type); } diff --git a/include/asm-powerpc/time.h b/include/asm-powerpc/time.h index d7f5ddfbaac7..c104c15c6625 100644 --- a/include/asm-powerpc/time.h +++ b/include/asm-powerpc/time.h @@ -149,6 +149,11 @@ static inline u64 get_tb(void) } #endif /* !CONFIG_PPC64 */ +static inline u64 get_tb_or_rtc(void) +{ + return __USE_RTC() ? get_rtc() : get_tb(); +} + static inline void set_tb(unsigned int upper, unsigned int lower) { mtspr(SPRN_TBWL, 0); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5bdd656e88cf..a020eb2d4e2a 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -159,7 +159,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p); extern struct mempolicy default_policy; extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, - unsigned long addr, gfp_t gfp_flags); + unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol); extern unsigned slab_node(struct mempolicy *policy); extern enum zone_type policy_zone; @@ -256,7 +256,7 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p) #define set_cpuset_being_rebound(x) do {} while (0) static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, - unsigned long addr, gfp_t gfp_flags) + unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol) { return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags); } diff --git a/include/linux/sched.h b/include/linux/sched.h index f4e324ed2e44..5445eaec6908 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -593,7 +593,7 @@ struct user_struct { #endif /* Hash table maintenance information */ - struct list_head uidhash_list; + struct hlist_node uidhash_node; uid_t uid; }; @@ -1472,6 +1472,7 @@ static inline struct user_struct *get_uid(struct user_struct *u) } extern void free_uid(struct user_struct *); extern void switch_uid(struct user_struct *); +extern void release_uids(struct user_namespace *ns); #include <asm/current.h> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 1101b0ce878f..b5f41d4c2eec 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -11,7 +11,7 @@ struct user_namespace { struct kref kref; - struct list_head uidhash_table[UIDHASH_SZ]; + struct hlist_head uidhash_table[UIDHASH_SZ]; struct user_struct *root_user; }; diff --git a/init/Kconfig b/init/Kconfig index 96b54595f1dc..d54d0cadcc06 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -488,6 +488,7 @@ config SIGNALFD config TIMERFD bool "Enable timerfd() system call" if EMBEDDED select ANON_INODES + depends on BROKEN default y help Enable the timerfd() system call that allows to receive timer diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index a6b4c0c08e13..fd4fc12d2624 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -57,8 +57,10 @@ static void __init handle_initrd(void) pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD); if (pid > 0) - while (pid != sys_wait4(-1, NULL, 0, NULL)) + while (pid != sys_wait4(-1, NULL, 0, NULL)) { + try_to_freeze(); yield(); + } /* move initrd to rootfs' /old */ sys_fchdir(old_fd); diff --git a/kernel/user.c b/kernel/user.c index e7d11cef6998..9ca2848fc356 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -55,25 +55,22 @@ struct user_struct root_user = { /* * These routines must be called with the uidhash spinlock held! */ -static inline void uid_hash_insert(struct user_struct *up, struct list_head *hashent) +static inline void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent) { - list_add(&up->uidhash_list, hashent); + hlist_add_head(&up->uidhash_node, hashent); } static inline void uid_hash_remove(struct user_struct *up) { - list_del(&up->uidhash_list); + hlist_del_init(&up->uidhash_node); } -static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent) +static inline struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) { - struct list_head *up; - - list_for_each(up, hashent) { - struct user_struct *user; - - user = list_entry(up, struct user_struct, uidhash_list); + struct user_struct *user; + struct hlist_node *h; + hlist_for_each_entry(user, h, hashent, uidhash_node) { if(user->uid == uid) { atomic_inc(&user->__count); return user; @@ -122,7 +119,7 @@ void free_uid(struct user_struct *up) struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) { - struct list_head *hashent = uidhashentry(ns, uid); + struct hlist_head *hashent = uidhashentry(ns, uid); struct user_struct *up; spin_lock_irq(&uidhash_lock); @@ -202,6 +199,30 @@ void switch_uid(struct user_struct *new_user) suid_keys(current); } +void release_uids(struct user_namespace *ns) +{ + int i; + unsigned long flags; + struct hlist_head *head; + struct hlist_node *nd; + + spin_lock_irqsave(&uidhash_lock, flags); + /* + * collapse the chains so that the user_struct-s will + * be still alive, but not in hashes. subsequent free_uid() + * will free them. + */ + for (i = 0; i < UIDHASH_SZ; i++) { + head = ns->uidhash_table + i; + while (!hlist_empty(head)) { + nd = head->first; + hlist_del_init(nd); + } + } + spin_unlock_irqrestore(&uidhash_lock, flags); + + free_uid(ns->root_user); +} static int __init uid_cache_init(void) { @@ -211,7 +232,7 @@ static int __init uid_cache_init(void) 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); for(n = 0; n < UIDHASH_SZ; ++n) - INIT_LIST_HEAD(init_user_ns.uidhash_table + n); + INIT_HLIST_HEAD(init_user_ns.uidhash_table + n); /* Insert the root user immediately (init already runs as root) */ spin_lock_irq(&uidhash_lock); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 85af9422ea6e..7af90fc4f0fd 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -39,7 +39,7 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) kref_init(&ns->kref); for (n = 0; n < UIDHASH_SZ; ++n) - INIT_LIST_HEAD(ns->uidhash_table + n); + INIT_HLIST_HEAD(ns->uidhash_table + n); /* Insert new root user. */ ns->root_user = alloc_uid(ns, 0); @@ -81,7 +81,7 @@ void free_user_ns(struct kref *kref) struct user_namespace *ns; ns = container_of(kref, struct user_namespace, kref); - free_uid(ns->root_user); + release_uids(ns); kfree(ns); } diff --git a/kernel/utsname.c b/kernel/utsname.c index 9d8180a0f0d8..816d7b24fa03 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -28,7 +28,9 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) if (!ns) return ERR_PTR(-ENOMEM); + down_read(&uts_sem); memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); + up_read(&uts_sem); kref_init(&ns->kref); return ns; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index de4cf458d6e1..84c795ee2d65 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -71,8 +71,9 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, { int nid; struct page *page = NULL; + struct mempolicy *mpol; struct zonelist *zonelist = huge_zonelist(vma, address, - htlb_alloc_mask); + htlb_alloc_mask, &mpol); struct zone **z; for (z = zonelist->zones; *z; z++) { @@ -87,6 +88,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, break; } } + mpol_free(mpol); /* unref if mpol !NULL */ return page; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index bb54b88c3d5a..3d6ac9505d07 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1077,21 +1077,37 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, #endif -/* Return effective policy for a VMA */ +/* + * get_vma_policy(@task, @vma, @addr) + * @task - task for fallback if vma policy == default + * @vma - virtual memory area whose policy is sought + * @addr - address in @vma for shared policy lookup + * + * Returns effective policy for a VMA at specified address. + * Falls back to @task or system default policy, as necessary. + * Returned policy has extra reference count if shared, vma, + * or some other task's policy [show_numa_maps() can pass + * @task != current]. It is the caller's responsibility to + * free the reference in these cases. + */ static struct mempolicy * get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = task->mempolicy; + int shared_pol = 0; if (vma) { - if (vma->vm_ops && vma->vm_ops->get_policy) + if (vma->vm_ops && vma->vm_ops->get_policy) { pol = vma->vm_ops->get_policy(vma, addr); - else if (vma->vm_policy && + shared_pol = 1; /* if pol non-NULL, add ref below */ + } else if (vma->vm_policy && vma->vm_policy->policy != MPOL_DEFAULT) pol = vma->vm_policy; } if (!pol) pol = &default_policy; + else if (!shared_pol && pol != current->mempolicy) + mpol_get(pol); /* vma or other task's policy */ return pol; } @@ -1207,19 +1223,45 @@ static inline unsigned interleave_nid(struct mempolicy *pol, } #ifdef CONFIG_HUGETLBFS -/* Return a zonelist suitable for a huge page allocation. */ +/* + * huge_zonelist(@vma, @addr, @gfp_flags, @mpol) + * @vma = virtual memory area whose policy is sought + * @addr = address in @vma for shared policy lookup and interleave policy + * @gfp_flags = for requested zone + * @mpol = pointer to mempolicy pointer for reference counted 'BIND policy + * + * Returns a zonelist suitable for a huge page allocation. + * If the effective policy is 'BIND, returns pointer to policy's zonelist. + * If it is also a policy for which get_vma_policy() returns an extra + * reference, we must hold that reference until after allocation. + * In that case, return policy via @mpol so hugetlb allocation can drop + * the reference. For non-'BIND referenced policies, we can/do drop the + * reference here, so the caller doesn't need to know about the special case + * for default and current task policy. + */ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, - gfp_t gfp_flags) + gfp_t gfp_flags, struct mempolicy **mpol) { struct mempolicy *pol = get_vma_policy(current, vma, addr); + struct zonelist *zl; + *mpol = NULL; /* probably no unref needed */ if (pol->policy == MPOL_INTERLEAVE) { unsigned nid; nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT); + __mpol_free(pol); /* finished with pol */ return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags); } - return zonelist_policy(GFP_HIGHUSER, pol); + + zl = zonelist_policy(GFP_HIGHUSER, pol); + if (unlikely(pol != &default_policy && pol != current->mempolicy)) { + if (pol->policy != MPOL_BIND) + __mpol_free(pol); /* finished with pol */ + else + *mpol = pol; /* unref needed after allocation */ + } + return zl; } #endif @@ -1264,6 +1306,7 @@ struct page * alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = get_vma_policy(current, vma, addr); + struct zonelist *zl; cpuset_update_task_memory_state(); @@ -1273,7 +1316,19 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) nid = interleave_nid(pol, vma, addr, PAGE_SHIFT); return alloc_page_interleave(gfp, 0, nid); } - return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol)); + zl = zonelist_policy(gfp, pol); + if (pol != &default_policy && pol != current->mempolicy) { + /* + * slow path: ref counted policy -- shared or vma + */ + struct page *page = __alloc_pages(gfp, 0, zl); + __mpol_free(pol); + return page; + } + /* + * fast path: default or task policy + */ + return __alloc_pages(gfp, 0, zl); } /** @@ -1872,6 +1927,7 @@ int show_numa_map(struct seq_file *m, void *v) struct numa_maps *md; struct file *file = vma->vm_file; struct mm_struct *mm = vma->vm_mm; + struct mempolicy *pol; int n; char buffer[50]; @@ -1882,8 +1938,13 @@ int show_numa_map(struct seq_file *m, void *v) if (!md) return 0; - mpol_to_str(buffer, sizeof(buffer), - get_vma_policy(priv->task, vma, vma->vm_start)); + pol = get_vma_policy(priv->task, vma, vma->vm_start); + mpol_to_str(buffer, sizeof(buffer), pol); + /* + * unref shared or other task's mempolicy + */ + if (pol != &default_policy && pol != current->mempolicy) + __mpol_free(pol); seq_printf(m, "%08lx %s", vma->vm_start, buffer); |